From dc0deb5e49a759e4868e6b5f8b288959f51461b2 Mon Sep 17 00:00:00 2001 From: Porcupiney Hairs Date: Sun, 2 Jul 2023 17:38:01 +0530 Subject: [PATCH 01/33] Go : Improvements to DSN Injection query --- go/ql/src/experimental/{CWE-134 => CWE-74}/DsnBad.go | 0 go/ql/src/experimental/{CWE-134 => CWE-74}/DsnGood.go | 0 .../experimental/{CWE-134 => CWE-74}/DsnInjection.qhelp | 0 go/ql/src/experimental/{CWE-134 => CWE-74}/DsnInjection.ql | 6 +++--- .../{CWE-134 => CWE-74}/DsnInjectionCustomizations.qll | 7 +++++-- .../experimental/{CWE-134 => CWE-74}/DsnInjectionLocal.ql | 2 +- go/ql/test/experimental/CWE-134/DsnInjection.qlref | 1 - go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref | 1 - go/ql/test/experimental/{CWE-134 => CWE-74}/Dsn.go | 0 .../experimental/{CWE-134 => CWE-74}/DsnInjection.expected | 2 +- go/ql/test/experimental/CWE-74/DsnInjection.qlref | 1 + .../{CWE-134 => CWE-74}/DsnInjectionLocal.expected | 0 go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref | 1 + 13 files changed, 12 insertions(+), 9 deletions(-) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnBad.go (100%) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnGood.go (100%) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnInjection.qhelp (100%) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnInjection.ql (81%) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnInjectionCustomizations.qll (90%) rename go/ql/src/experimental/{CWE-134 => CWE-74}/DsnInjectionLocal.ql (96%) delete mode 100644 go/ql/test/experimental/CWE-134/DsnInjection.qlref delete mode 100644 go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref rename go/ql/test/experimental/{CWE-134 => CWE-74}/Dsn.go (100%) rename go/ql/test/experimental/{CWE-134 => CWE-74}/DsnInjection.expected (80%) create mode 100644 go/ql/test/experimental/CWE-74/DsnInjection.qlref rename go/ql/test/experimental/{CWE-134 => CWE-74}/DsnInjectionLocal.expected (100%) create mode 100644 go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref diff --git a/go/ql/src/experimental/CWE-134/DsnBad.go b/go/ql/src/experimental/CWE-74/DsnBad.go similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnBad.go rename to go/ql/src/experimental/CWE-74/DsnBad.go diff --git a/go/ql/src/experimental/CWE-134/DsnGood.go b/go/ql/src/experimental/CWE-74/DsnGood.go similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnGood.go rename to go/ql/src/experimental/CWE-74/DsnGood.go diff --git a/go/ql/src/experimental/CWE-134/DsnInjection.qhelp b/go/ql/src/experimental/CWE-74/DsnInjection.qhelp similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnInjection.qhelp rename to go/ql/src/experimental/CWE-74/DsnInjection.qhelp diff --git a/go/ql/src/experimental/CWE-134/DsnInjection.ql b/go/ql/src/experimental/CWE-74/DsnInjection.ql similarity index 81% rename from go/ql/src/experimental/CWE-134/DsnInjection.ql rename to go/ql/src/experimental/CWE-74/DsnInjection.ql index 89bb83f9284..3197d04534b 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjection.ql +++ b/go/ql/src/experimental/CWE-74/DsnInjection.ql @@ -6,7 +6,7 @@ * @id go/dsn-injection * @tags security * experimental - * external/cwe/cwe-134 + * external/cwe/cwe-74 */ import go @@ -18,5 +18,5 @@ private class UntrustedFlowAsSource extends Source instanceof UntrustedFlowSourc from DsnInjection cfg, DataFlow::PathNode source, DataFlow::PathNode sink where cfg.hasFlowPath(source, sink) -select sink.getNode(), source, sink, "This query depends on a $@.", source.getNode(), - "user-provided value" +select sink.getNode(), source, sink, "Data-Source Name is built using $@.", source.getNode(), + "untrusted user input" diff --git a/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll b/go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll similarity index 90% rename from go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll rename to go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll index de547b8a07d..e66cb7cba73 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll +++ b/go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll @@ -14,8 +14,11 @@ class DsnInjection extends TaintTracking::Configuration { override predicate isSource(DataFlow::Node node) { node instanceof Source } override predicate isSink(DataFlow::Node node) { - exists(Function f | f.hasQualifiedName("database/sql", "Open") | - node = f.getACall().getArgument(1) + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("database/sql", "Open") and + c.getArgument(0).getStringValue() = "mysql" + | + node = c.getArgument(1) ) } diff --git a/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql b/go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql similarity index 96% rename from go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql rename to go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql index 7ecd3b1cc8a..ff4142f12d4 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql +++ b/go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql @@ -6,7 +6,7 @@ * @id go/dsn-injection-local * @tags security * experimental - * external/cwe/cwe-134 + * external/cwe/cwe-74 */ import go diff --git a/go/ql/test/experimental/CWE-134/DsnInjection.qlref b/go/ql/test/experimental/CWE-134/DsnInjection.qlref deleted file mode 100644 index c2308280884..00000000000 --- a/go/ql/test/experimental/CWE-134/DsnInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/CWE-134/DsnInjection.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref b/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref deleted file mode 100644 index b7b7e2bdbdd..00000000000 --- a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/CWE-134/DsnInjectionLocal.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/Dsn.go b/go/ql/test/experimental/CWE-74/Dsn.go similarity index 100% rename from go/ql/test/experimental/CWE-134/Dsn.go rename to go/ql/test/experimental/CWE-74/Dsn.go diff --git a/go/ql/test/experimental/CWE-134/DsnInjection.expected b/go/ql/test/experimental/CWE-74/DsnInjection.expected similarity index 80% rename from go/ql/test/experimental/CWE-134/DsnInjection.expected rename to go/ql/test/experimental/CWE-74/DsnInjection.expected index 531bdb0ead2..2f92b181757 100644 --- a/go/ql/test/experimental/CWE-134/DsnInjection.expected +++ b/go/ql/test/experimental/CWE-74/DsnInjection.expected @@ -9,4 +9,4 @@ nodes | Dsn.go:50:29:50:33 | dbDSN | semmle.label | dbDSN | subpaths #select -| Dsn.go:50:29:50:33 | dbDSN | Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN | This query depends on a $@. | Dsn.go:47:10:47:30 | call to FormValue | user-provided value | +| Dsn.go:50:29:50:33 | dbDSN | Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN | Data-Source Name is built using $@. | Dsn.go:47:10:47:30 | call to FormValue | untrusted user input | diff --git a/go/ql/test/experimental/CWE-74/DsnInjection.qlref b/go/ql/test/experimental/CWE-74/DsnInjection.qlref new file mode 100644 index 00000000000..7bd852e221f --- /dev/null +++ b/go/ql/test/experimental/CWE-74/DsnInjection.qlref @@ -0,0 +1 @@ +experimental/CWE-74/DsnInjection.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.expected similarity index 100% rename from go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected rename to go/ql/test/experimental/CWE-74/DsnInjectionLocal.expected diff --git a/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref new file mode 100644 index 00000000000..97fa2ad022a --- /dev/null +++ b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref @@ -0,0 +1 @@ +experimental/CWE-74/DsnInjectionLocal.ql \ No newline at end of file From 2947f176ef90570f854e18acee024a9dff9b8bed Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 13 Jul 2023 09:21:12 +0200 Subject: [PATCH 02/33] Docs: Update data flow documentation to the new API. --- .../analyzing-data-flow-in-cpp-new.rst | 110 ++++++++--------- .../analyzing-data-flow-in-cpp.rst | 112 ++++++++---------- .../analyzing-data-flow-in-csharp.rst | 92 ++++++-------- .../analyzing-data-flow-in-java.rst | 85 ++++++------- .../analyzing-data-flow-in-python.rst | 70 +++++------ .../analyzing-data-flow-in-ruby.rst | 76 +++++------- .../codeql-library-for-go.rst | 10 +- .../using-api-graphs-in-ruby.rst | 14 +-- .../ql-training/cpp/global-data-flow-cpp.rst | 21 ++-- .../ql-training/java/apache-struts-java.rst | 17 ++- .../java/global-data-flow-java.rst | 29 +++-- .../cpp/global-data-flow-cpp-1.ql | 14 +-- .../java/global-data-flow-java-1.ql | 14 +-- .../slide-snippets/global-data-flow.rst | 14 +-- .../slide-snippets/path-queries.rst | 12 +- .../creating-path-queries.rst | 41 ++++--- ...g-data-flow-queries-using-partial-flow.rst | 48 ++++---- docs/ql-design-patterns.md | 4 +- 18 files changed, 352 insertions(+), 431 deletions(-) diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst index 911c930458e..dacfa7c283e 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst @@ -168,74 +168,61 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration`` as follows: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.new.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + The following predicates are defined in the configuration: - ``isSource``—defines where data may flow from - ``isSink``—defines where data may flow to - ``isBarrier``—optional, restricts the data flow -- ``isBarrierGuard``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration`` as follows: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.new.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -The following predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isSanitizerGuard``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps - -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class. - -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ @@ -247,17 +234,15 @@ The following data flow configuration tracks data flow from environment variable import cpp import semmle.code.cpp.dataflow.new.DataFlow - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Function getenv | source.asIndirectExpr(1).(FunctionCall).getTarget() = getenv and getenv.hasGlobalName("getenv") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasGlobalName("fopen") @@ -265,16 +250,17 @@ The following data flow configuration tracks data flow from environment variable } } + module EnvironmentToFileFlow = DataFlow::Global; + from - Expr getenv, Expr fopen, EnvironmentToFileConfiguration config, DataFlow::Node source, - DataFlow::Node sink + Expr getenv, Expr fopen, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = getenv and sink.asIndirectExpr(1) = fopen and - config.hasFlow(source, sink) + EnvironmentToFileFlow::flow(source, sink) select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'" -The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes. +The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isBarrier`` to prevent taint from propagating through them. It also uses ``isAdditionalFlowStep`` to add flow from loop bounds to loop indexes. .. code-block:: ql @@ -282,18 +268,16 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` import semmle.code.cpp.controlflow.Guards import semmle.code.cpp.dataflow.new.TaintTracking - class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration { - NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" } - - override predicate isSource(DataFlow::Node node) { + module NetworkToBufferSizeConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl") } - override predicate isSink(DataFlow::Node node) { + predicate isSink(DataFlow::Node node) { exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset()) } - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists(Loop loop, LoopCounter lc | loop = lc.getALoop() and loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr() @@ -302,7 +286,7 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` ) } - override predicate isSanitizer(DataFlow::Node node) { + predicate isBarrier(DataFlow::Node node) { exists(GuardCondition gc, Variable v | gc.getAChild*() = v.getAnAccess() and node.asExpr() = v.getAnAccess() and @@ -312,8 +296,10 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` } } - from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf - where conf.hasFlow(ntohl, offset) + module NetworkToBufferSizeFlow = TaintTracking::Global; + + from DataFlow::Node ntohl, DataFlow::Node offset + where NetworkToBufferSizeFlow::flow(ntohl, offset) select offset, "This array offset may be influenced by $@.", ntohl, "converted data from the network" @@ -353,14 +339,12 @@ Exercise 2 import cpp import semmle.code.cpp.dataflow.new.DataFlow - class LiteralToGethostbynameConfiguration extends DataFlow::Configuration { - LiteralToGethostbynameConfiguration() { this = "LiteralToGethostbynameConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module LiteralToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asIndirectExpr(1) instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname") @@ -368,13 +352,14 @@ Exercise 2 } } + module LiteralToGethostbynameFlow = DataFlow::Global; + from - StringLiteral sl, FunctionCall fc, LiteralToGethostbynameConfiguration cfg, DataFlow::Node source, - DataFlow::Node sink + StringLiteral sl, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = sl and sink.asIndirectExpr(1) = fc.getArgument(0) and - cfg.hasFlow(source, sink) + LiteralToGethostbynameFlow::flow(source, sink) select sl, fc Exercise 3 @@ -401,12 +386,10 @@ Exercise 4 GetenvSource() { this.asIndirectExpr(1).(FunctionCall).getTarget().hasGlobalName("getenv") } } - class GetenvToGethostbynameConfiguration extends DataFlow::Configuration { - GetenvToGethostbynameConfiguration() { this = "GetenvToGethostbynameConfiguration" } + module GetenvToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname") @@ -414,13 +397,14 @@ Exercise 4 } } + module GetenvToGethostbynameFlow = DataFlow::Global; + from - Expr getenv, FunctionCall fc, GetenvToGethostbynameConfiguration cfg, DataFlow::Node source, - DataFlow::Node sink + Expr getenv, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = getenv and sink.asIndirectExpr(1) = fc.getArgument(0) and - cfg.hasFlow(source, sink) + GetenvToGethostbynameFlow::flow(source, sink) select getenv, fc Further reading @@ -430,4 +414,4 @@ Further reading .. include:: ../reusables/cpp-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst index b70bfa7a3f8..4fc31079f0b 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst @@ -152,74 +152,62 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration`` as follows: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + + The following predicates are defined in the configuration: - ``isSource``—defines where data may flow from - ``isSink``—defines where data may flow to - ``isBarrier``—optional, restricts the data flow -- ``isBarrierGuard``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration`` as follows: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -The following predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isSanitizerGuard``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps - -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class. - -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ @@ -230,17 +218,15 @@ The following data flow configuration tracks data flow from environment variable import semmle.code.cpp.dataflow.DataFlow - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists (Function getenv | source.asExpr().(FunctionCall).getTarget() = getenv and getenv.hasGlobalName("getenv") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasGlobalName("fopen") @@ -248,12 +234,14 @@ The following data flow configuration tracks data flow from environment variable } } - from Expr getenv, Expr fopen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(getenv), DataFlow::exprNode(fopen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr getenv, Expr fopen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(getenv), DataFlow::exprNode(fopen)) select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'" -The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes. +The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isBarrier`` to prevent taint from propagating through them. It also uses ``isAdditionalFlowStep`` to add flow from loop bounds to loop indexes. .. code-block:: ql @@ -261,18 +249,16 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` import semmle.code.cpp.controlflow.Guards import semmle.code.cpp.dataflow.TaintTracking - class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration { - NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" } - - override predicate isSource(DataFlow::Node node) { + module NetworkToBufferSizeConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl") } - override predicate isSink(DataFlow::Node node) { + predicate isSink(DataFlow::Node node) { exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset()) } - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists(Loop loop, LoopCounter lc | loop = lc.getALoop() and loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr() | @@ -280,7 +266,7 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` ) } - override predicate isSanitizer(DataFlow::Node node) { + predicate isBarrier(DataFlow::Node node) { exists(GuardCondition gc, Variable v | gc.getAChild*() = v.getAnAccess() and node.asExpr() = v.getAnAccess() and @@ -289,8 +275,10 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` } } - from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf - where conf.hasFlow(ntohl, offset) + module NetworkToBufferSizeFlow = TaintTracking::Global; + + from DataFlow::Node ntohl, DataFlow::Node offset + where NetworkToBufferSizeFlow::flow(ntohl, offset) select offset, "This array offset may be influenced by $@.", ntohl, "converted data from the network" @@ -327,24 +315,22 @@ Exercise 2 import semmle.code.cpp.dataflow.DataFlow - class LiteralToGethostbynameConfiguration extends DataFlow::Configuration { - LiteralToGethostbynameConfiguration() { - this = "LiteralToGethostbynameConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module LiteralToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname")) } } - from StringLiteral sl, FunctionCall fc, LiteralToGethostbynameConfiguration cfg - where cfg.hasFlow(DataFlow::exprNode(sl), DataFlow::exprNode(fc.getArgument(0))) + module LiteralToGethostbynameFlow = DataFlow::Global; + + from StringLiteral sl, FunctionCall fc + where LiteralToGethostbynameFlow::flow(DataFlow::exprNode(sl), DataFlow::exprNode(fc.getArgument(0))) select sl, fc Exercise 3 @@ -373,24 +359,22 @@ Exercise 4 } } - class GetenvToGethostbynameConfiguration extends DataFlow::Configuration { - GetenvToGethostbynameConfiguration() { - this = "GetenvToGethostbynameConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module GetenvToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname")) } } - from DataFlow::Node getenv, FunctionCall fc, GetenvToGethostbynameConfiguration cfg - where cfg.hasFlow(getenv, DataFlow::exprNode(fc.getArgument(0))) + module GetenvToGethostbynameFlow = DataFlow::Global; + + from DataFlow::Node getenv, FunctionCall fc + where GetenvToGethostbynameFlow::flow(getenv, DataFlow::exprNode(fc.getArgument(0))) select getenv.asExpr(), fc Further reading @@ -400,4 +384,4 @@ Further reading .. include:: ../reusables/cpp-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst index 4d715b7313c..f5b2fe29cb3 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst @@ -146,24 +146,24 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration``: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import csharp - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -171,45 +171,36 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import csharp - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. -- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ @@ -228,12 +219,8 @@ This query shows a data flow configuration that uses all public API parameters a import csharp import semmle.code.csharp.dataflow.flowsources.PublicCallableParameter - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { - this = "..." - } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof PublicCallableParameterFlowSource } @@ -243,7 +230,6 @@ This query shows a data flow configuration that uses all public API parameters a Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data flow node. - ``DataFlow::ExprNode`` - an expression behaving as a data flow node. @@ -261,8 +247,6 @@ Class hierarchy - ``WcfRemoteFlowSource`` - data flow from a WCF web service. - ``AspNetServiceRemoteFlowSource`` - data flow from an ASP.NET web service. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. - Examples ~~~~~~~~ @@ -272,17 +256,15 @@ This data flow configuration tracks data flow from environment variables to open import csharp - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "Environment opening files" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Method m | m = source.asExpr().(MethodCall).getTarget() and m.hasQualifiedName("System.Environment.GetEnvironmentVariable") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(MethodCall mc | mc.getTarget().hasQualifiedName("System.IO.File.Open") and sink.asExpr() = mc.getArgument(0) @@ -290,8 +272,10 @@ This data flow configuration tracks data flow from environment variables to open } } - from Expr environment, Expr fileOpen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr environment, Expr fileOpen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) select fileOpen, "This 'File.Open' uses data from $@.", environment, "call to 'GetEnvironmentVariable'" @@ -435,21 +419,21 @@ Exercise 2 import csharp - class Configuration extends DataFlow::Configuration { - Configuration() { this="String to System.Uri" } - - override predicate isSource(DataFlow::Node src) { + module StringToUriConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src.asExpr().hasValue() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call c | c.getTarget().(Constructor).getDeclaringType().hasQualifiedName("System.Uri") and sink.asExpr()=c.getArgument(0)) } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module StringToUriFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where StringToUriFlow::flow(src, sink) select src, "This string constructs a 'System.Uri' $@.", sink, "here" Exercise 3 @@ -476,21 +460,21 @@ Exercise 4 } } - class Configuration extends DataFlow::Configuration { - Configuration() { this="Environment to System.Uri" } - - override predicate isSource(DataFlow::Node src) { + module EnvironmentToUriConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src instanceof EnvironmentVariableFlowSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call c | c.getTarget().(Constructor).getDeclaringType().hasQualifiedName("System.Uri") and sink.asExpr()=c.getArgument(0)) } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module EnvironmentToUriFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where EnvironmentToUriFlow::flow(src, sink) select src, "This environment variable constructs a 'System.Uri' $@.", sink, "here" Exercise 5 diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst index 2eccdf5e103..128e8549673 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst @@ -160,24 +160,24 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -You use the global data flow library by extending the class ``DataFlow::Configuration``: +You use the global data flow library by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import semmle.code.java.dataflow.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource``—defines where data may flow from @@ -185,47 +185,36 @@ These predicates are defined in the configuration: - ``isBarrier``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be a unique name, for example, the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. You use the global taint tracking library by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. You use the global taint tracking library by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import semmle.code.java.dataflow.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps - -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration. - -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ @@ -242,18 +231,16 @@ This query shows a taint-tracking configuration that uses remote user input as d import java import semmle.code.java.dataflow.FlowSources - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { - this = "..." - } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } ... } + module MyTaintFlow = TaintTracking::Global; + Exercises ~~~~~~~~~ @@ -287,16 +274,12 @@ Exercise 2 import semmle.code.java.dataflow.DataFlow - class Configuration extends DataFlow::Configuration { - Configuration() { - this = "LiteralToURL Configuration" - } - - override predicate isSource(DataFlow::Node source) { + module LiteralToURLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call call | sink.asExpr() = call.getArgument(0) and call.getCallee().(Constructor).getDeclaringType().hasQualifiedName("java.net", "URL") @@ -304,8 +287,10 @@ Exercise 2 } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module LiteralToURLFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where LiteralToURLFlow::flow(src, sink) select src, "This string constructs a URL $@.", sink, "here" Exercise 3 @@ -340,16 +325,12 @@ Exercise 4 } } - class GetenvToURLConfiguration extends DataFlow::Configuration { - GetenvToURLConfiguration() { - this = "GetenvToURLConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module GetenvToURLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call call | sink.asExpr() = call.getArgument(0) and call.getCallee().(Constructor).getDeclaringType().hasQualifiedName("java.net", "URL") @@ -357,8 +338,10 @@ Exercise 4 } } - from DataFlow::Node src, DataFlow::Node sink, GetenvToURLConfiguration config - where config.hasFlow(src, sink) + module GetenvToURLFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where GetenvToURLFlow::flow(src, sink) select src, "This environment variable constructs a URL $@.", sink, "here" Further reading @@ -368,4 +351,4 @@ Further reading .. include:: ../reusables/java-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst index 2ad5fc925f0..4a0d12e1f46 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst @@ -204,24 +204,24 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration``: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import python - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -229,45 +229,36 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name (for instance the class name). - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import python - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. -- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -283,7 +274,6 @@ For global flow, it is also useful to restrict sources to instances of ``LocalSo Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data flow node. - ``DataFlow::CfgNode`` - a control-flow node behaving as a data flow node. @@ -305,8 +295,6 @@ Class hierarchy - ``Concepts::HTTP::Server::RouteSetup`` - a data-flow node that sets up a route on a server. - ``Concepts::HTTP::Server::HttpResponse`` - a data-flow node that creates a HTTP response on a server. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. - Examples ~~~~~~~~ @@ -320,20 +308,20 @@ This query shows a data flow configuration that uses all network input as data s import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.Concepts - class RemoteToFileConfiguration extends TaintTracking::Configuration { - RemoteToFileConfiguration() { this = "RemoteToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module RemoteToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fa).getAPathArgument() } } - from DataFlow::Node input, DataFlow::Node fileAccess, RemoteToFileConfiguration config - where config.hasFlow(input, fileAccess) + module RemoteToFileFlow = TaintTracking::Global; + + from DataFlow::Node input, DataFlow::Node fileAccess + where RemoteToFileFlow::flow(input, fileAccess) select fileAccess, "This file access uses data from $@.", input, "user-controllable input." @@ -345,14 +333,12 @@ This data flow configuration tracks data flow from environment variables to open import semmle.python.dataflow.new.TaintTracking import semmle.python.ApiGraphs - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source = API::moduleImport("os").getMember("getenv").getACall() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(DataFlow::CallCfgNode call | call = API::moduleImport("os").getMember("open").getACall() and sink = call.getArg(0) @@ -360,8 +346,10 @@ This data flow configuration tracks data flow from environment variables to open } } - from Expr environment, Expr fileOpen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr environment, Expr fileOpen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) select fileOpen, "This call to 'os.open' uses data from $@.", environment, "call to 'os.getenv'" diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst index b326bfa59aa..4b132261de3 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst @@ -224,24 +224,24 @@ However, global data flow is less precise than local data flow, and the analysis Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -You can use the global data flow library by extending the class ``DataFlow::Configuration``: +You can use the global data flow library by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import codeql.ruby.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -249,14 +249,12 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name (for instance the class name). - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking @@ -264,33 +262,26 @@ Using global taint tracking Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. -The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import codeql.ruby.DataFlow import codeql.ruby.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. -- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module is completely similar to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -306,7 +297,6 @@ The predefined sources generally do that. Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data-flow node. - ``DataFlow::LocalSourceNode`` - a local origin of data, as a data-flow node. - ``DataFlow::ExprNode`` - an expression behaving as a data-flow node. @@ -321,13 +311,11 @@ Class hierarchy - ``Concepts::HTTP::Server::RouteSetup`` - a data-flow node that sets up a route on a server. - ``Concepts::HTTP::Server::HttpResponse`` - a data-flow node that creates an HTTP response on a server. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. - Examples of global data flow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following global taint-tracking query finds path arguments in filesystem accesses that can be controlled by a remote user. - - Since this is a taint-tracking query, the configuration class extends ``TaintTracking::Configuration``. + - Since this is a taint-tracking query, the ``TaintTracking::Global`` module is used. - The ``isSource`` predicate defines sources as any data-flow nodes that are instances of ``RemoteFlowSource``. - The ``isSink`` predicate defines sinks as path arguments in any filesystem access, using ``FileSystemAccess`` from the ``Concepts`` library. @@ -338,22 +326,22 @@ The following global taint-tracking query finds path arguments in filesystem acc import codeql.ruby.Concepts import codeql.ruby.dataflow.RemoteFlowSources - class RemoteToFileConfiguration extends TaintTracking::Configuration { - RemoteToFileConfiguration() { this = "RemoteToFileConfiguration" } + module RemoteToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fa).getAPathArgument() } } + + module RemoteToFileFlow = TaintTracking::Global; - from DataFlow::Node input, DataFlow::Node fileAccess, RemoteToFileConfiguration config - where config.hasFlow(input, fileAccess) + from DataFlow::Node input, DataFlow::Node fileAccess + where RemoteToFileFlow::flow(input, fileAccess) select fileAccess, "This file access uses data from $@.", input, "user-controllable input." The following global data-flow query finds calls to ``File.open`` where the filename argument comes from an environment variable. - - Since this is a data-flow query, the configuration class extends ``DataFlow::Configuration``. + - Since this is a data-flow query, the ``DataFlow::Global`` module is used. - The ``isSource`` predicate defines sources as expression nodes representing lookups on the ``ENV`` hash. - The ``isSink`` predicate defines sinks as the first argument in any call to ``File.open``. @@ -363,23 +351,23 @@ The following global data-flow query finds calls to ``File.open`` where the file import codeql.ruby.controlflow.CfgNodes import codeql.ruby.ApiGraphs - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(ExprNodes::ConstantReadAccessCfgNode env | env.getExpr().getName() = "ENV" and env = source.asExpr().(ExprNodes::ElementReferenceCfgNode).getReceiver() ) } - - override predicate isSink(DataFlow::Node sink) { + + predicate isSink(DataFlow::Node sink) { sink = API::getTopLevelMember("File").getAMethodCall("open").getArgument(0) } } + + module EnvironmentToFileFlow = DataFlow::Global; - from EnvironmentToFileConfiguration config, DataFlow::Node environment, DataFlow::Node fileOpen - where config.hasFlow(environment, fileOpen) + from DataFlow::Node environment, DataFlow::Node fileOpen + where EnvironmentToFileFlow::flow(environment, fileOpen) select fileOpen, "This call to 'File.open' uses data from $@.", environment, "an environment variable" diff --git a/docs/codeql/codeql-language-guides/codeql-library-for-go.rst b/docs/codeql/codeql-language-guides/codeql-library-for-go.rst index f1f77c5150e..33468ab7a71 100644 --- a/docs/codeql/codeql-language-guides/codeql-library-for-go.rst +++ b/docs/codeql/codeql-language-guides/codeql-library-for-go.rst @@ -494,14 +494,14 @@ which are sets of data-flow nodes. Given these three sets, CodeQL provides a gen finding paths from a source to a sink, possibly going into and out of functions and fields, but never flowing through a barrier. -To define a data-flow configuration, you can define a subclass of ``DataFlow::Configuration``, -overriding the member predicates ``isSource``, ``isSink``, and ``isBarrier`` to define the sets of -sources, sinks, and barriers. +To define a data-flow configuration, you can define a module implementing ``DataFlow::ConfigSig``, +including the predicates ``isSource``, ``isSink``, and ``isBarrier`` to define the sets of +sources, sinks, and barriers. Data flow is then computed by applying +``DataFlow::Global<..>`` to the configuration. Going beyond pure data flow, many security analyses need to perform more general `taint tracking`, which also considers flow through value-transforming operations such as string operations. To track -taint, you can define a subclass of ``TaintTracking::Configuration``, which works similar to -data-flow configurations. +taint, you apply ``TaintTracking::Global<..>`` to your configuration instead. A detailed exposition of global data flow and taint tracking is out of scope for this brief introduction. For a general overview of data flow and taint tracking, see ":ref:`About data flow analysis `." diff --git a/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst b/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst index 7ac699b61c2..58a16b4a464 100644 --- a/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst +++ b/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst @@ -161,20 +161,20 @@ is read flows into a call to ``File.write``. import codeql.ruby.DataFlow import codeql.ruby.ApiGraphs - class Configuration extends DataFlow::Configuration { - Configuration() { this = "File read/write Configuration" } - - override predicate isSource(DataFlow::Node source) { + module Configuration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source = API::getTopLevelMember("File").getMethod("read").getReturn().asSource() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = API::getTopLevelMember("File").getMethod("write").getParameter(1).asSink() } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module Flow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where Flow::flow(src, sink) select src, "The data read here flows into a $@ call.", sink, "File.write" Further reading diff --git a/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst b/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst index 7be2a07c4af..e8dbcfcc524 100644 --- a/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst +++ b/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst @@ -62,8 +62,8 @@ The library class ``SecurityOptions`` provides a (configurable) model of what co import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSource(DataFlow::Node source) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists (SecurityOptions opts | opts.isUserInput(source.asExpr(), _) ) @@ -85,8 +85,8 @@ Use the ``FormattingFunction`` class to fill in the definition of ``isSink``. import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { /* Fill me in */ } ... @@ -105,8 +105,8 @@ Use the ``FormattingFunction`` class, we can write the sink as: import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { exists (FormattingFunction ff, Call c | c.getTarget() = ff and c.getArgument(ff.getFormatParameterIndex()) = sink.asExpr() @@ -132,9 +132,8 @@ Add an additional taint step that (heuristically) taints a local variable if it .. code-block:: ql - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isAdditionalTaintStep(DataFlow::Node pred, - DataFlow::Node succ) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists (Call c, Expr arg, LocalVariable lv | arg = c.getAnArgument() and arg = pred.asExpr() and @@ -153,8 +152,8 @@ Add a sanitizer, stopping propagation at parameters of formatting functions, to .. code-block:: ql - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSanitizer(DataFlow::Node nd) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isBarrier(DataFlow::Node nd) { exists (FormattingFunction ff, int idx | idx = ff.getFormatParameterIndex() and nd = DataFlow::parameterNode(ff.getParameter(idx)) diff --git a/docs/codeql/ql-training/java/apache-struts-java.rst b/docs/codeql/ql-training/java/apache-struts-java.rst index e85276e14f0..0028b1b77ac 100644 --- a/docs/codeql/ql-training/java/apache-struts-java.rst +++ b/docs/codeql/ql-training/java/apache-struts-java.rst @@ -71,7 +71,7 @@ Finding the RCE yourself **Hint**: Use ``Method.getDeclaringType()`` and ``Type.getASupertype()`` -#. Implement a ``DataFlow::Configuration``, defining the source as the first parameter of a ``toObject`` method, and the sink as an instance of ``UnsafeDeserializationSink``. +#. Implement a ``DataFlow::ConfigSig``, defining the source as the first parameter of a ``toObject`` method, and the sink as an instance of ``UnsafeDeserializationSink``. **Hint**: Use ``Node::asParameter()`` @@ -114,13 +114,13 @@ Model answer, step 3 * Configuration that tracks the flow of taint from the first parameter of * `ContentTypeHandler.toObject` to an instance of unsafe deserialization. */ - class StrutsUnsafeDeserializationConfig extends Configuration { - StrutsUnsafeDeserializationConfig() { this = "StrutsUnsafeDeserializationConfig" } - override predicate isSource(Node source) { + module StrutsUnsafeDeserializationConfig implements ConfigSig { + predicate isSource(Node source) { source.asParameter() = any(ContentTypeHandlerDeserialization des).getParameter(0) } - override predicate isSink(Node sink) { sink instanceof UnsafeDeserializationSink } + predicate isSink(Node sink) { sink instanceof UnsafeDeserializationSink } } + module StrutsUnsafeDeserializationFlow = Global; Model answer, step 4 ==================== @@ -129,9 +129,8 @@ Model answer, step 4 import PathGraph ... - from PathNode source, PathNode sink, StrutsUnsafeDeserializationConfig conf - where conf.hasFlowPath(source, sink) - and sink.getNode() instanceof UnsafeDeserializationSink - select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input" + from PathNode source, PathNode sink + where StrutsUnsafeDeserializationFlow::flowPath(source, sink) + select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input" More full-featured version: https://github.com/github/securitylab/tree/main/CodeQL_Queries/java/Apache_Struts_CVE-2017-9805 diff --git a/docs/codeql/ql-training/java/global-data-flow-java.rst b/docs/codeql/ql-training/java/global-data-flow-java.rst index 2c1827a937c..d08d45c059a 100644 --- a/docs/codeql/ql-training/java/global-data-flow-java.rst +++ b/docs/codeql/ql-training/java/global-data-flow-java.rst @@ -78,12 +78,12 @@ We want to look for method calls where the method name is ``getNamespace()``, an import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSource(DataFlow::Node source) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Method m | - m.getName() = "getNamespace" and - m.getDeclaringType().getName() = "ActionProxy" and - source.asExpr() = m.getAReference() + m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + source.asExpr() = m.getAReference() ) } ... @@ -105,8 +105,8 @@ Fill in the definition of ``isSink``. import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { /* Fill me in */ } ... @@ -125,9 +125,9 @@ Find a method access to ``compileAndExecute``, and mark the first argument. import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { - exists(MethodAccess ma | + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { + exists(MethodAccess ma | ma.getMethod().getName() = "compileAndExecute" and ma.getArgument(0) = sink.asExpr() ) @@ -148,8 +148,8 @@ A sanitizer allows us to *prevent* flow through a particular node in the graph. .. code-block:: ql - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSanitizer(DataFlow::Node nd) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isBarrier(DataFlow::Node nd) { nd.getEnclosingCallable() .getDeclaringType() .getName() = "ValueStackShadowMap" @@ -164,9 +164,8 @@ Add an additional taint step that (heuristically) taints a local variable if it .. code-block:: ql - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isAdditionalTaintStep(DataFlow::Node node1, - DataFlow::Node node2) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { exists(Field f, RefType t | node1.asExpr() = f.getAnAssignedValue() and node2.asExpr() = f.getAnAccess() and diff --git a/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql b/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql index 9020da5ea2c..3d203c0e147 100644 --- a/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql +++ b/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql @@ -1,14 +1,14 @@ import cpp import semmle.code.cpp.dataflow.TaintTracking -class TaintedFormatConfig extends TaintTracking::Configuration { - TaintedFormatConfig() { this = "TaintedFormatConfig" } +module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { /* TBD */ } - override predicate isSource(DataFlow::Node source) { /* TBD */ } - - override predicate isSink(DataFlow::Node sink) { /* TBD */ } + predicate isSink(DataFlow::Node sink) { /* TBD */ } } -from TaintedFormatConfig cfg, DataFlow::Node source, DataFlow::Node sink -where cfg.hasFlow(source, sink) +module TaintedFormatFlow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink +where TaintedFormatFlow::flow(source, sink) select sink, "This format string may be derived from a $@.", source, "user-controlled value" diff --git a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql index 7604b20eb95..b33a136c670 100644 --- a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql +++ b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql @@ -1,14 +1,14 @@ import java import semmle.code.java.dataflow.TaintTracking -class TaintedOGNLConfig extends TaintTracking::Configuration { - TaintedOGNLConfig() { this = "TaintedOGNLConfig" } +module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { /* TBD */ } - override predicate isSource(DataFlow::Node source) { /* TBD */ } - - override predicate isSink(DataFlow::Node sink) { /* TBD */ } + predicate isSink(DataFlow::Node sink) { /* TBD */ } } -from TaintedOGNLConfig cfg, DataFlow::Node source, DataFlow::Node sink -where cfg.hasFlow(source, sink) +module TaintedOGNLFlow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink +where TaintedOGNLFlow::flow(source, sink) select source, "This untrusted input is evaluated as an OGNL expression $@.", sink, "here" diff --git a/docs/codeql/ql-training/slide-snippets/global-data-flow.rst b/docs/codeql/ql-training/slide-snippets/global-data-flow.rst index 6cb52af0f84..f84db924e27 100644 --- a/docs/codeql/ql-training/slide-snippets/global-data-flow.rst +++ b/docs/codeql/ql-training/slide-snippets/global-data-flow.rst @@ -34,18 +34,18 @@ Global taint tracking library The ``semmle.code..dataflow.TaintTracking`` library provides a framework for implementing solvers for global taint tracking problems: - #. Subclass ``TaintTracking::Configuration`` following this template: + #. Implement ``DataFlow::ConfigSig`` and use ``TaintTracking::Global`` following this template: .. code-block:: ql - class Config extends TaintTracking::Configuration { - Config() { this = "" } - override predicate isSource(DataFlow::Node nd) { ... } - override predicate isSink(DataFlow::Node nd) { ... } + module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node nd) { ... } + predicate isSink(DataFlow::Node nd) { ... } } + module Flow = TaintTracking::Global; - #. Use ``Config.hasFlow(source, sink)`` to find inter-procedural paths. + #. Use ``Flow::flow(source, sink)`` to find inter-procedural paths. .. note:: - In addition to the taint tracking configuration described here, there is also an equivalent *data flow* configuration in ``semmle.code..dataflow.DataFlow``, ``DataFlow::Configuration``. Data flow configurations are used to track whether the exact value produced by a source is used by a sink, whereas taint tracking configurations are used to determine whether the source may influence the value used at the sink. Whether you use taint tracking or data flow depends on the analysis problem you are trying to solve. + In addition to the taint tracking flow configuration described here, there is also an equivalent *data flow* in ``semmle.code..dataflow.DataFlow``, ``DataFlow::Global``. Data flow is used to track whether the exact value produced by a source is used by a sink, whereas taint tracking is used to determine whether the source may influence the value used at the sink. Whether you use taint tracking or data flow depends on the analysis problem you are trying to solve. diff --git a/docs/codeql/ql-training/slide-snippets/path-queries.rst b/docs/codeql/ql-training/slide-snippets/path-queries.rst index e93f7fda966..6ed8b79814d 100644 --- a/docs/codeql/ql-training/slide-snippets/path-queries.rst +++ b/docs/codeql/ql-training/slide-snippets/path-queries.rst @@ -13,12 +13,16 @@ Use this template: */ import semmle.code..dataflow.TaintTracking - import DataFlow::PathGraph + ... - from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink - where cfg.hasFlowPath(source, sink) + + module Flow = TaintTracking::Global; + import Flow::PathGraph + + from Flow::PathNode source, Flow::PathNode sink + where Flow::flowPath(source, sink) select sink, source, sink, "" .. note:: - To see the paths between the source and the sinks, we can convert the query to a path problem query. There are a few minor changes that need to be made for this to work–we need an additional import, to specify ``PathNode`` rather than ``Node``, and to add the source/sink to the query output (so that we can automatically determine the paths). \ No newline at end of file + To see the paths between the source and the sinks, we can convert the query to a path problem query. There are a few minor changes that need to be made for this to work–we need an additional import, to specify ``PathNode`` rather than ``Node``, and to add the source/sink to the query output (so that we can automatically determine the paths). diff --git a/docs/codeql/writing-codeql-queries/creating-path-queries.rst b/docs/codeql/writing-codeql-queries/creating-path-queries.rst index fc3b18a9b95..2c5b1ff57d7 100644 --- a/docs/codeql/writing-codeql-queries/creating-path-queries.rst +++ b/docs/codeql/writing-codeql-queries/creating-path-queries.rst @@ -58,18 +58,22 @@ You should use the following template: import // For some languages (Java/C++/Python/Swift) you need to explicitly import the data flow library, such as // import semmle.code.java.dataflow.DataFlow or import codeql.swift.dataflow.DataFlow - import DataFlow::PathGraph ... - from MyConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink - where config.hasFlowPath(source, sink) + module Flow = DataFlow::Global; + import Flow::PathGraph + + from Flow::PathNode source, Flow::PathNode sink + where Flow::flowPath(source, sink) select sink.getNode(), source, sink, "" Where: -- ``DataFlow::Pathgraph`` is the path graph module you need to import from the standard CodeQL libraries. -- ``source`` and ``sink`` are nodes on the `path graph `__, and ``DataFlow::PathNode`` is their type. -- ``MyConfiguration`` is a class containing the predicates which define how data may flow between the ``source`` and the ``sink``. +- ``MyConfiguration`` is a module containing the predicates that define how data may flow between the ``source`` and the ``sink``. +- ``Flow`` is the result of the data flow computation based on ``MyConfiguration``. +- ``Flow::Pathgraph`` is the resulting data flow graph module you need to import in order to include path explanations in the query. +- ``source`` and ``sink`` are nodes in the graph as defined in the configuration, and ``Flow::PathNode`` is their type. +- ``DataFlow::Global<..>`` is an invocation of data flow. ``TaintTracking::Global<..>`` can be used instead to include a default set of additional taint steps. The following sections describe the main requirements for a valid path query. @@ -83,14 +87,14 @@ The other metadata requirements depend on how you intend to run the query. For m Generating path explanations **************************** -In order to generate path explanations, your query needs to compute a `path graph `__. +In order to generate path explanations, your query needs to compute a graph. To do this you need to define a :ref:`query predicate ` called ``edges`` in your query. This predicate defines the edge relations of the graph you are computing, and it is used to compute the paths related to each result that your query generates. You can import a predefined ``edges`` predicate from a path graph module in one of the standard data flow libraries. In addition to the path graph module, the data flow libraries contain the other ``classes``, ``predicates``, and ``modules`` that are commonly used in data flow analysis. .. code-block:: ql - import DataFlow::PathGraph + import MyFlow::PathGraph This statement imports the ``PathGraph`` module from the data flow library (``DataFlow.qll``), in which ``edges`` is defined. @@ -106,7 +110,7 @@ You can also define your own ``edges`` predicate in the body of your query. It s .. code-block:: ql query predicate edges(PathNode a, PathNode b) { - /** Logical conditions which hold if `(a,b)` is an edge in the data flow graph */ + /* Logical conditions which hold if `(a,b)` is an edge in the data flow graph */ } For more examples of how to define an ``edges`` predicate, visit the `standard CodeQL libraries `__ and search for ``edges``. @@ -117,14 +121,23 @@ Declaring sources and sinks You must provide information about the ``source`` and ``sink`` in your path query. These are objects that correspond to the nodes of the paths that you are exploring. The name and the type of the ``source`` and the ``sink`` must be declared in the ``from`` statement of the query, and the types must be compatible with the nodes of the graph computed by the ``edges`` predicate. -If you are querying C/C++, C#, Go, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement. +If you are querying C/C++, C#, Go, Java, JavaScript, Python, or Ruby code (and you have used ``import MyFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the module resulting from the application of the ``Global<..>`` module in the data flow library. You should declare both of these objects in the ``from`` statement. For example: .. code-block:: ql - from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink + module MyFlow = DataFlow::Global; -The configuration class is accessed by importing the data flow library. This class contains the predicates which define how data flow is treated in the query: + from MyFlow::PathNode source, MyFlow::PathNode sink + +The configuration module must be defined to include definitions of sources and sinks. For example: + +.. code-block:: ql + + module MyConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } + predicate isSink(DataFlow::Node source) { ... } + } - ``isSource()`` defines where data may flow from. - ``isSink()`` defines where data may flow to. @@ -141,11 +154,11 @@ This clause can use :ref:`aggregations `, :ref:`predicates ; + + from MyFlow::PathNode source, MyFlow::PathNode sink + where MyFlow::flowPath(source, sink) select sink.getNode(), source, sink, "Sink is reached from $@.", source.getNode(), "here" The same query can be slightly simplified by rewriting it without :ref:`path explanations `: .. code-block:: ql - from MyConfig config, DataFlow::Node source, DataFlow::Node sink - where config.hasPath(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select sink, "Sink is reached from $@.", source.getNode(), "here" If a data-flow query that you have written doesn't produce the results you expect it to, there may be a problem with your query. @@ -48,7 +48,7 @@ Data-flow configurations contain a parameter called ``fieldFlowBranchLimit``. If .. code-block:: ql - override int fieldFlowBranchLimit() { result = 5000 } + int fieldFlowBranchLimit() { result = 5000 } If there are still no results and performance is still useable, then it is best to leave this set to a high value while doing further debugging. @@ -57,7 +57,7 @@ Partial flow A naive next step could be to change the sink definition to ``any()``. This would mean that we would get a lot of flow to all the places that are reachable from the sources. While this approach may work in some cases, you might find that it produces so many results that it's very hard to explore the findings. It can also dramatically affect query performance. More importantly, you might not even see all the partial flow paths. This is because the data-flow library tries very hard to prune impossible paths and, since field stores and reads must be evenly matched along a path, we will never see paths going through a store that fail to reach a corresponding read. This can make it hard to see where flow actually stops. -To avoid these problems, a data-flow ``Configuration`` comes with a mechanism for exploring partial flow that tries to deal with these caveats. This is the ``Configuration.hasPartialFlow`` predicate: +To avoid these problems, the data-flow library comes with a mechanism for exploring partial flow that tries to deal with these caveats. This is the ``MyFlow::FlowExploration::partialFlow`` predicate: .. code-block:: ql @@ -71,25 +71,23 @@ To avoid these problems, a data-flow ``Configuration`` comes with a mechanism fo * perform poorly if the number of sources is too big and/or the exploration * limit is set too high without using barriers. * - * This predicate is disabled (has no results) by default. Override - * `explorationLimit()` with a suitable number to enable this predicate. - * * To use this in a `path-problem` query, import the module `PartialPathGraph`. */ - final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + predicate partialFlow(PartialPathNode source, PartialPathNode node, int dist) { -There is also a ``Configuration.hasPartialFlowRev`` for exploring flow backwards from a sink. +There is also a ``partialFlowRev`` for exploring flow backwards from a sink. -As noted in the documentation for ``hasPartialFlow`` (for example, in the -`CodeQL for Java documentation `__) you must first enable this by adding an override of ``explorationLimit``. For example: +To get access to these predicates you must instantiate the ``MyFlow::FlowExploration<>`` module with an exploration limit. For example: .. code-block:: ql - override int explorationLimit() { result = 5 } + int explorationLimit() { result = 5 } -This defines the exploration radius within which ``hasPartialFlow`` returns results. + module MyPartialFlow = MyFlow::FlowExploration; -To get good performance when using ``hasPartialFlow`` it is important to ensure the ``isSink`` predicate of the configuration has no results. Likewise, when using ``hasPartialFlowRev`` the ``isSource`` predicate of the configuration should have no results. +This defines the exploration radius within which ``partialFlow`` returns results. + +To get good performance when using ``partialFlow`` it is important to ensure the ``isSink`` predicate of the configuration has no results. Likewise, when using ``partialFlowRev`` the ``isSource`` predicate of the configuration should have no results. It is also useful to focus on a single source at a time as the starting point for the flow exploration. This is most easily done by adding a temporary restriction in the ``isSource`` predicate. @@ -97,9 +95,9 @@ To do quick evaluations of partial flow it is often easiest to add a predicate t .. code-block:: ql - predicate adhocPartialFlow(Callable c, PartialPathNode n, Node src, int dist) { - exists(MyConfig conf, PartialPathNode source | - conf.hasPartialFlow(source, n, dist) and + predicate adhocPartialFlow(Callable c, MyPartialFlow::PartialPathNode n, Node src, int dist) { + exists(MyPartialFlow::PartialPathNode source | + MyPartialFlow::partialFlow(source, n, dist) and src = source.getNode() and c = n.getNode().getEnclosingCallable() ) @@ -111,7 +109,7 @@ If you are focusing on a single source then the ``src`` column is superfluous. Y If you see a large number of partial flow results, you can focus them in a couple of ways: - If flow travels a long distance following an expected path, that can result in a lot of uninteresting flow being included in the exploration radius. To reduce the amount of uninteresting flow, you can replace the source definition with a suitable ``node`` that appears along the path and restart the partial flow exploration from that point. -- Creative use of barriers and sanitizers can be used to cut off flow paths that are uninteresting. This also reduces the number of partial flow results to explore while debugging. +- Creative use of barriers can be used to cut off flow paths that are uninteresting. This also reduces the number of partial flow results to explore while debugging. Further reading ---------------- diff --git a/docs/ql-design-patterns.md b/docs/ql-design-patterns.md index b6d7eab88fc..8d8d126cba2 100644 --- a/docs/ql-design-patterns.md +++ b/docs/ql-design-patterns.md @@ -72,14 +72,12 @@ Importing new files can modify the behaviour of the standard library, by introdu Therefore, unless you have good reason not to, you should ensure that all subclasses are included when the base-class is (to the extent possible). -One example where this _does not_ apply: `DataFlow::Configuration` and its variants are meant to be subclassed, but we generally do not want to import all configurations into the same scope at once. - ## Abstract classes as open or closed unions A class declared as `abstract` in QL represents a union of its direct subtypes (restricted by the intersections of its supertypes and subject to its characteristic predicate). Depending on context, we may want this union to be considered "open" or "closed". -An open union is generally used for extensibility. For example, the abstract classes suggested by the `::Range` design pattern are explicitly intended as extension hooks. As another example, the `DataFlow::Configuration` design pattern provides an abstract class that is intended to be subclassed as a configuration mechanism. +An open union is generally used for extensibility. For example, the abstract classes suggested by the `::Range` design pattern are explicitly intended as extension hooks. A closed union is a class for which we do not expect users of the library to add more values. Historically, we have occasionally modelled this as `abstract` classes in QL, but these days that would be considered an anti-pattern: Abstract classes that are intended to be closed behave in surprising ways when subclassed by library users, and importing libraries that include derived classes can invalidate compilation caches and subvert the meaning of the program. From aaea1ad2fac30fca98c785637f90f7434c0b03bf Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Fri, 14 Jul 2023 10:18:42 +0200 Subject: [PATCH 03/33] Docs: Switch to PascalCase. --- .../query-examples/java/global-data-flow-java-1.ql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql index b33a136c670..431787cac45 100644 --- a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql +++ b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql @@ -1,14 +1,14 @@ import java import semmle.code.java.dataflow.TaintTracking -module TaintedOGNLConfig implements DataFlow::ConfigSig { +module TaintedOgnlConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { /* TBD */ } predicate isSink(DataFlow::Node sink) { /* TBD */ } } -module TaintedOGNLFlow = TaintTracking::Global; +module TaintedOgnlFlow = TaintTracking::Global; from DataFlow::Node source, DataFlow::Node sink -where TaintedOGNLFlow::flow(source, sink) +where TaintedOgnlFlow::flow(source, sink) select source, "This untrusted input is evaluated as an OGNL expression $@.", sink, "here" From ddb499071cfd1ca161e4d869c08c7dc2e670be94 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 17 Jul 2023 16:10:37 +0100 Subject: [PATCH 04/33] Swift: Pragmatic fix for CustomUrlSchemes.qll. --- .../swift/frameworks/StandardLibrary/CustomUrlSchemes.qll | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll index e217de4478d..f6294f7a543 100644 --- a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll +++ b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll @@ -56,7 +56,9 @@ private class ApplicationWithLaunchOptionsFunc extends Function { private class LaunchOptionsUrlVarDecl extends VarDecl { LaunchOptionsUrlVarDecl() { - this.getEnclosingDecl().asNominalTypeDecl().getFullName() = "UIApplication.LaunchOptionsKey" and + // ideally this would be the more accurate, but currently less robust: + // this.getEnclosingDecl().asNominalTypeDecl().getFullName() = "UIApplication.LaunchOptionsKey" and + this.getType().getName() = "UIApplication.LaunchOptionsKey" and this.getName() = "url" } } From efea11fd0f8b1e2c247cf54d673e1d7cc08362dc Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 18 Jul 2023 12:53:45 +0100 Subject: [PATCH 05/33] Swift: getFullName. --- .../swift/frameworks/StandardLibrary/CustomUrlSchemes.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll index f6294f7a543..109d1acec9c 100644 --- a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll +++ b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll @@ -58,7 +58,7 @@ private class LaunchOptionsUrlVarDecl extends VarDecl { LaunchOptionsUrlVarDecl() { // ideally this would be the more accurate, but currently less robust: // this.getEnclosingDecl().asNominalTypeDecl().getFullName() = "UIApplication.LaunchOptionsKey" and - this.getType().getName() = "UIApplication.LaunchOptionsKey" and + this.getType().(NominalType).getFullName() = "UIApplication.LaunchOptionsKey" and this.getName() = "url" } } From afc46576f0dbbf3141dc1756a608f8a25db07c8e Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 19 Jul 2023 09:14:33 +0200 Subject: [PATCH 06/33] Docs: Review fix. --- .../codeql-language-guides/analyzing-data-flow-in-cpp-new.rst | 2 +- .../codeql-language-guides/analyzing-data-flow-in-cpp.rst | 2 +- .../codeql-language-guides/analyzing-data-flow-in-csharp.rst | 2 +- .../codeql-language-guides/analyzing-data-flow-in-java.rst | 2 +- .../codeql-language-guides/analyzing-data-flow-in-python.rst | 2 +- .../codeql-language-guides/analyzing-data-flow-in-ruby.rst | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst index dacfa7c283e..92af2679dc2 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst @@ -222,7 +222,7 @@ Global taint tracking is to global data flow as local taint tracking is to local module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst index 4fc31079f0b..9cd521d1905 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst @@ -207,7 +207,7 @@ Global taint tracking is to global data flow as local taint tracking is to local module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst index f5b2fe29cb3..5bfc939aa76 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst @@ -200,7 +200,7 @@ Global taint tracking is to global data flow what local taint tracking is to loc module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst index 128e8549673..dc08ae11e79 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst @@ -214,7 +214,7 @@ Global taint tracking is to global data flow as local taint tracking is to local module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst index 4a0d12e1f46..61422153735 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst @@ -258,7 +258,7 @@ Global taint tracking is to global data flow what local taint tracking is to loc module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst index 4b132261de3..6a603229376 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst @@ -281,7 +281,7 @@ The global taint tracking library is used by applying the module ``TaintTracking module MyFlow = TaintTracking::Global; -The resulting module is completely similar to the one obtained from ``DataFlow::Global``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 8273fa1a8c720055797985ab3c1789f31708fb1f Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 14 Jul 2023 09:25:38 +0100 Subject: [PATCH 07/33] Swift: Track parse modes (prototype version). --- swift/ql/lib/codeql/swift/regex/Regex.qll | 59 +++++++++++++++++++ .../swift/regex/internal/ParseRegex.qll | 14 ++++- .../swift/regex/internal/RegexTracking.qll | 34 +++++++++-- swift/ql/test/library-tests/regex/regex.swift | 6 +- 4 files changed, 105 insertions(+), 8 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index e958801a9a4..1f155115e84 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -60,6 +60,65 @@ private class StandardRegexCreation extends RegexCreation { override DataFlow::Node getStringInput() { result = input } } +newtype TRegexParseMode = + MkIgnoreCase() or // case insensitive + MkVerbose() or // ignores whitespace and `#` comments within patterns + MkDotAll() or // dot matches all characters, including line terminators + MkMultiLine() or // `^` and `$` also match beginning and end of lines + MkUnicode() // Unicode UAX 29 word boundary mode + +class RegexParseMode extends TRegexParseMode { + string toString() { + (this = MkIgnoreCase() and result = "IGNORECASE") or + (this = MkVerbose() and result = "VERBOSE") or + (this = MkDotAll() and result = "DOTALL") or + (this = MkUnicode() and result = "MULTILINE") or + (this = MkIgnoreCase() and result = "UNICODE") + } +} + +/** + * A unit class for adding additional flow steps for regular expressions. + */ +class RegexAdditionalFlowStep extends Unit { + /** + * Holds if the step from `node1` to `node2` should be considered a flow + * step for regular expressions. + */ + abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); + + /** + * Holds if the step from `node1` to `node2` either sets (`isSet` = true) + * or unsets (`isSet` = false) parse mode `mode` for the regular expression. + */ + abstract predicate modifiesParseMode(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet); +} + +/** + * An additional flow step for `Regex` or `NSRegularExpression`. + */ +class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + this.modifiesParseMode(nodeFrom, nodeTo, _, _) + } + + override predicate modifiesParseMode(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet) + { + exists(CallExpr ce | + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and + nodeFrom.asExpr() = ce.getQualifier() and + nodeTo.asExpr() = ce and + mode = MkDotAll() and + // TODO: other methods + // decode the value being set + if ce.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false then + isSet = false // mode is set to false + else + isSet = true // mode is set to true OR mode is set to default (=true) OR mode is set to an unknown value + ) + } +} + /** * A call that evaluates a regular expression. For example, the call to `firstMatch` in: * ``` diff --git a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll index 913866aecdf..168826170b7 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll @@ -6,6 +6,8 @@ */ import swift +private import RegexTracking +private import codeql.swift.regex.Regex /** * A `Expr` containing a regular expression term, that is, either @@ -324,7 +326,17 @@ abstract class RegExp extends Expr { * MULTILINE * UNICODE */ - string getAMode() { result = this.getModeFromPrefix() } + string getAMode() { + // mode flags from inside the regex string + result = this.getModeFromPrefix() + or + // mode flags applied to the regex object before evaluation + exists(RegexEval e | + e.getARegex() = this and + RegexParseModeFlow::flow(_, DataFlow::exprNode(e.getRegexInput())) and + result = "DOTALL" // TODO + ) + } /** * Holds if the `i`th character could not be parsed. diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index c0906b12871..b84316b11ce 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -6,7 +6,7 @@ import swift import codeql.swift.regex.RegexTreeView -private import codeql.swift.dataflow.DataFlow +import codeql.swift.dataflow.DataFlow private import ParseRegex private import codeql.swift.regex.Regex @@ -37,7 +37,6 @@ private module RegexUseConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { // creation of the regex node instanceof RegexCreation - // TODO: track parse mode flags. } predicate isSink(DataFlow::Node node) { @@ -46,9 +45,36 @@ private module RegexUseConfig implements DataFlow::ConfigSig { } predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // TODO: flow through regex methods that return a modified regex. - none() + any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) } } module RegexUseFlow = DataFlow::Global; + +/** + * A data flow configuration for tracking regular expression parse mode + * flags from the point they are set to the point of use. The flow state + * encodes which parse mode flag was set. + */ +private module RegexParseModeConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { + // parse mode flag is set + any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, MkDotAll(), true) + } + + predicate isBarrierIn(DataFlow::Node node) { + // parse mode flag is set or unset + any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, MkDotAll(), _) + } + + predicate isSink(DataFlow::Node node) { + // evaluation of the regex + node.asExpr() = any(RegexEval eval).getRegexInput() + } + + predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) + } +} + +module RegexParseModeFlow = DataFlow::Global; diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 8b9c5a18a07..040131de60a 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -210,11 +210,11 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { _ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc _ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc - _ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL + _ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL _ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc _ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc - _ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL - _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc MISSING: modes="DOTALL | IGNORECASE" + _ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL + _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc SPURIOUS: modes=DOTALL MISSING: modes="DOTALL | IGNORECASE" _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL From 3c1f75558077b02e4b600172c361da3b6129ed6e Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 14 Jul 2023 17:13:10 +0100 Subject: [PATCH 08/33] Swift: Support other parse modes. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 34 ++++++++++++++++--- .../swift/regex/internal/ParseRegex.qll | 3 +- .../swift/regex/internal/RegexTracking.qll | 34 +++++++++++++------ swift/ql/test/library-tests/regex/regex.swift | 2 +- 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index 1f155115e84..9ba22d1dafc 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -72,8 +72,8 @@ class RegexParseMode extends TRegexParseMode { (this = MkIgnoreCase() and result = "IGNORECASE") or (this = MkVerbose() and result = "VERBOSE") or (this = MkDotAll() and result = "DOTALL") or - (this = MkUnicode() and result = "MULTILINE") or - (this = MkIgnoreCase() and result = "UNICODE") + (this = MkMultiLine() and result = "MULTILINE") or + (this = MkUnicode() and result = "UNICODE") } } @@ -105,11 +105,21 @@ class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { override predicate modifiesParseMode(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet) { exists(CallExpr ce | - ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and nodeFrom.asExpr() = ce.getQualifier() and nodeTo.asExpr() = ce and - mode = MkDotAll() and - // TODO: other methods + // decode the parse mode being set + ( + ( + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "ignoresCase(_:)") and + mode = MkIgnoreCase() + ) or ( + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and + mode = MkDotAll() + ) or ( + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "anchorsMatchLineEndings(_:)") and + mode = MkMultiLine() + ) + ) and // decode the value being set if ce.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false then isSet = false // mode is set to false @@ -150,6 +160,20 @@ abstract class RegexEval extends CallExpr { RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput())) ) } + + /** + * Gets a parse mode that is set at this evaluation (in at least one path + * from the creation of the regular expression object). + */ + RegexParseMode getAParseMode() { + exists(DataFlow::Node setNode | + // parse mode flag is set + any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) + and + // reaches here + RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput())) + ) + } } /** diff --git a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll index 168826170b7..fcc486781e8 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll @@ -333,8 +333,7 @@ abstract class RegExp extends Expr { // mode flags applied to the regex object before evaluation exists(RegexEval e | e.getARegex() = this and - RegexParseModeFlow::flow(_, DataFlow::exprNode(e.getRegexInput())) and - result = "DOTALL" // TODO + result = e.getAParseMode().toString() // TODO: temp toString() ) } diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index b84316b11ce..cb60032bc5e 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -56,25 +56,39 @@ module RegexUseFlow = DataFlow::Global; * flags from the point they are set to the point of use. The flow state * encodes which parse mode flag was set. */ -private module RegexParseModeConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node node) { +private module RegexParseModeConfig implements DataFlow::StateConfigSig { + class FlowState = RegexParseMode; + + predicate isSource(DataFlow::Node node, FlowState flowstate) { // parse mode flag is set - any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, MkDotAll(), true) + any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, flowstate, true) } - predicate isBarrierIn(DataFlow::Node node) { - // parse mode flag is set or unset - any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, MkDotAll(), _) - } - - predicate isSink(DataFlow::Node node) { + predicate isSink(DataFlow::Node node, FlowState flowstate) { // evaluation of the regex node.asExpr() = any(RegexEval eval).getRegexInput() + and + flowstate = any(FlowState fs) + } + + predicate isBarrier(DataFlow::Node node) { + none() + } + + predicate isBarrier(DataFlow::Node node, FlowState flowstate) { + // parse mode flag is set or unset + any(RegexAdditionalFlowStep s).modifiesParseMode(node, _, flowstate, _) } predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) } + + predicate isAdditionalFlowStep( + DataFlow::Node nodeFrom, FlowState flowstateFrom, DataFlow::Node nodeTo, FlowState flowStateTo + ) { + none() + } } -module RegexParseModeFlow = DataFlow::Global; +module RegexParseModeFlow = DataFlow::GlobalWithState; diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 040131de60a..3202adf8566 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -214,7 +214,7 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { _ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc _ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc _ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL - _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc SPURIOUS: modes=DOTALL MISSING: modes="DOTALL | IGNORECASE" + _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc modes="DOTALL | IGNORECASE" _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL From f8b8c67813891cb5b80c71ae131e409619002360 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 14 Jul 2023 18:08:06 +0100 Subject: [PATCH 09/33] Swift: Clean up and autoformat. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 57 ++++++++++--------- .../swift/regex/internal/ParseRegex.qll | 5 +- .../swift/regex/internal/RegexTracking.qll | 7 +-- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index 9ba22d1dafc..05b2546973c 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -68,13 +68,19 @@ newtype TRegexParseMode = MkUnicode() // Unicode UAX 29 word boundary mode class RegexParseMode extends TRegexParseMode { - string toString() { - (this = MkIgnoreCase() and result = "IGNORECASE") or - (this = MkVerbose() and result = "VERBOSE") or - (this = MkDotAll() and result = "DOTALL") or - (this = MkMultiLine() and result = "MULTILINE") or - (this = MkUnicode() and result = "UNICODE") + string getName() { + this = MkIgnoreCase() and result = "IGNORECASE" + or + this = MkVerbose() and result = "VERBOSE" + or + this = MkDotAll() and result = "DOTALL" + or + this = MkMultiLine() and result = "MULTILINE" + or + this = MkUnicode() and result = "UNICODE" } + + string toString() { result = this.getName() } } /** @@ -91,7 +97,9 @@ class RegexAdditionalFlowStep extends Unit { * Holds if the step from `node1` to `node2` either sets (`isSet` = true) * or unsets (`isSet` = false) parse mode `mode` for the regular expression. */ - abstract predicate modifiesParseMode(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet); + abstract predicate modifiesParseMode( + DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet + ); } /** @@ -102,29 +110,27 @@ class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { this.modifiesParseMode(nodeFrom, nodeTo, _, _) } - override predicate modifiesParseMode(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet) - { + override predicate modifiesParseMode( + DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet + ) { exists(CallExpr ce | nodeFrom.asExpr() = ce.getQualifier() and nodeTo.asExpr() = ce and // decode the parse mode being set ( - ( - ce.getStaticTarget().(Method).hasQualifiedName("Regex", "ignoresCase(_:)") and - mode = MkIgnoreCase() - ) or ( - ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and - mode = MkDotAll() - ) or ( - ce.getStaticTarget().(Method).hasQualifiedName("Regex", "anchorsMatchLineEndings(_:)") and - mode = MkMultiLine() - ) + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "ignoresCase(_:)") and + mode = MkIgnoreCase() + or + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and + mode = MkDotAll() + or + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "anchorsMatchLineEndings(_:)") and + mode = MkMultiLine() ) and // decode the value being set - if ce.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false then - isSet = false // mode is set to false - else - isSet = true // mode is set to true OR mode is set to default (=true) OR mode is set to an unknown value + if ce.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false + then isSet = false // mode is set to false + else isSet = true // mode is set to true OR mode is set to default (=true) OR mode is set to an unknown value ) } } @@ -168,9 +174,8 @@ abstract class RegexEval extends CallExpr { RegexParseMode getAParseMode() { exists(DataFlow::Node setNode | // parse mode flag is set - any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) - and - // reaches here + any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) and + // reaches this eval RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput())) ) } diff --git a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll index fcc486781e8..f49f61cb365 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll @@ -319,7 +319,8 @@ abstract class RegExp extends Expr { } /** - * Gets a mode (if any) of this regular expression. Can be any of: + * Gets a mode (if any) of this regular expression in any evaluation. Can be + * any of: * IGNORECASE * VERBOSE * DOTALL @@ -333,7 +334,7 @@ abstract class RegExp extends Expr { // mode flags applied to the regex object before evaluation exists(RegexEval e | e.getARegex() = this and - result = e.getAParseMode().toString() // TODO: temp toString() + result = e.getAParseMode().getName() ) } diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index cb60032bc5e..6a88861993e 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -66,14 +66,11 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig { predicate isSink(DataFlow::Node node, FlowState flowstate) { // evaluation of the regex - node.asExpr() = any(RegexEval eval).getRegexInput() - and + node.asExpr() = any(RegexEval eval).getRegexInput() and flowstate = any(FlowState fs) } - predicate isBarrier(DataFlow::Node node) { - none() - } + predicate isBarrier(DataFlow::Node node) { none() } predicate isBarrier(DataFlow::Node node, FlowState flowstate) { // parse mode flag is set or unset From 84f592b8a181e641d330f1499f0071e264338687 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 14 Jul 2023 18:15:34 +0100 Subject: [PATCH 10/33] Swift: Add another test case. --- .../test/library-tests/regex/parse.expected | 278 +++++++++--------- swift/ql/test/library-tests/regex/regex.swift | 2 + 2 files changed, 142 insertions(+), 138 deletions(-) diff --git a/swift/ql/test/library-tests/regex/parse.expected b/swift/ql/test/library-tests/regex/parse.expected index 008808acf73..aaf02f3de3a 100644 --- a/swift/ql/test/library-tests/regex/parse.expected +++ b/swift/ql/test/library-tests/regex/parse.expected @@ -6327,84 +6327,72 @@ redos_variants.swift: # 579| [RegExpConstant, RegExpNormalChar] c regex.swift: -# 110| [RegExpDot] . +# 111| [RegExpDot] . -# 110| [RegExpStar] .* +# 111| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 132| [RegExpDot] . +# 133| [RegExpDot] . -# 132| [RegExpStar] .* +# 133| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 149| [RegExpDot] . +# 150| [RegExpDot] . -# 149| [RegExpStar] .* +# 150| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 149| [RegExpDot] . +# 150| [RegExpDot] . -# 149| [RegExpPlus] .+ +# 150| [RegExpPlus] .+ #-----| 0 -> [RegExpDot] . -# 156| [RegExpGroup] ([\w.]+) +# 157| [RegExpGroup] ([\w.]+) #-----| 0 -> [RegExpPlus] [\w.]+ -# 156| [RegExpStar] ([\w.]+)* +# 157| [RegExpStar] ([\w.]+)* #-----| 0 -> [RegExpGroup] ([\w.]+) -# 156| [RegExpCharacterClass] [\w.] +# 157| [RegExpCharacterClass] [\w.] #-----| 0 -> [RegExpCharacterClassEscape] \w #-----| 1 -> [RegExpConstant, RegExpNormalChar] . -# 156| [RegExpPlus] [\w.]+ +# 157| [RegExpPlus] [\w.]+ #-----| 0 -> [RegExpCharacterClass] [\w.] -# 156| [RegExpCharacterClassEscape] \w +# 157| [RegExpCharacterClassEscape] \w -# 156| [RegExpConstant, RegExpNormalChar] . +# 157| [RegExpConstant, RegExpNormalChar] . -# 163| [RegExpConstant, RegExpNormalChar] -# 163| - -# 164| [RegExpConstant, RegExpEscape] \n +# 164| [RegExpConstant, RegExpNormalChar] +# 164| # 165| [RegExpConstant, RegExpEscape] \n -# 175| [RegExpConstant, RegExpNormalChar] aa +# 166| [RegExpConstant, RegExpEscape] \n -# 175| [RegExpAlt] aa|bb +# 176| [RegExpConstant, RegExpNormalChar] aa + +# 176| [RegExpAlt] aa|bb #-----| 0 -> [RegExpConstant, RegExpNormalChar] aa #-----| 1 -> [RegExpConstant, RegExpNormalChar] bb -# 175| [RegExpConstant, RegExpNormalChar] bb +# 176| [RegExpConstant, RegExpNormalChar] bb -# 179| [RegExpConstant, RegExpNormalChar] aa +# 180| [RegExpConstant, RegExpNormalChar] aa -# 179| [RegExpAlt] aa| -# 179| bb +# 180| [RegExpAlt] aa| +# 180| bb #-----| 0 -> [RegExpConstant, RegExpNormalChar] aa #-----| 1 -> [RegExpConstant, RegExpNormalChar] #-----| bb -# 179| [RegExpConstant, RegExpNormalChar] -# 179| bb +# 180| [RegExpConstant, RegExpNormalChar] +# 180| bb -# 187| [RegExpCharacterClass] [a-z] +# 188| [RegExpCharacterClass] [a-z] #-----| 0 -> [RegExpCharacterRange] a-z -# 187| [RegExpConstant, RegExpNormalChar] a - -# 187| [RegExpCharacterRange] a-z -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] z - -# 187| [RegExpConstant, RegExpNormalChar] z - -# 188| [RegExpCharacterClass] [a-zA-Z] -#-----| 0 -> [RegExpCharacterRange] a-z -#-----| 1 -> [RegExpCharacterRange] A-Z - # 188| [RegExpConstant, RegExpNormalChar] a # 188| [RegExpCharacterRange] a-z @@ -6413,119 +6401,129 @@ regex.swift: # 188| [RegExpConstant, RegExpNormalChar] z -# 188| [RegExpConstant, RegExpNormalChar] A +# 189| [RegExpCharacterClass] [a-zA-Z] +#-----| 0 -> [RegExpCharacterRange] a-z +#-----| 1 -> [RegExpCharacterRange] A-Z -# 188| [RegExpCharacterRange] A-Z +# 189| [RegExpConstant, RegExpNormalChar] a + +# 189| [RegExpCharacterRange] a-z +#-----| 0 -> [RegExpConstant, RegExpNormalChar] a +#-----| 1 -> [RegExpConstant, RegExpNormalChar] z + +# 189| [RegExpConstant, RegExpNormalChar] z + +# 189| [RegExpConstant, RegExpNormalChar] A + +# 189| [RegExpCharacterRange] A-Z #-----| 0 -> [RegExpConstant, RegExpNormalChar] A #-----| 1 -> [RegExpConstant, RegExpNormalChar] Z -# 188| [RegExpConstant, RegExpNormalChar] Z +# 189| [RegExpConstant, RegExpNormalChar] Z -# 191| [RegExpCharacterClass] [a-] +# 192| [RegExpCharacterClass] [a-] #-----| 0 -> [RegExpConstant, RegExpNormalChar] a #-----| 1 -> [RegExpConstant, RegExpNormalChar] - -# 191| [RegExpConstant, RegExpNormalChar] a - -# 191| [RegExpConstant, RegExpNormalChar] - - -# 192| [RegExpCharacterClass] [-a] -#-----| 0 -> [RegExpConstant, RegExpNormalChar] - -#-----| 1 -> [RegExpConstant, RegExpNormalChar] a +# 192| [RegExpConstant, RegExpNormalChar] a # 192| [RegExpConstant, RegExpNormalChar] - -# 192| [RegExpConstant, RegExpNormalChar] a - -# 193| [RegExpCharacterClass] [-] +# 193| [RegExpCharacterClass] [-a] #-----| 0 -> [RegExpConstant, RegExpNormalChar] - +#-----| 1 -> [RegExpConstant, RegExpNormalChar] a # 193| [RegExpConstant, RegExpNormalChar] - -# 194| [RegExpCharacterClass] [*] +# 193| [RegExpConstant, RegExpNormalChar] a + +# 194| [RegExpCharacterClass] [-] +#-----| 0 -> [RegExpConstant, RegExpNormalChar] - + +# 194| [RegExpConstant, RegExpNormalChar] - + +# 195| [RegExpCharacterClass] [*] #-----| 0 -> [RegExpConstant, RegExpNormalChar] * -# 194| [RegExpConstant, RegExpNormalChar] * +# 195| [RegExpConstant, RegExpNormalChar] * -# 195| [RegExpCharacterClass] [^a] +# 196| [RegExpCharacterClass] [^a] #-----| 0 -> [RegExpConstant, RegExpNormalChar] a -# 195| [RegExpConstant, RegExpNormalChar] a - -# 196| [RegExpCharacterClass] [a^] -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] ^ - # 196| [RegExpConstant, RegExpNormalChar] a -# 196| [RegExpConstant, RegExpNormalChar] ^ +# 197| [RegExpCharacterClass] [a^] +#-----| 0 -> [RegExpConstant, RegExpNormalChar] a +#-----| 1 -> [RegExpConstant, RegExpNormalChar] ^ -# 197| [RegExpCharacterClass] [\\] +# 197| [RegExpConstant, RegExpNormalChar] a + +# 197| [RegExpConstant, RegExpNormalChar] ^ + +# 198| [RegExpCharacterClass] [\\] #-----| 0 -> [RegExpConstant, RegExpEscape] \\ -# 197| [RegExpConstant, RegExpEscape] \\ - -# 198| [RegExpCharacterClass] [\\\]] -#-----| 0 -> [RegExpConstant, RegExpEscape] \\ -#-----| 1 -> [RegExpConstant, RegExpEscape] \] - # 198| [RegExpConstant, RegExpEscape] \\ -# 198| [RegExpConstant, RegExpEscape] \] +# 199| [RegExpCharacterClass] [\\\]] +#-----| 0 -> [RegExpConstant, RegExpEscape] \\ +#-----| 1 -> [RegExpConstant, RegExpEscape] \] -# 199| [RegExpCharacterClass] [:] +# 199| [RegExpConstant, RegExpEscape] \\ + +# 199| [RegExpConstant, RegExpEscape] \] + +# 200| [RegExpCharacterClass] [:] #-----| 0 -> [RegExpConstant, RegExpNormalChar] : -# 199| [RegExpConstant, RegExpNormalChar] : +# 200| [RegExpConstant, RegExpNormalChar] : -# 200| [RegExpNamedCharacterProperty] [:digit:] +# 201| [RegExpNamedCharacterProperty] [:digit:] -# 201| [RegExpNamedCharacterProperty] [:alnum:] +# 202| [RegExpNamedCharacterProperty] [:alnum:] -# 204| [RegExpCharacterClass] []a] +# 205| [RegExpCharacterClass] []a] #-----| 0 -> [RegExpConstant, RegExpNormalChar] ] #-----| 1 -> [RegExpConstant, RegExpNormalChar] a -# 204| [RegExpConstant, RegExpNormalChar] ] +# 205| [RegExpConstant, RegExpNormalChar] ] -# 204| [RegExpConstant, RegExpNormalChar] a +# 205| [RegExpConstant, RegExpNormalChar] a -# 205| [RegExpNamedCharacterProperty] [:aaaaa:] +# 206| [RegExpNamedCharacterProperty] [:aaaaa:] -# 209| [RegExpGroup] (?i) +# 210| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 209| [RegExpSequence] (?i)abc +# 210| [RegExpSequence] (?i)abc #-----| 0 -> [RegExpGroup] (?i) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 209| [RegExpConstant, RegExpNormalChar] i - -# 209| [RegExpConstant, RegExpNormalChar] abc - -# 210| [RegExpGroup] (?s) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] s - -# 210| [RegExpSequence] (?s)abc -#-----| 0 -> [RegExpGroup] (?s) -#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc - -# 210| [RegExpConstant, RegExpNormalChar] s +# 210| [RegExpConstant, RegExpNormalChar] i # 210| [RegExpConstant, RegExpNormalChar] abc -# 211| [RegExpGroup] (?is) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] is +# 211| [RegExpGroup] (?s) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] s -# 211| [RegExpSequence] (?is)abc -#-----| 0 -> [RegExpGroup] (?is) +# 211| [RegExpSequence] (?s)abc +#-----| 0 -> [RegExpGroup] (?s) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 211| [RegExpConstant, RegExpNormalChar] is +# 211| [RegExpConstant, RegExpNormalChar] s # 211| [RegExpConstant, RegExpNormalChar] abc -# 213| [RegExpConstant, RegExpNormalChar] abc +# 212| [RegExpGroup] (?is) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] is + +# 212| [RegExpSequence] (?is)abc +#-----| 0 -> [RegExpGroup] (?is) +#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc + +# 212| [RegExpConstant, RegExpNormalChar] is + +# 212| [RegExpConstant, RegExpNormalChar] abc # 214| [RegExpConstant, RegExpNormalChar] abc @@ -6535,67 +6533,71 @@ regex.swift: # 217| [RegExpConstant, RegExpNormalChar] abc -# 219| [RegExpDot] . +# 218| [RegExpConstant, RegExpNormalChar] abc -# 219| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 220| [RegExpDot] . - -# 220| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . +# 219| [RegExpConstant, RegExpNormalChar] abc # 221| [RegExpDot] . # 221| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 227| [RegExpGroup] (?i-s) +# 222| [RegExpDot] . + +# 222| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 223| [RegExpDot] . + +# 223| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 229| [RegExpGroup] (?i-s) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i-s -# 227| [RegExpSequence] (?i-s)abc +# 229| [RegExpSequence] (?i-s)abc #-----| 0 -> [RegExpGroup] (?i-s) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 227| [RegExpConstant, RegExpNormalChar] i-s +# 229| [RegExpConstant, RegExpNormalChar] i-s -# 227| [RegExpConstant, RegExpNormalChar] abc +# 229| [RegExpConstant, RegExpNormalChar] abc -# 230| [RegExpConstant, RegExpNormalChar] abc +# 232| [RegExpConstant, RegExpNormalChar] abc -# 230| [RegExpSequence] abc(?i)def +# 232| [RegExpSequence] abc(?i)def #-----| 0 -> [RegExpConstant, RegExpNormalChar] abc #-----| 1 -> [RegExpGroup] (?i) #-----| 2 -> [RegExpConstant, RegExpNormalChar] def -# 230| [RegExpGroup] (?i) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] i - -# 230| [RegExpConstant, RegExpNormalChar] i - -# 230| [RegExpConstant, RegExpNormalChar] def - -# 231| [RegExpConstant, RegExpNormalChar] abc - -# 231| [RegExpSequence] abc(?i:def)ghi -#-----| 0 -> [RegExpConstant, RegExpNormalChar] abc -#-----| 1 -> [RegExpGroup] (?i:def) -#-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi - -# 231| [RegExpGroup] (?i:def) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def - -# 231| [RegExpConstant, RegExpNormalChar] i:def - -# 231| [RegExpConstant, RegExpNormalChar] ghi - # 232| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i # 232| [RegExpConstant, RegExpNormalChar] i -# 232| [RegExpConstant, RegExpNormalChar] abc - -# 232| [RegExpConstant, RegExpNormalChar] -i - # 232| [RegExpConstant, RegExpNormalChar] def + +# 233| [RegExpConstant, RegExpNormalChar] abc + +# 233| [RegExpSequence] abc(?i:def)ghi +#-----| 0 -> [RegExpConstant, RegExpNormalChar] abc +#-----| 1 -> [RegExpGroup] (?i:def) +#-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi + +# 233| [RegExpGroup] (?i:def) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def + +# 233| [RegExpConstant, RegExpNormalChar] i:def + +# 233| [RegExpConstant, RegExpNormalChar] ghi + +# 234| [RegExpGroup] (?i) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] i + +# 234| [RegExpConstant, RegExpNormalChar] i + +# 234| [RegExpConstant, RegExpNormalChar] abc + +# 234| [RegExpConstant, RegExpNormalChar] -i + +# 234| [RegExpConstant, RegExpNormalChar] def diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 3202adf8566..6a0191d4ba4 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -19,6 +19,7 @@ struct Regex : RegexComponent { func ignoresCase(_ ignoresCase: Bool = true) -> Regex.RegexOutput> { return self } func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex.RegexOutput> { return self } + func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex.RegexOutput> { return self } func firstMatch(in string: String) throws -> Regex.Match? { return nil } func prefixMatch(in string: String) throws -> Regex.Match? { return nil } @@ -215,6 +216,7 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { _ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc _ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc modes="DOTALL | IGNORECASE" + _ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL From dc5f964ce0918257fe598dd93318b9b563245ed3 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 14 Jul 2023 18:22:35 +0100 Subject: [PATCH 11/33] Swift: Modify the test stubs to test flow models more robustly. --- swift/ql/test/library-tests/regex/regex.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 6a0191d4ba4..075833491e4 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -17,9 +17,9 @@ struct Regex : RegexComponent { init(_ pattern: String) throws where Output == AnyRegexOutput { } - func ignoresCase(_ ignoresCase: Bool = true) -> Regex.RegexOutput> { return self } - func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex.RegexOutput> { return self } - func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex.RegexOutput> { return self } + func ignoresCase(_ ignoresCase: Bool = true) -> Regex.RegexOutput> { return 0 as! Self } + func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex.RegexOutput> { return 0 as! Self } + func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex.RegexOutput> { return 0 as! Self } func firstMatch(in string: String) throws -> Regex.Match? { return nil } func prefixMatch(in string: String) throws -> Regex.Match? { return nil } From cd1e73bd65d977b6f86367958a37efbcfc430aff Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:48:00 +0100 Subject: [PATCH 12/33] Swift: Add some more test cases. --- .../test/library-tests/regex/parse.expected | 127 +++++++++--------- swift/ql/test/library-tests/regex/regex.swift | 25 ++-- 2 files changed, 83 insertions(+), 69 deletions(-) diff --git a/swift/ql/test/library-tests/regex/parse.expected b/swift/ql/test/library-tests/regex/parse.expected index aaf02f3de3a..c38c5aa6331 100644 --- a/swift/ql/test/library-tests/regex/parse.expected +++ b/swift/ql/test/library-tests/regex/parse.expected @@ -6492,112 +6492,117 @@ regex.swift: # 206| [RegExpNamedCharacterProperty] [:aaaaa:] -# 210| [RegExpGroup] (?i) +# 211| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 210| [RegExpSequence] (?i)abc +# 211| [RegExpSequence] (?i)abc #-----| 0 -> [RegExpGroup] (?i) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 210| [RegExpConstant, RegExpNormalChar] i - -# 210| [RegExpConstant, RegExpNormalChar] abc - -# 211| [RegExpGroup] (?s) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] s - -# 211| [RegExpSequence] (?s)abc -#-----| 0 -> [RegExpGroup] (?s) -#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc - -# 211| [RegExpConstant, RegExpNormalChar] s +# 211| [RegExpConstant, RegExpNormalChar] i # 211| [RegExpConstant, RegExpNormalChar] abc -# 212| [RegExpGroup] (?is) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] is +# 212| [RegExpGroup] (?s) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] s -# 212| [RegExpSequence] (?is)abc -#-----| 0 -> [RegExpGroup] (?is) +# 212| [RegExpSequence] (?s)abc +#-----| 0 -> [RegExpGroup] (?s) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 212| [RegExpConstant, RegExpNormalChar] is +# 212| [RegExpConstant, RegExpNormalChar] s # 212| [RegExpConstant, RegExpNormalChar] abc -# 214| [RegExpConstant, RegExpNormalChar] abc +# 213| [RegExpGroup] (?is) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] is -# 215| [RegExpConstant, RegExpNormalChar] abc +# 213| [RegExpSequence] (?is)abc +#-----| 0 -> [RegExpGroup] (?is) +#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 216| [RegExpConstant, RegExpNormalChar] abc +# 213| [RegExpConstant, RegExpNormalChar] is -# 217| [RegExpConstant, RegExpNormalChar] abc +# 213| [RegExpConstant, RegExpNormalChar] abc -# 218| [RegExpConstant, RegExpNormalChar] abc - -# 219| [RegExpConstant, RegExpNormalChar] abc - -# 221| [RegExpDot] . - -# 221| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 222| [RegExpDot] . - -# 222| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 223| [RegExpDot] . - -# 223| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 229| [RegExpGroup] (?i-s) +# 214| [RegExpGroup] (?i-s) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i-s -# 229| [RegExpSequence] (?i-s)abc +# 214| [RegExpSequence] (?i-s)abc #-----| 0 -> [RegExpGroup] (?i-s) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 229| [RegExpConstant, RegExpNormalChar] i-s +# 214| [RegExpConstant, RegExpNormalChar] i-s -# 229| [RegExpConstant, RegExpNormalChar] abc +# 214| [RegExpConstant, RegExpNormalChar] abc -# 232| [RegExpConstant, RegExpNormalChar] abc +# 217| [RegExpConstant, RegExpNormalChar] abc -# 232| [RegExpSequence] abc(?i)def +# 217| [RegExpSequence] abc(?i)def #-----| 0 -> [RegExpConstant, RegExpNormalChar] abc #-----| 1 -> [RegExpGroup] (?i) #-----| 2 -> [RegExpConstant, RegExpNormalChar] def -# 232| [RegExpGroup] (?i) +# 217| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 232| [RegExpConstant, RegExpNormalChar] i +# 217| [RegExpConstant, RegExpNormalChar] i -# 232| [RegExpConstant, RegExpNormalChar] def +# 217| [RegExpConstant, RegExpNormalChar] def -# 233| [RegExpConstant, RegExpNormalChar] abc +# 218| [RegExpConstant, RegExpNormalChar] abc -# 233| [RegExpSequence] abc(?i:def)ghi +# 218| [RegExpSequence] abc(?i:def)ghi #-----| 0 -> [RegExpConstant, RegExpNormalChar] abc #-----| 1 -> [RegExpGroup] (?i:def) #-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi -# 233| [RegExpGroup] (?i:def) +# 218| [RegExpGroup] (?i:def) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def -# 233| [RegExpConstant, RegExpNormalChar] i:def +# 218| [RegExpConstant, RegExpNormalChar] i:def -# 233| [RegExpConstant, RegExpNormalChar] ghi +# 218| [RegExpConstant, RegExpNormalChar] ghi -# 234| [RegExpGroup] (?i) +# 219| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 234| [RegExpConstant, RegExpNormalChar] i +# 219| [RegExpConstant, RegExpNormalChar] i -# 234| [RegExpConstant, RegExpNormalChar] abc +# 219| [RegExpConstant, RegExpNormalChar] abc -# 234| [RegExpConstant, RegExpNormalChar] -i +# 219| [RegExpConstant, RegExpNormalChar] -i -# 234| [RegExpConstant, RegExpNormalChar] def +# 219| [RegExpConstant, RegExpNormalChar] def + +# 222| [RegExpConstant, RegExpNormalChar] abc + +# 223| [RegExpConstant, RegExpNormalChar] abc + +# 224| [RegExpConstant, RegExpNormalChar] abc + +# 225| [RegExpConstant, RegExpNormalChar] abc + +# 226| [RegExpConstant, RegExpNormalChar] abc + +# 227| [RegExpConstant, RegExpNormalChar] abc + +# 230| [RegExpDot] . + +# 230| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 231| [RegExpDot] . + +# 231| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 232| [RegExpDot] . + +# 232| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 235| [RegExpDot] . + +# 235| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 075833491e4..19c7448500a 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -207,10 +207,18 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { // --- parse modes --- + // parse modes encoded in the string _ = try Regex("(?i)abc").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc _ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc _ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc + _ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc MISSING: modes=IGNORECASE SPURIOUS: modes="DOTALL | IGNORECASE" + // these cases use parse modes on localized areas of the regex, which we don't currently support + _ = try Regex("abc(?i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i)def + _ = try Regex("abc(?i:def)ghi").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i:def)ghi + _ = try Regex("(?i)abc(?-i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc(?-i)def SPURIOUS: hasParseFailure= + + // parse modes set through Regex _ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL _ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc _ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc @@ -218,18 +226,19 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { _ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc modes="DOTALL | IGNORECASE" _ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE + // parse modes set through NSRegularExpression _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL _ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE" + let myOptions1 : NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators] + _ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE" + + // parse modes set through other methods + + let myOptions2 : NSString.CompareOptions = [.regularExpression, .caseInsensitive] _ = input.replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive]) // $ MISSING: regex=.* input=input modes=IGNORECASE - + _ = input.replacingOccurrences(of: ".*", with: "", options: myOptions2) // $ MISSING: regex=.* input=input modes=IGNORECASE _ = NSString(string: "abc").replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive], range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS modes=IGNORECASE - - _ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc MISSING: modes=IGNORECASE SPURIOUS: modes="DOTALL | IGNORECASE" - - // these cases use parse modes on localized areas of the regex, which we don't currently support - _ = try Regex("abc(?i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i)def - _ = try Regex("abc(?i:def)ghi").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i:def)ghi - _ = try Regex("(?i)abc(?-i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc(?-i)def SPURIOUS: hasParseFailure= + _ = NSString(string: "abc").replacingOccurrences(of: ".*", with: "", options: myOptions2, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS modes=IGNORECASE } From cf7311f3f1ab7cd79092b34c9c998c68fe7f6fa8 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:27:36 +0100 Subject: [PATCH 13/33] Swift: Expand parse mode support to include NSRegularExpression options. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 116 +++++++++++++++--- .../swift/regex/internal/RegexTracking.qll | 23 +++- swift/ql/test/library-tests/regex/regex.swift | 8 +- 3 files changed, 119 insertions(+), 28 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index 05b2546973c..f52919456d1 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -36,22 +36,23 @@ abstract class RegexCreation extends DataFlow::Node { * created from. */ abstract DataFlow::Node getStringInput(); + + /** + * Gets a dataflow node for the options input that might contain parse mode + * flags (if any). + */ + DataFlow::Node getOptionsInput() { none() } } /** - * A data-flow node where a `Regex` or `NSRegularExpression` object is created. + * A data-flow node where a `Regex` object is created. */ -private class StandardRegexCreation extends RegexCreation { +private class RegexRegexCreation extends RegexCreation { DataFlow::Node input; - StandardRegexCreation() { + RegexRegexCreation() { exists(CallExpr call | - ( - call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or - call.getStaticTarget() - .(Method) - .hasQualifiedName("NSRegularExpression", "init(pattern:options:)") - ) and + call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) and input.asExpr() = call.getArgument(0).getExpr() and this.asExpr() = call ) @@ -60,6 +61,29 @@ private class StandardRegexCreation extends RegexCreation { override DataFlow::Node getStringInput() { result = input } } +/** + * A data-flow node where an `NSRegularExpression` object is created. + */ +private class NSRegularExpressionRegexCreation extends RegexCreation { + DataFlow::Node input; + + NSRegularExpressionRegexCreation() { + exists(CallExpr call | + call.getStaticTarget() + .(Method) + .hasQualifiedName("NSRegularExpression", "init(pattern:options:)") and + input.asExpr() = call.getArgument(0).getExpr() and + this.asExpr() = call + ) + } + + override DataFlow::Node getStringInput() { result = input } + + override DataFlow::Node getOptionsInput() { + result.asExpr() = this.asExpr().(CallExpr).getArgument(1).getExpr() + } +} + newtype TRegexParseMode = MkIgnoreCase() or // case insensitive MkVerbose() or // ignores whitespace and `#` comments within patterns @@ -94,25 +118,29 @@ class RegexAdditionalFlowStep extends Unit { abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); /** - * Holds if the step from `node1` to `node2` either sets (`isSet` = true) - * or unsets (`isSet` = false) parse mode `mode` for the regular expression. + * Holds if a regular expression parse mode is either set (`isSet` = true) + * or unset (`isSet` = false) at `node`. Parse modes propagate through + * array construction and regex constuction. */ - abstract predicate modifiesParseMode( - DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet - ); + abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet); } /** - * An additional flow step for `Regex` or `NSRegularExpression`. + * An additional flow step for `Regex`. */ -class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { +class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - this.modifiesParseMode(nodeFrom, nodeTo, _, _) + this.setsParseModeEdge(nodeFrom, nodeTo, _, _) } - override predicate modifiesParseMode( + override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) { + this.setsParseModeEdge(_, node, mode, isSet) + } + + private predicate setsParseModeEdge( DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet ) { + // `Regex` methods that modify parse mode exists(CallExpr ce | nodeFrom.asExpr() = ce.getQualifier() and nodeTo.asExpr() = ce and @@ -135,6 +163,56 @@ class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { } } +/** + * An additional flow step for `NSRegularExpression`. + */ +class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } + + override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) { + // `NSRegularExpression.Options` values + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "caseInsensitive") and + mode = MkIgnoreCase() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "allowCommentsAndWhitespace") and + mode = MkVerbose() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "dotMatchesLineSeparators") and + mode = MkDotAll() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "anchorsMatchLines") and + mode = MkMultiLine() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "useUnicodeWordBoundaries") and + mode = MkUnicode() and + isSet = true + } +} + /** * A call that evaluates a regular expression. For example, the call to `firstMatch` in: * ``` @@ -174,7 +252,7 @@ abstract class RegexEval extends CallExpr { RegexParseMode getAParseMode() { exists(DataFlow::Node setNode | // parse mode flag is set - any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) and + any(RegexAdditionalFlowStep s).setsParseMode(setNode, result, true) and // reaches this eval RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput())) ) diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index 6a88861993e..c238c35af88 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -53,15 +53,15 @@ module RegexUseFlow = DataFlow::Global; /** * A data flow configuration for tracking regular expression parse mode - * flags from the point they are set to the point of use. The flow state - * encodes which parse mode flag was set. + * flags from wherever they are created or set through to regular expression + * evaluation. The flow state encodes which parse mode flag was set. */ private module RegexParseModeConfig implements DataFlow::StateConfigSig { class FlowState = RegexParseMode; predicate isSource(DataFlow::Node node, FlowState flowstate) { // parse mode flag is set - any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, flowstate, true) + any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, true) } predicate isSink(DataFlow::Node node, FlowState flowstate) { @@ -73,11 +73,24 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig { predicate isBarrier(DataFlow::Node node) { none() } predicate isBarrier(DataFlow::Node node, FlowState flowstate) { - // parse mode flag is set or unset - any(RegexAdditionalFlowStep s).modifiesParseMode(node, _, flowstate, _) + // parse mode flag is unset + any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, false) } predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // flow through array construction + exists(ArrayExpr arr | + nodeFrom.asExpr() = arr.getAnElement() and + nodeTo.asExpr() = arr + ) + or + // flow through regex creation + exists(RegexCreation create | + nodeFrom = create.getOptionsInput() and + nodeTo = create + ) + or + // additional flow steps for regular expression objects any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) } diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift index 19c7448500a..a53e3d389fd 100644 --- a/swift/ql/test/library-tests/regex/regex.swift +++ b/swift/ql/test/library-tests/regex/regex.swift @@ -227,12 +227,12 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws { _ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE // parse modes set through NSRegularExpression - _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE - _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL - _ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE" + _ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=IGNORECASE + _ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=DOTALL + _ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE" let myOptions1 : NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators] - _ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE" + _ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE" // parse modes set through other methods From 2dbbcc2413f19ae136f2323517c67dce0b0f8741 Mon Sep 17 00:00:00 2001 From: Tony Torralba Date: Wed, 19 Jul 2023 11:30:44 +0200 Subject: [PATCH 14/33] Java: Avoid low-confidence dispatch to InputStream methods Also adds a neutral model for `InputStream.read`, which offers a high-confidence alternative for this method. --- java/ql/lib/ext/java.io.model.yml | 1 + java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll | 2 ++ 2 files changed, 3 insertions(+) diff --git a/java/ql/lib/ext/java.io.model.yml b/java/ql/lib/ext/java.io.model.yml index 98c51a7bad5..6cc4933d7b5 100644 --- a/java/ql/lib/ext/java.io.model.yml +++ b/java/ql/lib/ext/java.io.model.yml @@ -116,6 +116,7 @@ extensions: - ["java.io", "File", "isDirectory", "()", "summary", "manual"] - ["java.io", "File", "mkdirs", "()", "summary", "manual"] - ["java.io", "FileInputStream", "FileInputStream", "(File)", "summary", "manual"] + - ["java.io", "InputStream", "read", "()", "summary", "manual"] - ["java.io", "InputStream", "close", "()", "summary", "manual"] - ["java.io", "OutputStream", "flush", "()", "summary", "manual"] # The below APIs have numeric flow and are currently being stored as neutral models. diff --git a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll index 4b880542229..c22f77725a1 100644 --- a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll +++ b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll @@ -102,6 +102,8 @@ private module Dispatch { or t instanceof Interface and not t.fromSource() or + t.hasQualifiedName("java.io", "InputStream") + or t.hasQualifiedName("java.io", "Serializable") or t.hasQualifiedName("java.lang", "Iterable") From 420008aed7fb58d686b9b94e8277fe0b66503f55 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 10:36:01 +0100 Subject: [PATCH 15/33] Swift: Minor corrections / clarifications. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index f52919456d1..6d7af5a7533 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -140,7 +140,7 @@ class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep { private predicate setsParseModeEdge( DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet ) { - // `Regex` methods that modify parse mode + // `Regex` methods that modify the parse mode of an existing `Regex` object. exists(CallExpr ce | nodeFrom.asExpr() = ce.getQualifier() and nodeTo.asExpr() = ce and @@ -166,11 +166,12 @@ class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep { /** * An additional flow step for `NSRegularExpression`. */ -class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep { +class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) { - // `NSRegularExpression.Options` values + // `NSRegularExpression.Options` values (these are typically combined, then passed into + // the `NSRegularExpression` initializer). node.asExpr() .(MemberRefExpr) .getMember() From 0660f98a3379e11e124aa19f1b71647a7b078592 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 10:37:34 +0100 Subject: [PATCH 16/33] Swift: Change note. --- .../ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md diff --git a/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md b/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md new file mode 100644 index 00000000000..46ff145715d --- /dev/null +++ b/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expression library now understands mode flags specified by `Regex` methods and the `NSRegularExpression` initializer. From 29543f572691c06574576bf92e144ec14ef67b63 Mon Sep 17 00:00:00 2001 From: Tony Torralba Date: Wed, 19 Jul 2023 14:44:18 +0200 Subject: [PATCH 17/33] Change InputStream.read from neutral to summary --- java/ql/lib/ext/java.io.model.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/ql/lib/ext/java.io.model.yml b/java/ql/lib/ext/java.io.model.yml index 6cc4933d7b5..e4d543aa06d 100644 --- a/java/ql/lib/ext/java.io.model.yml +++ b/java/ql/lib/ext/java.io.model.yml @@ -84,6 +84,7 @@ extensions: - ["java.io", "File", True, "toString", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "File", True, "toURI", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "FilterOutputStream", True, "FilterOutputStream", "(OutputStream)", "", "Argument[0]", "Argument[this]", "taint", "manual"] + - ["java.io", "InputStream", True, "read", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "InputStream", True, "read", "(byte[])", "", "Argument[this]", "Argument[0]", "taint", "manual"] - ["java.io", "InputStream", True, "read", "(byte[],int,int)", "", "Argument[this]", "Argument[0]", "taint", "manual"] - ["java.io", "InputStream", True, "readAllBytes", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] @@ -116,7 +117,6 @@ extensions: - ["java.io", "File", "isDirectory", "()", "summary", "manual"] - ["java.io", "File", "mkdirs", "()", "summary", "manual"] - ["java.io", "FileInputStream", "FileInputStream", "(File)", "summary", "manual"] - - ["java.io", "InputStream", "read", "()", "summary", "manual"] - ["java.io", "InputStream", "close", "()", "summary", "manual"] - ["java.io", "OutputStream", "flush", "()", "summary", "manual"] # The below APIs have numeric flow and are currently being stored as neutral models. From 5c47ea0f91eed2c6d4089b64f11712f36e78dd9b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 13:51:37 +0100 Subject: [PATCH 18/33] Swift: Missing QLDoc / typos / missing private. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index 6d7af5a7533..a9f2cd8c421 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -84,14 +84,20 @@ private class NSRegularExpressionRegexCreation extends RegexCreation { } } -newtype TRegexParseMode = +private newtype TRegexParseMode = MkIgnoreCase() or // case insensitive MkVerbose() or // ignores whitespace and `#` comments within patterns MkDotAll() or // dot matches all characters, including line terminators MkMultiLine() or // `^` and `$` also match beginning and end of lines MkUnicode() // Unicode UAX 29 word boundary mode +/** + * A regular expression parse mode flag. + */ class RegexParseMode extends TRegexParseMode { + /** + * Gets the name of this parse mode flag. + */ string getName() { this = MkIgnoreCase() and result = "IGNORECASE" or @@ -120,7 +126,7 @@ class RegexAdditionalFlowStep extends Unit { /** * Holds if a regular expression parse mode is either set (`isSet` = true) * or unset (`isSet` = false) at `node`. Parse modes propagate through - * array construction and regex constuction. + * array construction and regex construction. */ abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet); } From 238cb266247b496c2172f5c99e106158ab4563e7 Mon Sep 17 00:00:00 2001 From: Tony Torralba Date: Wed, 19 Jul 2023 15:37:33 +0200 Subject: [PATCH 19/33] Add change note --- java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md diff --git a/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md b/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md new file mode 100644 index 00000000000..d093c771d51 --- /dev/null +++ b/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- +* Improved the precision of virtual dispatch of `java.io.InputStream` methods. Now, calls to these methods will not dispatch to arbitrary implementations of `InputStream` if there is a high-confidence alternative (like a models-as-data summary). + \ No newline at end of file From 6fa0445e0f03a1d1f18342d51e458b75d0ca19f2 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:31:19 +0100 Subject: [PATCH 20/33] Swift: Fix QL-for-QL warning. --- swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index c238c35af88..46360b66636 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -67,7 +67,7 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig { predicate isSink(DataFlow::Node node, FlowState flowstate) { // evaluation of the regex node.asExpr() = any(RegexEval eval).getRegexInput() and - flowstate = any(FlowState fs) + exists(flowstate) } predicate isBarrier(DataFlow::Node node) { none() } From b91468607b8e47f47de7188e83c3f2ba7d9c862b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:45:26 +0100 Subject: [PATCH 21/33] Swift: Reluctantly QLDoc the toString. --- swift/ql/lib/codeql/swift/regex/Regex.qll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index a9f2cd8c421..0ae533d6843 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -110,6 +110,9 @@ class RegexParseMode extends TRegexParseMode { this = MkUnicode() and result = "UNICODE" } + /** + * Gets a textual representation of this `RegexParseMode`. + */ string toString() { result = this.getName() } } From a51ad1f4177dab61eadd8cf9c4ad667b8cdf890f Mon Sep 17 00:00:00 2001 From: Alexandre Boulgakov Date: Wed, 19 Jul 2023 18:28:51 +0100 Subject: [PATCH 22/33] Docs: Add armclang as supported C++ compiler. --- docs/codeql/reusables/supported-versions-compilers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/codeql/reusables/supported-versions-compilers.rst b/docs/codeql/reusables/supported-versions-compilers.rst index 42d4eaad956..81de83cef95 100644 --- a/docs/codeql/reusables/supported-versions-compilers.rst +++ b/docs/codeql/reusables/supported-versions-compilers.rst @@ -4,7 +4,7 @@ :stub-columns: 1 Language,Variants,Compilers,Extensions - C/C++,"C89, C99, C11, C17, C++98, C++03, C++11, C++14, C++17, C++20 [1]_","Clang (and clang-cl [2]_) extensions (up to Clang 12.0), + C/C++,"C89, C99, C11, C17, C++98, C++03, C++11, C++14, C++17, C++20 [1]_","Clang (including clang-cl [2]_ and armclang) extensions (up to Clang 12.0), GNU extensions (up to GCC 11.1), From 374f13e0dc3f137db2888eb46690e46d685a4f33 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan <62447351+owen-mc@users.noreply.github.com> Date: Thu, 20 Jul 2023 13:31:14 +0100 Subject: [PATCH 23/33] Revert "Go: Fix missing flow through receiver for function variable" --- .../2023-07-19-method-call-node.md | 4 -- go/ql/lib/semmle/go/controlflow/IR.qll | 2 +- .../go/dataflow/internal/DataFlowDispatch.qll | 6 +-- .../go/dataflow/internal/DataFlowNodes.qll | 40 +++---------------- go/ql/lib/semmle/go/frameworks/Gqlgen.qll | 2 +- .../semmle/go/frameworks/stdlib/NetHttp.qll | 2 +- go/ql/lib/semmle/go/security/ExternalAPIs.qll | 2 +- .../go/security/SafeUrlFlowCustomizations.qll | 4 +- .../UnhandledCloseWritableHandle.ql | 4 +- .../Security/CWE-352/ConstantOauth2State.ql | 2 +- .../src/experimental/CWE-1004/AuthCookie.qll | 8 ++-- .../src/experimental/CWE-285/PamAuthBypass.ql | 4 +- .../src/experimental/frameworks/CleverGo.qll | 16 ++++---- go/ql/src/experimental/frameworks/Fiber.qll | 8 ++-- .../generictypesandmethods.go | 12 +++--- .../semmle/go/frameworks/Yaml/tests.ql | 5 +-- 16 files changed, 42 insertions(+), 79 deletions(-) delete mode 100644 go/ql/lib/change-notes/2023-07-19-method-call-node.md diff --git a/go/ql/lib/change-notes/2023-07-19-method-call-node.md b/go/ql/lib/change-notes/2023-07-19-method-call-node.md deleted file mode 100644 index 765e8d5589e..00000000000 --- a/go/ql/lib/change-notes/2023-07-19-method-call-node.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: fix ---- -* The definition of `DataFlow::MethodCallNode` has been expanded to include `DataFlow::CallNode`s where what is being called is a variable that has a method assigned to it within the calling function. This means we can now follow data flow into the receiver of such a method call. diff --git a/go/ql/lib/semmle/go/controlflow/IR.qll b/go/ql/lib/semmle/go/controlflow/IR.qll index ce120674b4c..4df0867e4c6 100644 --- a/go/ql/lib/semmle/go/controlflow/IR.qll +++ b/go/ql/lib/semmle/go/controlflow/IR.qll @@ -744,7 +744,7 @@ module IR { override Type getResultType() { exists(CallExpr c | this.getBase() = evalExprInstruction(c) | - result = c.getCalleeType().getResultType(i) + result = c.getTarget().getResultType(i) ) or exists(Expr e | this.getBase() = evalExprInstruction(e) | diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll index 9bcfde9f9b0..ea768f54715 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll @@ -8,7 +8,7 @@ private import DataFlowPrivate private predicate isInterfaceCallReceiver( DataFlow::CallNode call, DataFlow::Node recv, InterfaceType tp, string m ) { - call.(DataFlow::MethodCallNode).getReceiver() = recv and + call.getReceiver() = recv and recv.getType().getUnderlyingType() = tp and m = call.getACalleeIncludingExternals().asFunction().getName() } @@ -149,9 +149,7 @@ predicate golangSpecificParamArgFilter( // Interface methods calls may be passed strictly to that exact method's model receiver: arg.getPosition() != -1 or - exists(Function callTarget | - callTarget = call.getNode().(DataFlow::CallNode).getACalleeIncludingExternals().asFunction() - | + exists(Function callTarget | callTarget = call.getNode().(DataFlow::CallNode).getTarget() | not isInterfaceMethod(callTarget) or callTarget = p.getCallable().asSummarizedCallable().asFunction() diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll index dc2811045ed..59224024ec3 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll @@ -439,8 +439,8 @@ module Public { CallNode getCallNode() { result = call } override Type getType() { - exists(SignatureType t | t = call.getCall().getCalleeType() | - result = t.getParameterType(t.getNumParameter() - 1) + exists(Function f | f = call.getTarget() | + result = f.getParameterType(f.getNumParameter() - 1) ) } @@ -474,11 +474,7 @@ module Public { class CallNode extends ExprNode { override CallExpr expr; - /** - * Gets the declared target of this call, if it exists. - * - * This doesn't exist when a function is called via a variable. - */ + /** Gets the declared target of this call */ Function getTarget() { result = expr.getTarget() } private DataFlow::Node getACalleeSource() { result = getACalleeSource(this) } @@ -626,40 +622,16 @@ module Public { /** Gets a result of this call. */ Node getAResult() { result = this.getResult(_) } - /** - * Gets the data flow node corresponding to the receiver of this call, if any. - * - * When a method value is assigned to a variable then when it is called it - * looks like a function call, as in the following example. - * ```go - * file, _ := os.Open("test.txt") - * f := file.Close - * f() - * ``` - * In this case we use local flow to try to find the receiver (`file` in - * the above example). - */ + /** Gets the data flow node corresponding to the receiver of this call, if any. */ Node getReceiver() { result = this.getACalleeSource().(MethodReadNode).getReceiver() } /** Holds if this call has an ellipsis after its last argument. */ predicate hasEllipsis() { expr.hasEllipsis() } } - /** - * A data flow node that represents a call to a method. - * - * When a method value is assigned to a variable then when it is called it - * syntactically looks like a function call, as in the following example. - * ```go - * file, _ := os.Open("test.txt") - * f := file.Close - * f() - * ``` - * In this case we use local flow to see whether a method is assigned to the - * function. - */ + /** A data flow node that represents a call to a method. */ class MethodCallNode extends CallNode { - MethodCallNode() { getACalleeSource(this) instanceof MethodReadNode } + MethodCallNode() { expr.getTarget() instanceof Method } override Method getTarget() { result = expr.getTarget() } diff --git a/go/ql/lib/semmle/go/frameworks/Gqlgen.qll b/go/ql/lib/semmle/go/frameworks/Gqlgen.qll index 0a86dcde0b9..a4c3993d5d4 100644 --- a/go/ql/lib/semmle/go/frameworks/Gqlgen.qll +++ b/go/ql/lib/semmle/go/frameworks/Gqlgen.qll @@ -7,7 +7,7 @@ module Gqlgen { /** An autogenerated file containing gqlgen code. */ private class GqlgenGeneratedFile extends File { GqlgenGeneratedFile() { - exists(DataFlow::MethodCallNode call | + exists(DataFlow::CallNode call | call.getReceiver().getType().hasQualifiedName("github.com/99designs/gqlgen/graphql", _) and call.getFile() = this ) diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index a32f9a5486f..b3f1d075c86 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -131,7 +131,7 @@ module NetHttp { ) or stack = SummaryComponentStack::argument(-1) and - result = call.(DataFlow::MethodCallNode).getReceiver() + result = call.getReceiver() } private class ResponseBody extends Http::ResponseBody::Range { diff --git a/go/ql/lib/semmle/go/security/ExternalAPIs.qll b/go/ql/lib/semmle/go/security/ExternalAPIs.qll index 689ddba1b18..8a27ce28c2a 100644 --- a/go/ql/lib/semmle/go/security/ExternalAPIs.qll +++ b/go/ql/lib/semmle/go/security/ExternalAPIs.qll @@ -86,7 +86,7 @@ class ExternalApiDataNode extends DataFlow::Node { this = call.getArgument(i) or // Receiver to a call to a method which returns non trivial value - this = call.(DataFlow::MethodCallNode).getReceiver() and + this = call.getReceiver() and i = -1 ) and // Not defined in the code that is being analyzed diff --git a/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll b/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll index 8685b4b5562..5f0572db11e 100644 --- a/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll +++ b/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll @@ -33,9 +33,7 @@ module SafeUrlFlow { /** A function model step using `UnsafeUrlMethod`, considered as a sanitizer for safe URL flow. */ private class UnsafeUrlMethodEdge extends SanitizerEdge { - UnsafeUrlMethodEdge() { - this = any(UnsafeUrlMethod um).getACall().(DataFlow::MethodCallNode).getReceiver() - } + UnsafeUrlMethodEdge() { this = any(UnsafeUrlMethod um).getACall().getReceiver() } } /** Any slicing of the URL, considered as a sanitizer for safe URL flow. */ diff --git a/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql b/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql index 413753ac26d..c78a9d4a36f 100644 --- a/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql +++ b/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql @@ -90,7 +90,7 @@ predicate isWritableFileHandle(DataFlow::Node source, DataFlow::CallNode call) { /** * Holds if `os.File.Close` is called on `sink`. */ -predicate isCloseSink(DataFlow::Node sink, DataFlow::MethodCallNode closeCall) { +predicate isCloseSink(DataFlow::Node sink, DataFlow::CallNode closeCall) { // find calls to the os.File.Close function closeCall = any(CloseFileFun f).getACall() and // that are unhandled @@ -115,7 +115,7 @@ predicate isCloseSink(DataFlow::Node sink, DataFlow::MethodCallNode closeCall) { * Holds if `os.File.Sync` is called on `sink` and the result of the call is neither * deferred nor discarded. */ -predicate isHandledSync(DataFlow::Node sink, DataFlow::MethodCallNode syncCall) { +predicate isHandledSync(DataFlow::Node sink, DataFlow::CallNode syncCall) { // find a call of the `os.File.Sync` function syncCall = any(SyncFileFun f).getACall() and // match the sink with the object on which the method is called diff --git a/go/ql/src/Security/CWE-352/ConstantOauth2State.ql b/go/ql/src/Security/CWE-352/ConstantOauth2State.ql index 57f899cc80a..abe982f7fe5 100644 --- a/go/ql/src/Security/CWE-352/ConstantOauth2State.ql +++ b/go/ql/src/Security/CWE-352/ConstantOauth2State.ql @@ -113,7 +113,7 @@ class PrivateUrlFlowsToAuthCodeUrlCall extends DataFlow::Configuration { ) } - predicate isSinkCall(DataFlow::Node sink, DataFlow::MethodCallNode call) { + predicate isSinkCall(DataFlow::Node sink, DataFlow::CallNode call) { exists(AuthCodeUrl m | call = m.getACall() | sink = call.getReceiver()) } diff --git a/go/ql/src/experimental/CWE-1004/AuthCookie.qll b/go/ql/src/experimental/CWE-1004/AuthCookie.qll index bbc0681d192..676249c7b8c 100644 --- a/go/ql/src/experimental/CWE-1004/AuthCookie.qll +++ b/go/ql/src/experimental/CWE-1004/AuthCookie.qll @@ -189,11 +189,11 @@ class GorillaCookieStoreSaveTrackingConfiguration extends DataFlow::Configuratio } override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::MethodCallNode mcn | - mcn.getTarget() + exists(DataFlow::MethodCallNode cn | + cn.getTarget() .hasQualifiedName(package("github.com/gorilla/sessions", ""), "CookieStore", "Get") and - pred = mcn.getReceiver() and - succ = mcn.getResult(0) + pred = cn.getReceiver() and + succ = cn.getResult(0) ) } } diff --git a/go/ql/src/experimental/CWE-285/PamAuthBypass.ql b/go/ql/src/experimental/CWE-285/PamAuthBypass.ql index 0de25e94a73..3e6864c021f 100644 --- a/go/ql/src/experimental/CWE-285/PamAuthBypass.ql +++ b/go/ql/src/experimental/CWE-285/PamAuthBypass.ql @@ -41,7 +41,7 @@ class PamStartToAcctMgmtConfig extends TaintTracking::Configuration { } override predicate isSink(DataFlow::Node sink) { - exists(PamAcctMgmt p | p.getACall().(DataFlow::MethodCallNode).getReceiver() = sink) + exists(PamAcctMgmt p | p.getACall().getReceiver() = sink) } } @@ -53,7 +53,7 @@ class PamStartToAuthenticateConfig extends TaintTracking::Configuration { } override predicate isSink(DataFlow::Node sink) { - exists(PamAuthenticate p | p.getACall().(DataFlow::MethodCallNode).getReceiver() = sink) + exists(PamAuthenticate p | p.getACall().getReceiver() = sink) } } diff --git a/go/ql/src/experimental/frameworks/CleverGo.qll b/go/ql/src/experimental/frameworks/CleverGo.qll index 20991dbdd03..2433ba4997a 100644 --- a/go/ql/src/experimental/frameworks/CleverGo.qll +++ b/go/ql/src/experimental/frameworks/CleverGo.qll @@ -174,7 +174,7 @@ private module CleverGo { /** * Models HTTP redirects. */ - private class HttpRedirect extends Http::Redirect::Range, DataFlow::MethodCallNode { + private class HttpRedirect extends Http::Redirect::Range, DataFlow::CallNode { DataFlow::Node urlNode; HttpRedirect() { @@ -211,7 +211,7 @@ private module CleverGo { string package, string receiverName, DataFlow::Node bodyNode, string contentTypeString, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -317,7 +317,7 @@ private module CleverGo { string package, string receiverName, DataFlow::Node bodyNode, DataFlow::Node contentTypeNode, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -356,7 +356,7 @@ private module CleverGo { private predicate setsBody( string package, string receiverName, DataFlow::Node receiverNode, DataFlow::Node bodyNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -400,7 +400,7 @@ private module CleverGo { // Holds for a call that sets a header with a key-value combination. private predicate setsHeaderDynamicKeyValue( - string package, string receiverName, DataFlow::MethodCallNode headerSetterCall, + string package, string receiverName, DataFlow::CallNode headerSetterCall, DataFlow::Node headerNameNode, DataFlow::Node headerValueNode, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -446,7 +446,7 @@ private module CleverGo { // Holds for a call that sets the content-type header (implicit). private predicate setsStaticHeaderContentType( - string package, string receiverName, DataFlow::MethodCallNode setterCall, string valueString, + string package, string receiverName, DataFlow::CallNode setterCall, string valueString, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -501,8 +501,8 @@ private module CleverGo { // Holds for a call that sets the content-type header via a parameter. private predicate setsDynamicHeaderContentType( - string package, string receiverName, DataFlow::MethodCallNode setterCall, - DataFlow::Node valueNode, DataFlow::Node receiverNode + string package, string receiverName, DataFlow::CallNode setterCall, DataFlow::Node valueNode, + DataFlow::Node receiverNode ) { exists(string methodName, Method met | met.hasQualifiedName(package, receiverName, methodName) and diff --git a/go/ql/src/experimental/frameworks/Fiber.qll b/go/ql/src/experimental/frameworks/Fiber.qll index 3f94d678705..27bb9bbcd10 100644 --- a/go/ql/src/experimental/frameworks/Fiber.qll +++ b/go/ql/src/experimental/frameworks/Fiber.qll @@ -129,7 +129,7 @@ private module Fiber { /** * Models HTTP redirects. */ - private class Redirect extends Http::Redirect::Range, DataFlow::MethodCallNode { + private class Redirect extends Http::Redirect::Range, DataFlow::CallNode { DataFlow::Node urlNode; Redirect() { @@ -167,7 +167,7 @@ private module Fiber { // Holds for a call that sets a header with a key-value combination. private predicate setsHeaderDynamicKeyValue( - string package, string receiverName, DataFlow::MethodCallNode headerSetterCall, + string package, string receiverName, DataFlow::CallNode headerSetterCall, DataFlow::Node headerNameNode, DataFlow::Node headerValueNode, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -215,7 +215,7 @@ private module Fiber { string package, string receiverName, DataFlow::Node bodyNode, string contentTypeString, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -254,7 +254,7 @@ private module Fiber { private predicate setsBody( string package, string receiverName, DataFlow::Node receiverNode, DataFlow::Node bodyNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() diff --git a/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go b/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go index 54aa78b7a90..4e2dc169c6d 100644 --- a/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go +++ b/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go @@ -51,7 +51,7 @@ func main() { gs1 := GenericStruct1[string]{""} gs1.Field = source() f := gs1.Getter - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs1 := GenericStruct1[string]{""} @@ -62,7 +62,7 @@ func main() { gs1 := GenericStruct1[string]{""} f := gs1.Setter f(source()) - sink(gs1.Field) // $ hasValueFlow="selection of Field" + sink(gs1.Field) // $ MISSING: hasValueFlow="selection of Field" } { @@ -87,7 +87,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} gs2.Field1 = source() f := gs2.Getter1 - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs2 := GenericStruct2[string, string]{"", ""} @@ -98,7 +98,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} f := gs2.Setter1 f(source()) - sink(gs2.Field1) // $ hasValueFlow="selection of Field1" + sink(gs2.Field1) // $ MISSING: hasValueFlow="selection of Field1" } { @@ -123,7 +123,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} gs2.Field2 = source() f := gs2.Getter2 - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs2 := GenericStruct2[string, string]{"", ""} @@ -134,6 +134,6 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} f := gs2.Setter2 f(source()) - sink(gs2.Field2) // $ hasValueFlow="selection of Field2" + sink(gs2.Field2) // $ MISSING: hasValueFlow="selection of Field2" } } diff --git a/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql b/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql index 169b334ef29..c4c0cafb50e 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql +++ b/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql @@ -13,10 +13,9 @@ DataFlow::CallNode getAYamlCall() { predicate isSourceSinkPair(DataFlow::Node inNode, DataFlow::Node outNode) { exists(DataFlow::CallNode cn | cn = getAYamlCall() | - inNode = [cn.getAnArgument(), cn.(DataFlow::MethodCallNode).getReceiver()] and + inNode = [cn.getAnArgument(), cn.getReceiver()] and ( - outNode.(DataFlow::PostUpdateNode).getPreUpdateNode() = - [cn.getAnArgument(), cn.(DataFlow::MethodCallNode).getReceiver()] + outNode.(DataFlow::PostUpdateNode).getPreUpdateNode() = [cn.getAnArgument(), cn.getReceiver()] or outNode = cn.getAResult() ) From 8e63bd6c78d3ad6889fdb28a7a4de0e06671f87f Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 20 Jul 2023 16:40:18 +0100 Subject: [PATCH 24/33] Correct Golang change note format --- .../2023-06-28--add-support-for-bun-framework.md | 5 +++-- .../2023-06-28--add-support-for-gqlgen-framework.md | 4 +++- .../2023-06-28--improve-support-for-gopg-framework.md | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md b/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md index 1eec83af074..c8cdc34f827 100644 --- a/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md @@ -1,3 +1,4 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for the [Bun framework](https://bun.uptrace.dev/) has been added. - diff --git a/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md b/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md index cf170b1e9fb..b1e030c13e9 100644 --- a/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md @@ -1,2 +1,4 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for [gqlgen](https://github.com/99designs/gqlgen) has been added. diff --git a/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md b/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md index ab6aa93c14b..4f321faa82b 100644 --- a/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md @@ -1,3 +1,5 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for the [go-pg framework](https://github.com/go-pg/pg) has been improved. From 4c9c5d8f0c60206eff7ace0de7189587efc346ef Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Thu, 20 Jul 2023 22:36:42 +0200 Subject: [PATCH 25/33] C++: Add IR SSA test case for the ternary operator --- .../ir/ssa/aliased_ssa_ir.expected | 43 +++++++++++++++++++ .../ir/ssa/aliased_ssa_ir_unsound.expected | 43 +++++++++++++++++++ cpp/ql/test/library-tests/ir/ssa/ssa.cpp | 4 ++ .../ir/ssa/unaliased_ssa_ir.expected | 42 ++++++++++++++++++ .../ir/ssa/unaliased_ssa_ir_unsound.expected | 42 ++++++++++++++++++ 5 files changed, 174 insertions(+) diff --git a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected index ebb54018d31..a357821fcf5 100644 --- a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected @@ -2157,3 +2157,46 @@ ssa.cpp: # 431| v431_9(void) = ReturnValue : &:r431_8, m435_4 # 431| v431_10(void) = AliasedUse : m431_3 # 431| v431_11(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| m438_2(unknown) = AliasedDefinition : +# 438| m438_3(unknown) = InitializeNonLocal : +# 438| m438_4(unknown) = Chi : total:m438_2, partial:m438_3 +# 438| r438_5(glval) = VariableAddress[a] : +# 438| m438_6(bool) = InitializeParameter[a] : &:r438_5 +# 438| r438_7(glval) = VariableAddress[x] : +# 438| m438_8(int) = InitializeParameter[x] : &:r438_7 +# 438| r438_9(glval) = VariableAddress[y] : +# 438| m438_10(int) = InitializeParameter[y] : &:r438_9 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_6 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_11(void) = ReturnVoid : +# 438| v438_12(void) = AliasedUse : m438_3 +# 438| v438_13(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_8 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_10 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected index a2390ac28e7..f9eb5b53828 100644 --- a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected +++ b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected @@ -2146,3 +2146,46 @@ ssa.cpp: # 431| v431_9(void) = ReturnValue : &:r431_8, m435_4 # 431| v431_10(void) = AliasedUse : m431_3 # 431| v431_11(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| m438_2(unknown) = AliasedDefinition : +# 438| m438_3(unknown) = InitializeNonLocal : +# 438| m438_4(unknown) = Chi : total:m438_2, partial:m438_3 +# 438| r438_5(glval) = VariableAddress[a] : +# 438| m438_6(bool) = InitializeParameter[a] : &:r438_5 +# 438| r438_7(glval) = VariableAddress[x] : +# 438| m438_8(int) = InitializeParameter[x] : &:r438_7 +# 438| r438_9(glval) = VariableAddress[y] : +# 438| m438_10(int) = InitializeParameter[y] : &:r438_9 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_6 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_11(void) = ReturnVoid : +# 438| v438_12(void) = AliasedUse : m438_3 +# 438| v438_13(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_8 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_10 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/ssa.cpp b/cpp/ql/test/library-tests/ir/ssa/ssa.cpp index fdeeb4ec2ba..56caf9de3b6 100644 --- a/cpp/ql/test/library-tests/ir/ssa/ssa.cpp +++ b/cpp/ql/test/library-tests/ir/ssa/ssa.cpp @@ -434,3 +434,7 @@ int noreturnTest2(int x) { } return x; } + +void Conditional(bool a, int x, int y) { + int z = a ? x : y; +} diff --git a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected index f51e8fef7ac..96b35a76c3b 100644 --- a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected @@ -2002,3 +2002,45 @@ ssa.cpp: # 431| v431_8(void) = ReturnValue : &:r431_7, m435_4 # 431| v431_9(void) = AliasedUse : ~m? # 431| v431_10(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| mu438_2(unknown) = AliasedDefinition : +# 438| mu438_3(unknown) = InitializeNonLocal : +# 438| r438_4(glval) = VariableAddress[a] : +# 438| m438_5(bool) = InitializeParameter[a] : &:r438_4 +# 438| r438_6(glval) = VariableAddress[x] : +# 438| m438_7(int) = InitializeParameter[x] : &:r438_6 +# 438| r438_8(glval) = VariableAddress[y] : +# 438| m438_9(int) = InitializeParameter[y] : &:r438_8 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_5 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_10(void) = ReturnVoid : +# 438| v438_11(void) = AliasedUse : ~m? +# 438| v438_12(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_7 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_9 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected index f51e8fef7ac..96b35a76c3b 100644 --- a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected +++ b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected @@ -2002,3 +2002,45 @@ ssa.cpp: # 431| v431_8(void) = ReturnValue : &:r431_7, m435_4 # 431| v431_9(void) = AliasedUse : ~m? # 431| v431_10(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| mu438_2(unknown) = AliasedDefinition : +# 438| mu438_3(unknown) = InitializeNonLocal : +# 438| r438_4(glval) = VariableAddress[a] : +# 438| m438_5(bool) = InitializeParameter[a] : &:r438_4 +# 438| r438_6(glval) = VariableAddress[x] : +# 438| m438_7(int) = InitializeParameter[x] : &:r438_6 +# 438| r438_8(glval) = VariableAddress[y] : +# 438| m438_9(int) = InitializeParameter[y] : &:r438_8 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_5 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_10(void) = ReturnVoid : +# 438| v438_11(void) = AliasedUse : ~m? +# 438| v438_12(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_7 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_9 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 From 108cd7f078d652e230ea4fa2c5a46d48e02b66cd Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 14:55:45 +0100 Subject: [PATCH 26/33] C++: Use more descriptive names for identifiers in 'cpp/invalid-pointer-deref'. --- .../AllocationToInvalidPointer.qll | 72 +++++++++---------- .../InvalidPointerToDereference.qll | 52 +++++++------- 2 files changed, 61 insertions(+), 63 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll index 61821ee5bca..05cc53eb27e 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -48,10 +48,8 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { * but because there's a strict comparison that compares `n` against the size of the allocation this * snippet is fine. */ -module Barrier2 { - private class FlowState2 = int; - - private module BarrierConfig2 implements DataFlow::ConfigSig { +module SizeBarrier { + private module SizeBarrierConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { // The sources is the same as in the sources for the second // projection in the `AllocToInvalidPointerConfig` module. @@ -59,19 +57,19 @@ module Barrier2 { } additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state, - boolean testIsTrue + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue ) { - // The sink is any "large" side of a relational comparison. - g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + // The sink is any "large" side of a relational comparison. i.e., the `right` expression + // in a guard such as `left < right + k`. + g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue) } predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } } - private import DataFlow::Global + private import DataFlow::Global - private FlowState2 getAFlowStateForNode(DataFlow::Node node) { + private int getAFlowStateForNode(DataFlow::Node node) { exists(DataFlow::Node source | flow(source, node) and hasSize(_, source, result) @@ -79,14 +77,14 @@ module Barrier2 { } private predicate operandGuardChecks( - IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge + IRGuardCondition g, Operand left, Operand right, int state, boolean edge ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 | + exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k | nRight.asOperand() = right and nLeft.asOperand() = left and - BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and + SizeBarrierConfig::isSink(nLeft, nRight, g, k, edge) and state = getAFlowStateForNode(nRight) and - state0 <= state + k <= state ) } @@ -94,7 +92,7 @@ module Barrier2 { * Gets an instruction that is guarded by a guard condition which ensures that * the value of the instruction is upper-bounded by size of some allocation. */ - Instruction getABarrierInstruction(FlowState2 state) { + Instruction getABarrierInstruction(int state) { exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | use = value.getAUse() and operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, @@ -108,7 +106,7 @@ module Barrier2 { * Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that * the value of the node is upper-bounded by size of some allocation. */ - DataFlow::Node getABarrierNode(FlowState2 state) { + DataFlow::Node getABarrierNode(int state) { result.asOperand() = getABarrierInstruction(state).getAUse() } @@ -116,9 +114,7 @@ module Barrier2 { * Gets the block of a node that is guarded (see `getABarrierInstruction` or * `getABarrierNode` for the definition of what it means to be guarded). */ - IRBlock getABarrierBlock(FlowState2 state) { - result.getAnInstruction() = getABarrierInstruction(state) - } + IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) } } private module InterestingPointerAddInstruction { @@ -176,7 +172,7 @@ private module Config implements ProductFlow::StateConfigSig { class FlowState2 = int; predicate isSourcePair( - DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2 + DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 extra ) { // In the case of an allocation like // ```cpp @@ -184,21 +180,21 @@ private module Config implements ProductFlow::StateConfigSig { // ``` // we use `state2` to remember that there was an offset (in this case an offset of `1`) added // to the size of the allocation. This state is then checked in `isSinkPair`. - exists(state1) and - hasSize(source1.asConvertedExpr(), source2, state2) + exists(unit) and + hasSize(allocSource.asConvertedExpr(), sizeSource, extra) } predicate isSinkPair( - DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2 + DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 extra ) { - exists(state1) and + exists(unit) and // We check that the delta computed by the range analysis matches the // state value that we set in `isSourcePair`. - pointerAddInstructionHasBounds0(_, sink1, sink2, state2) + pointerAddInstructionHasBounds0(_, allocSink, sizeSink, extra) } predicate isBarrier2(DataFlow::Node node, FlowState2 state) { - node = Barrier2::getABarrierNode(state) + node = SizeBarrier::getABarrierNode(state) } predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) } @@ -226,17 +222,17 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; */ pragma[nomagic] private predicate pointerAddInstructionHasBounds0( - PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta + PointerAddInstruction pai, DataFlow::Node allocSink, DataFlow::Node sizeSink, int delta ) { InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and - exists(Instruction right, Instruction instr2 | + exists(Instruction right, Instruction sizeInstr | pai.getRight() = right and - pai.getLeft() = sink1.asInstruction() and - instr2 = sink2.asInstruction() and - // pai.getRight() <= sink2 + delta - bounded1(right, instr2, delta) and - not right = Barrier2::getABarrierInstruction(delta) and - not instr2 = Barrier2::getABarrierInstruction(delta) + pai.getLeft() = allocSink.asInstruction() and + sizeInstr = sizeSink.asInstruction() and + // pai.getRight() <= sizeSink + delta + bounded1(right, sizeInstr, delta) and + not right = SizeBarrier::getABarrierInstruction(delta) and + not sizeInstr = SizeBarrier::getABarrierInstruction(delta) ) } @@ -247,10 +243,10 @@ private predicate pointerAddInstructionHasBounds0( */ pragma[nomagic] predicate pointerAddInstructionHasBounds( - DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta + DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node allocSink, int delta ) { - exists(DataFlow::Node sink2 | - AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and - pointerAddInstructionHasBounds0(pai, sink1, sink2, delta) + exists(DataFlow::Node sizeSink | + AllocToInvalidPointerFlow::flow(allocation, _, allocSink, sizeSink) and + pointerAddInstructionHasBounds0(pai, allocSink, sizeSink, delta) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll index 3eb0a2da670..28a127d8d20 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll @@ -19,10 +19,10 @@ private module InvalidPointerToDerefBarrier { } additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int state, boolean testIsTrue + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue ) { // The sink is any "large" side of a relational comparison. - g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue) } predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } @@ -40,12 +40,12 @@ private module InvalidPointerToDerefBarrier { private predicate operandGuardChecks( IRGuardCondition g, Operand left, Operand right, int state, boolean edge ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, int state0 | + exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k | nRight.asOperand() = right and nLeft.asOperand() = left and - BarrierConfig::isSink(nLeft, nRight, g, state0, edge) and + BarrierConfig::isSink(nLeft, nRight, g, k, edge) and state = getInvalidPointerToDerefSourceDelta(nRight) and - state0 <= state + k <= state ) } @@ -91,22 +91,24 @@ private import DataFlow::Global * * For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such * as `(p + size) + 1` and `derefSource` is the node representing `(p + size) + 1`. In this - * case `delta` is 1. + * case `derefSourcePaiDelta` is 1. */ private predicate invalidPointerToDerefSource( - DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta + DataFlow::Node allocSource, PointerArithmeticInstruction pai, DataFlow::Node derefSource, + int deltaDerefSourceAndPai ) { - exists(int delta0 | - // Note that `delta` is not necessarily equal to `delta0`: - // `delta0` is the constant offset added to the size of the allocation, and - // delta is the constant difference between the pointer-arithmetic instruction + exists(int rhsSizeDelta | + // Note that `deltaDerefSourceAndPai` is not necessarily equal to `rhsSizeDelta`: + // `rhsSizeDelta` is the constant offset added to the size of the allocation, and + // `deltaDerefSourceAndPai` is the constant difference between the pointer-arithmetic instruction // and the instruction computing the address for which we will search for a dereference. - AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and - bounded2(derefSource.asInstruction(), pai, delta) and - delta >= 0 and - // TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2` + AllocToInvalidPointer::pointerAddInstructionHasBounds(allocSource, pai, _, rhsSizeDelta) and + bounded2(derefSource.asInstruction(), pai, deltaDerefSourceAndPai) and + deltaDerefSourceAndPai >= 0 and + // TODO: This condition will go away once #13725 is merged, and then we can make `SizeBarrier` // private to `AllocationToInvalidPointer.qll`. - not derefSource.getBasicBlock() = AllocToInvalidPointer::Barrier2::getABarrierBlock(delta0) + not derefSource.getBasicBlock() = + AllocToInvalidPointer::SizeBarrier::getABarrierBlock(rhsSizeDelta) ) } @@ -117,15 +119,15 @@ private predicate invalidPointerToDerefSource( */ pragma[inline] private predicate isInvalidPointerDerefSink( - DataFlow::Node sink, Instruction i, string operation, int delta + DataFlow::Node sink, Instruction i, string operation, int deltaDerefSinkAndDerefAddress ) { exists(AddressOperand addr, Instruction s, IRBlock b | s = sink.asInstruction() and - bounded(addr.getDef(), s, delta) and - delta >= 0 and + bounded(addr.getDef(), s, deltaDerefSinkAndDerefAddress) and + deltaDerefSinkAndDerefAddress >= 0 and i.getAnOperand() = addr and b = i.getBlock() and - not b = InvalidPointerToDerefBarrier::getABarrierBlock(delta) + not b = InvalidPointerToDerefBarrier::getABarrierBlock(deltaDerefSinkAndDerefAddress) | i instanceof StoreInstruction and operation = "write" @@ -165,13 +167,13 @@ private predicate paiForDereferenceSink(PointerArithmeticInstruction pai, DataFl */ private predicate derefSinkToOperation( DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation, - string description, int delta + string description, int deltaDerefSinkAndDerefAddress ) { - exists(Instruction i | + exists(Instruction operationInstr | paiForDereferenceSink(pai, pragma[only_bind_into](derefSink)) and - isInvalidPointerDerefSink(derefSink, i, description, delta) and - i = getASuccessor(derefSink.asInstruction()) and - operation.asInstruction() = i + isInvalidPointerDerefSink(derefSink, operationInstr, description, deltaDerefSinkAndDerefAddress) and + operationInstr = getASuccessor(derefSink.asInstruction()) and + operation.asInstruction() = operationInstr ) } From 83aef6fc167ea1abf831d065e66ac12683bd4e25 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 14:56:40 +0100 Subject: [PATCH 27/33] C++: Write formulas instead of 'non-strictly upper bounded by'. --- .../AllocationToInvalidPointer.qll | 22 +++++++++---------- .../InvalidPointerToDereference.qll | 9 ++++---- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll index 05cc53eb27e..0228f0c74ff 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -42,9 +42,9 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { * ``` * In this case, the sink pair identified by the product flow library (without any additional barriers) * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic - * instruction `pai` such that: - * 1. The left-hand of `pai` flows from the allocation, and - * 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`) + * instruction `pai = a + b` such that: + * 1. the allocation flows to `a`, and + * 2. `b <= n` where `n` is the `n` in `p[n]` * but because there's a strict comparison that compares `n` against the size of the allocation this * snippet is fine. */ @@ -147,8 +147,8 @@ private module InterestingPointerAddInstruction { } /** - * A product-flow configuration for flow from an (allocation, size) pair to a - * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`. + * A product-flow configuration for flow from an `(allocation, size)` pair to a + * pointer-arithmetic operation `pai` such that `pai <= allocation + size`. * * The goal of this query is to find patterns such as: * ```cpp @@ -207,7 +207,7 @@ private module Config implements ProductFlow::StateConfigSig { private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; /** - * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the + * Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink` is the * left operand of the pointer-arithmetic operation. * * For example in, @@ -216,8 +216,8 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; * ``` * We will have: * - `pai` is `p + (size + 1)`, - * - `sink1` is `p` - * - `sink2` is `size` + * - `allocSink` is `p` + * - `sizeSink` is `size` * - `delta` is `1`. */ pragma[nomagic] @@ -237,9 +237,9 @@ private predicate pointerAddInstructionHasBounds0( } /** - * Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand - * side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai` - * is non-strictly upper bounded by the size of `alllocation` + `delta`. + * Holds if `allocation` flows to `allocSink` and `allocSink` represents the left operand + * of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and + * `b <= allocation + delta`. */ pragma[nomagic] predicate pointerAddInstructionHasBounds( diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll index 28a127d8d20..5945e533153 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll @@ -85,9 +85,8 @@ private module InvalidPointerToDerefConfig implements DataFlow::ConfigSig { private import DataFlow::Global /** - * Holds if `source1` is dataflow node that represents an allocation that flows to the - * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with - * a pointer-value that is non-strictly upper bounded by `pai + delta`. + * Holds if `allocSource` is dataflow node that represents an allocation that flows to the + * left-hand side of the pointer-arithmetic `pai`, and `derefSource <= pai + derefSourcePaiDelta`. * * For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such * as `(p + size) + 1` and `derefSource` is the node representing `(p + size) + 1`. In this @@ -114,8 +113,8 @@ private predicate invalidPointerToDerefSource( /** * Holds if `sink` is a sink for `InvalidPointerToDerefConfig` and `i` is a `StoreInstruction` that - * writes to an address that non-strictly upper-bounds `sink`, or `i` is a `LoadInstruction` that - * reads from an address that non-strictly upper-bounds `sink`. + * writes to an address `addr` such that `addr <= sink`, or `i` is a `LoadInstruction` that + * reads from an address `addr` such that `addr <= sink`. */ pragma[inline] private predicate isInvalidPointerDerefSink( From d905b1e006e77ed05fb70b8824073d39c3d81008 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 15:38:38 +0100 Subject: [PATCH 28/33] C++: Add false positive. --- .../pointer-deref/InvalidPointerDeref.expected | 6 ++++++ .../Security/CWE/CWE-193/pointer-deref/test.cpp | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected index 881a1d8383d..86de9a6e1cf 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected @@ -221,6 +221,8 @@ edges | test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | | test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... | +| test.cpp:695:13:695:26 | new[] | test.cpp:698:5:698:10 | ... += ... | +| test.cpp:698:5:698:10 | ... += ... | test.cpp:701:15:701:16 | * ... | nodes | test.cpp:4:15:4:20 | call to malloc | semmle.label | call to malloc | | test.cpp:5:15:5:22 | ... + ... | semmle.label | ... + ... | @@ -370,6 +372,9 @@ nodes | test.cpp:662:3:662:11 | ... = ... | semmle.label | ... = ... | | test.cpp:667:14:667:31 | new[] | semmle.label | new[] | | test.cpp:675:7:675:23 | ... = ... | semmle.label | ... = ... | +| test.cpp:695:13:695:26 | new[] | semmle.label | new[] | +| test.cpp:698:5:698:10 | ... += ... | semmle.label | ... += ... | +| test.cpp:701:15:701:16 | * ... | semmle.label | * ... | subpaths #select | test.cpp:6:14:6:15 | * ... | test.cpp:4:15:4:20 | call to malloc | test.cpp:6:14:6:15 | * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:4:15:4:20 | call to malloc | call to malloc | test.cpp:5:19:5:22 | size | size | @@ -405,3 +410,4 @@ subpaths | test.cpp:647:5:647:19 | ... = ... | test.cpp:642:14:642:31 | new[] | test.cpp:647:5:647:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:642:14:642:31 | new[] | new[] | test.cpp:647:8:647:14 | src_pos | src_pos | | test.cpp:662:3:662:11 | ... = ... | test.cpp:652:14:652:27 | new[] | test.cpp:662:3:662:11 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@ + 1. | test.cpp:652:14:652:27 | new[] | new[] | test.cpp:653:19:653:22 | size | size | | test.cpp:675:7:675:23 | ... = ... | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:667:14:667:31 | new[] | new[] | test.cpp:675:10:675:18 | ... ++ | ... ++ | +| test.cpp:701:15:701:16 | * ... | test.cpp:695:13:695:26 | new[] | test.cpp:701:15:701:16 | * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:695:13:695:26 | new[] | new[] | test.cpp:696:19:696:22 | size | size | diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp index cfcbb9de9a5..14db116bb8f 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp @@ -689,4 +689,15 @@ void test_missing_call_context_2(unsigned size) { int* p = new int[size]; int* end_minus_one = pointer_arithmetic(p, size - 1); *end_minus_one = '0'; // $ deref=L680->L690->L691 // GOOD +} + +void test34(unsigned size) { + char *p = new char[size]; + char *end = p + size + 1; // $ alloc=L695 + if (p + 1 < end) { + p += 1; + } + if (p + 1 < end) { + int val = *p; // $ deref=L698->L700->L701 // GOOD [FALSE POSITIVE] + } } \ No newline at end of file From 0859c4f926a3942347e296696cb687e33f4c26ee Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 16:51:52 +0100 Subject: [PATCH 29/33] C++: Fix swapped arguments in 'invalidPointerToDerefSource'. --- .../InvalidPointerToDereference.qll | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll index 3eb0a2da670..67896a4de29 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll @@ -102,8 +102,10 @@ private predicate invalidPointerToDerefSource( // delta is the constant difference between the pointer-arithmetic instruction // and the instruction computing the address for which we will search for a dereference. AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and - bounded2(derefSource.asInstruction(), pai, delta) and - delta >= 0 and + // pai <= derefSource + delta and delta <= 0 is equivalent to + // derefSource >= pai + delta and delta >= 0 + bounded1(pai, derefSource.asInstruction(), delta) and + delta <= 0 and // TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2` // private to `AllocationToInvalidPointer.qll`. not derefSource.getBasicBlock() = AllocToInvalidPointer::Barrier2::getABarrierBlock(delta0) From d350c0d5c8ef328428bfd94bcca6672199fae0cd Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 16:52:21 +0100 Subject: [PATCH 30/33] C++: Accept test changes. --- .../InvalidPointerDeref.expected | 67 ------------------- .../CWE/CWE-193/pointer-deref/test.cpp | 30 ++++----- 2 files changed, 15 insertions(+), 82 deletions(-) diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected index 86de9a6e1cf..666434bc659 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected @@ -132,8 +132,6 @@ edges | test.cpp:304:15:304:26 | new[] | test.cpp:308:5:308:29 | ... = ... | | test.cpp:355:14:355:27 | new[] | test.cpp:356:15:356:23 | ... + ... | | test.cpp:355:14:355:27 | new[] | test.cpp:356:15:356:23 | ... + ... | -| test.cpp:355:14:355:27 | new[] | test.cpp:357:24:357:30 | ... + ... | -| test.cpp:355:14:355:27 | new[] | test.cpp:357:24:357:30 | ... + ... | | test.cpp:355:14:355:27 | new[] | test.cpp:358:14:358:26 | * ... | | test.cpp:355:14:355:27 | new[] | test.cpp:359:14:359:32 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:356:15:356:23 | ... + ... | @@ -141,88 +139,46 @@ edges | test.cpp:356:15:356:23 | ... + ... | test.cpp:358:14:358:26 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:359:14:359:32 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:359:14:359:32 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:357:24:357:30 | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:358:14:358:26 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:358:14:358:26 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:359:14:359:32 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:359:14:359:32 | * ... | | test.cpp:377:14:377:27 | new[] | test.cpp:378:15:378:23 | ... + ... | | test.cpp:377:14:377:27 | new[] | test.cpp:378:15:378:23 | ... + ... | -| test.cpp:377:14:377:27 | new[] | test.cpp:381:5:381:9 | ... ++ | -| test.cpp:377:14:377:27 | new[] | test.cpp:381:5:381:9 | ... ++ | | test.cpp:377:14:377:27 | new[] | test.cpp:384:13:384:16 | * ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:378:15:378:23 | ... + ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:384:13:384:16 | * ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:384:13:384:16 | * ... | -| test.cpp:381:5:381:9 | ... ++ | test.cpp:381:5:381:9 | ... ++ | -| test.cpp:381:5:381:9 | ... ++ | test.cpp:384:13:384:16 | * ... | | test.cpp:410:14:410:27 | new[] | test.cpp:411:15:411:23 | & ... | | test.cpp:410:14:410:27 | new[] | test.cpp:411:15:411:23 | & ... | -| test.cpp:410:14:410:27 | new[] | test.cpp:413:5:413:8 | ... ++ | -| test.cpp:410:14:410:27 | new[] | test.cpp:413:5:413:8 | ... ++ | | test.cpp:410:14:410:27 | new[] | test.cpp:415:7:415:15 | ... = ... | | test.cpp:411:15:411:23 | & ... | test.cpp:411:15:411:23 | & ... | | test.cpp:411:15:411:23 | & ... | test.cpp:415:7:415:15 | ... = ... | | test.cpp:411:15:411:23 | & ... | test.cpp:415:7:415:15 | ... = ... | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:413:5:413:8 | ... ++ | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:415:7:415:15 | ... = ... | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:415:7:415:15 | ... = ... | | test.cpp:421:14:421:27 | new[] | test.cpp:422:15:422:23 | & ... | | test.cpp:421:14:421:27 | new[] | test.cpp:422:15:422:23 | & ... | -| test.cpp:421:14:421:27 | new[] | test.cpp:424:5:424:8 | ... ++ | -| test.cpp:421:14:421:27 | new[] | test.cpp:424:5:424:8 | ... ++ | | test.cpp:421:14:421:27 | new[] | test.cpp:426:7:426:15 | ... = ... | | test.cpp:422:15:422:23 | & ... | test.cpp:422:15:422:23 | & ... | | test.cpp:422:15:422:23 | & ... | test.cpp:426:7:426:15 | ... = ... | | test.cpp:422:15:422:23 | & ... | test.cpp:426:7:426:15 | ... = ... | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:424:5:424:8 | ... ++ | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:426:7:426:15 | ... = ... | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:426:7:426:15 | ... = ... | | test.cpp:432:14:432:27 | new[] | test.cpp:433:15:433:23 | & ... | | test.cpp:432:14:432:27 | new[] | test.cpp:433:15:433:23 | & ... | -| test.cpp:432:14:432:27 | new[] | test.cpp:436:5:436:8 | ... ++ | -| test.cpp:432:14:432:27 | new[] | test.cpp:436:5:436:8 | ... ++ | | test.cpp:432:14:432:27 | new[] | test.cpp:438:7:438:15 | ... = ... | | test.cpp:433:15:433:23 | & ... | test.cpp:433:15:433:23 | & ... | | test.cpp:433:15:433:23 | & ... | test.cpp:438:7:438:15 | ... = ... | | test.cpp:433:15:433:23 | & ... | test.cpp:438:7:438:15 | ... = ... | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:436:5:436:8 | ... ++ | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:438:7:438:15 | ... = ... | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:438:7:438:15 | ... = ... | | test.cpp:444:14:444:27 | new[] | test.cpp:445:15:445:23 | & ... | | test.cpp:444:14:444:27 | new[] | test.cpp:445:15:445:23 | & ... | -| test.cpp:444:14:444:27 | new[] | test.cpp:448:5:448:8 | ... ++ | -| test.cpp:444:14:444:27 | new[] | test.cpp:448:5:448:8 | ... ++ | | test.cpp:444:14:444:27 | new[] | test.cpp:450:7:450:15 | ... = ... | | test.cpp:445:15:445:23 | & ... | test.cpp:445:15:445:23 | & ... | | test.cpp:445:15:445:23 | & ... | test.cpp:450:7:450:15 | ... = ... | | test.cpp:445:15:445:23 | & ... | test.cpp:450:7:450:15 | ... = ... | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:448:5:448:8 | ... ++ | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:450:7:450:15 | ... = ... | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:450:7:450:15 | ... = ... | | test.cpp:480:14:480:27 | new[] | test.cpp:481:15:481:23 | & ... | | test.cpp:480:14:480:27 | new[] | test.cpp:481:15:481:23 | & ... | -| test.cpp:480:14:480:27 | new[] | test.cpp:484:5:484:8 | ... ++ | -| test.cpp:480:14:480:27 | new[] | test.cpp:484:5:484:8 | ... ++ | | test.cpp:480:14:480:27 | new[] | test.cpp:486:7:486:15 | ... = ... | | test.cpp:481:15:481:23 | & ... | test.cpp:481:15:481:23 | & ... | | test.cpp:481:15:481:23 | & ... | test.cpp:486:7:486:15 | ... = ... | | test.cpp:481:15:481:23 | & ... | test.cpp:486:7:486:15 | ... = ... | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:484:5:484:8 | ... ++ | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:486:7:486:15 | ... = ... | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:486:7:486:15 | ... = ... | | test.cpp:543:14:543:27 | new[] | test.cpp:548:5:548:19 | ... = ... | | test.cpp:554:14:554:27 | new[] | test.cpp:559:5:559:19 | ... = ... | | test.cpp:642:14:642:31 | new[] | test.cpp:647:5:647:19 | ... = ... | -| test.cpp:652:14:652:27 | new[] | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:652:14:652:27 | new[] | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:652:14:652:27 | new[] | test.cpp:662:3:662:11 | ... = ... | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... | -| test.cpp:695:13:695:26 | new[] | test.cpp:698:5:698:10 | ... += ... | -| test.cpp:698:5:698:10 | ... += ... | test.cpp:701:15:701:16 | * ... | nodes | test.cpp:4:15:4:20 | call to malloc | semmle.label | call to malloc | | test.cpp:5:15:5:22 | ... + ... | semmle.label | ... + ... | @@ -320,45 +276,31 @@ nodes | test.cpp:355:14:355:27 | new[] | semmle.label | new[] | | test.cpp:356:15:356:23 | ... + ... | semmle.label | ... + ... | | test.cpp:356:15:356:23 | ... + ... | semmle.label | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | semmle.label | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | semmle.label | ... + ... | | test.cpp:358:14:358:26 | * ... | semmle.label | * ... | | test.cpp:359:14:359:32 | * ... | semmle.label | * ... | | test.cpp:377:14:377:27 | new[] | semmle.label | new[] | | test.cpp:378:15:378:23 | ... + ... | semmle.label | ... + ... | | test.cpp:378:15:378:23 | ... + ... | semmle.label | ... + ... | -| test.cpp:381:5:381:9 | ... ++ | semmle.label | ... ++ | -| test.cpp:381:5:381:9 | ... ++ | semmle.label | ... ++ | | test.cpp:384:13:384:16 | * ... | semmle.label | * ... | | test.cpp:410:14:410:27 | new[] | semmle.label | new[] | | test.cpp:411:15:411:23 | & ... | semmle.label | & ... | | test.cpp:411:15:411:23 | & ... | semmle.label | & ... | -| test.cpp:413:5:413:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:413:5:413:8 | ... ++ | semmle.label | ... ++ | | test.cpp:415:7:415:15 | ... = ... | semmle.label | ... = ... | | test.cpp:421:14:421:27 | new[] | semmle.label | new[] | | test.cpp:422:15:422:23 | & ... | semmle.label | & ... | | test.cpp:422:15:422:23 | & ... | semmle.label | & ... | -| test.cpp:424:5:424:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:424:5:424:8 | ... ++ | semmle.label | ... ++ | | test.cpp:426:7:426:15 | ... = ... | semmle.label | ... = ... | | test.cpp:432:14:432:27 | new[] | semmle.label | new[] | | test.cpp:433:15:433:23 | & ... | semmle.label | & ... | | test.cpp:433:15:433:23 | & ... | semmle.label | & ... | -| test.cpp:436:5:436:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:436:5:436:8 | ... ++ | semmle.label | ... ++ | | test.cpp:438:7:438:15 | ... = ... | semmle.label | ... = ... | | test.cpp:444:14:444:27 | new[] | semmle.label | new[] | | test.cpp:445:15:445:23 | & ... | semmle.label | & ... | | test.cpp:445:15:445:23 | & ... | semmle.label | & ... | -| test.cpp:448:5:448:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:448:5:448:8 | ... ++ | semmle.label | ... ++ | | test.cpp:450:7:450:15 | ... = ... | semmle.label | ... = ... | | test.cpp:480:14:480:27 | new[] | semmle.label | new[] | | test.cpp:481:15:481:23 | & ... | semmle.label | & ... | | test.cpp:481:15:481:23 | & ... | semmle.label | & ... | -| test.cpp:484:5:484:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:484:5:484:8 | ... ++ | semmle.label | ... ++ | | test.cpp:486:7:486:15 | ... = ... | semmle.label | ... = ... | | test.cpp:543:14:543:27 | new[] | semmle.label | new[] | | test.cpp:548:5:548:19 | ... = ... | semmle.label | ... = ... | @@ -366,15 +308,8 @@ nodes | test.cpp:559:5:559:19 | ... = ... | semmle.label | ... = ... | | test.cpp:642:14:642:31 | new[] | semmle.label | new[] | | test.cpp:647:5:647:19 | ... = ... | semmle.label | ... = ... | -| test.cpp:652:14:652:27 | new[] | semmle.label | new[] | -| test.cpp:656:3:656:6 | ... ++ | semmle.label | ... ++ | -| test.cpp:656:3:656:6 | ... ++ | semmle.label | ... ++ | -| test.cpp:662:3:662:11 | ... = ... | semmle.label | ... = ... | | test.cpp:667:14:667:31 | new[] | semmle.label | new[] | | test.cpp:675:7:675:23 | ... = ... | semmle.label | ... = ... | -| test.cpp:695:13:695:26 | new[] | semmle.label | new[] | -| test.cpp:698:5:698:10 | ... += ... | semmle.label | ... += ... | -| test.cpp:701:15:701:16 | * ... | semmle.label | * ... | subpaths #select | test.cpp:6:14:6:15 | * ... | test.cpp:4:15:4:20 | call to malloc | test.cpp:6:14:6:15 | * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:4:15:4:20 | call to malloc | call to malloc | test.cpp:5:19:5:22 | size | size | @@ -408,6 +343,4 @@ subpaths | test.cpp:548:5:548:19 | ... = ... | test.cpp:543:14:543:27 | new[] | test.cpp:548:5:548:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:543:14:543:27 | new[] | new[] | test.cpp:548:8:548:14 | src_pos | src_pos | | test.cpp:559:5:559:19 | ... = ... | test.cpp:554:14:554:27 | new[] | test.cpp:559:5:559:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:554:14:554:27 | new[] | new[] | test.cpp:559:8:559:14 | src_pos | src_pos | | test.cpp:647:5:647:19 | ... = ... | test.cpp:642:14:642:31 | new[] | test.cpp:647:5:647:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:642:14:642:31 | new[] | new[] | test.cpp:647:8:647:14 | src_pos | src_pos | -| test.cpp:662:3:662:11 | ... = ... | test.cpp:652:14:652:27 | new[] | test.cpp:662:3:662:11 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@ + 1. | test.cpp:652:14:652:27 | new[] | new[] | test.cpp:653:19:653:22 | size | size | | test.cpp:675:7:675:23 | ... = ... | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:667:14:667:31 | new[] | new[] | test.cpp:675:10:675:18 | ... ++ | ... ++ | -| test.cpp:701:15:701:16 | * ... | test.cpp:695:13:695:26 | new[] | test.cpp:701:15:701:16 | * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:695:13:695:26 | new[] | new[] | test.cpp:696:19:696:22 | size | size | diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp index 14db116bb8f..f1d1b983c1f 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp @@ -17,7 +17,7 @@ void test2(int size) { char* q = p + size - 1; // $ alloc=L16 char a = *q; // GOOD char b = *(q - 1); // GOOD - char c = *(q + 1); // $ deref=L20 // BAD + char c = *(q + 1); // $ deref=L17->L20 // BAD char d = *(q + size); // BAD [NOT DETECTED] char e = *(q - size); // GOOD char f = *(q + size + 1); // BAD [NOT DETECTED] @@ -198,7 +198,7 @@ void test12(unsigned len, unsigned index) { return; } - p[index] = '\0'; // $ deref=L201 // BAD + p[index] = '\0'; // $ deref=L195->L201 deref=L197->L201 // BAD } void test13(unsigned len, unsigned index) { @@ -210,7 +210,7 @@ void test13(unsigned len, unsigned index) { return; } - *q = '\0'; // $ deref=L213 // BAD + *q = '\0'; // $ deref=L206->L213 deref=L209->L213 // BAD } bool unknown(); @@ -261,7 +261,7 @@ void test17(unsigned len) int *end = xs + len; // $ alloc=L260 for (int *x = xs; x <= end; x++) { - int i = *x; // $ deref=L264 // BAD + int i = *x; // $ deref=L261->L264 deref=L262->L264 // BAD } } @@ -271,7 +271,7 @@ void test18(unsigned len) int *end = xs + len; // $ alloc=L270 for (int *x = xs; x <= end; x++) { - *x = 0; // $ deref=L274 // BAD + *x = 0; // $ deref=L271->L274 deref=L272->L274 // BAD } } @@ -355,8 +355,8 @@ void test25(unsigned size) { char *xs = new char[size]; char *end = xs + size; // $ alloc=L355 char *end_plus_one = end + 1; - int val1 = *end_plus_one; // $ deref=L358+1 // BAD - int val2 = *(end_plus_one + 1); // $ deref=L359+2 // BAD + int val1 = *end_plus_one; // $ deref=L356->L358+1 deref=L357->L358+1 // BAD + int val2 = *(end_plus_one + 1); // $ deref=L356->L359+2 deref=L357->L359+2 // BAD } void test26(unsigned size) { @@ -381,7 +381,7 @@ void test27(unsigned size, bool b) { end++; } - int val = *end; // $ deref=L384+1 // BAD + int val = *end; // $ deref=L378->L384+1 deref=L381->L384+1 // BAD } void test28(unsigned size) { @@ -412,7 +412,7 @@ void test28_simple2(unsigned size) { if (xs < end) { xs++; if (xs < end + 1) { - xs[0] = 0; // $ deref=L415 // BAD + xs[0] = 0; // $ deref=L411->L415 deref=L412->L415 deref=L414->L415 // BAD } } } @@ -423,7 +423,7 @@ void test28_simple3(unsigned size) { if (xs < end) { xs++; if (xs - 1 < end) { - xs[0] = 0; // $ deref=L426 // BAD + xs[0] = 0; // $ deref=L422->L426 deref=L423->L426 deref=L425->L426 // BAD } } } @@ -435,7 +435,7 @@ void test28_simple4(unsigned size) { end++; xs++; if (xs < end) { - xs[0] = 0; // $ deref=L438 // BAD + xs[0] = 0; // $ deref=L433->L438 deref=L434->L438 deref=L435->L438 // BAD } } } @@ -447,7 +447,7 @@ void test28_simple5(unsigned size) { if (xs < end) { xs++; if (xs < end) { - xs[0] = 0; // $ deref=L450 // BAD + xs[0] = 0; // $ deref=L445->L450 deref=L446->L450 // BAD } } } @@ -483,7 +483,7 @@ void test28_simple8(unsigned size) { if (xs < end) { xs++; if (xs < end - 1) { - xs[0] = 0; // $ deref=L486+498 // BAD + xs[0] = 0; // $ deref=L481->L486+498 deref=L482->L486+498 // BAD } } } @@ -659,7 +659,7 @@ void test32(unsigned size) { xs++; if (xs >= end) return; - xs[0] = 0; // $ deref=L656->L662+1 deref=L657->L662+1 GOOD [FALSE POSITIVE] + xs[0] = 0; // $ GOOD } void test33(unsigned size, unsigned src_pos) @@ -698,6 +698,6 @@ void test34(unsigned size) { p += 1; } if (p + 1 < end) { - int val = *p; // $ deref=L698->L700->L701 // GOOD [FALSE POSITIVE] + int val = *p; // GOOD } } \ No newline at end of file From 742f080a55f8d0db97b1bda708f4b8f1f561b484 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 21 Jul 2023 16:52:36 +0100 Subject: [PATCH 31/33] C++: This predicate is no longer used. --- .../InvalidPointerDereference/RangeAnalysisUtil.qll | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll index 12bb50321fa..c15dc4d1d29 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll @@ -35,14 +35,5 @@ bindingset[i] pragma[inline_late] predicate bounded1(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } -/** - * Holds if `i <= b + delta`. - * - * This predicate enforces a join-order that ensures that `b` has already been bound. - */ -bindingset[b] -pragma[inline_late] -predicate bounded2(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } - /** Holds if `i <= b + delta`. */ predicate bounded = boundedImpl/3; From 419bbbc9ac89e01a0a701d180bf50fcc125a250b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 24 Jul 2023 00:17:53 +0000 Subject: [PATCH 32/33] Add changed framework coverage reports --- java/documentation/library-coverage/coverage.csv | 2 +- java/documentation/library-coverage/coverage.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/documentation/library-coverage/coverage.csv b/java/documentation/library-coverage/coverage.csv index 1681b511d92..b0d449b810a 100644 --- a/java/documentation/library-coverage/coverage.csv +++ b/java/documentation/library-coverage/coverage.csv @@ -55,7 +55,7 @@ jakarta.ws.rs.container,,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,, jakarta.ws.rs.core,2,,149,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,,,,,,94,55 java.awt,,,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3 java.beans,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1, -java.io,50,,45,,,22,,,,,,,,,,,,,,28,,,,,,,,,,,,,,,,,,,43,2 +java.io,50,,46,,,22,,,,,,,,,,,,,,28,,,,,,,,,,,,,,,,,,,44,2 java.lang,31,,93,,13,,,,,,,,,,,,8,,,5,,,4,,,1,,,,,,,,,,,,,57,36 java.net,13,3,23,,,,,,,,,,,,,,,,,,,,,,,,,,13,,,,,,,,,3,23, java.nio,53,,36,,,5,,,,,,,,,,,,,,47,,,,,,,,,1,,,,,,,,,,36, diff --git a/java/documentation/library-coverage/coverage.rst b/java/documentation/library-coverage/coverage.rst index d07f04c3897..80ace3a0b33 100644 --- a/java/documentation/library-coverage/coverage.rst +++ b/java/documentation/library-coverage/coverage.rst @@ -18,10 +18,10 @@ Java framework & library support `Google Guava `_,``com.google.common.*``,,730,41,7,,,,, JBoss Logging,``org.jboss.logging``,,,324,,,,,, `JSON-java `_,``org.json``,,236,,,,,,, - Java Standard Library,``java.*``,3,688,205,80,,9,,,18 + Java Standard Library,``java.*``,3,689,205,80,,9,,,18 Java extensions,"``javax.*``, ``jakarta.*``",63,672,34,2,4,,1,1,2 Kotlin Standard Library,``kotlin*``,,1849,16,14,,,,,2 `Spring `_,``org.springframework.*``,29,483,115,4,,28,14,,35 Others,"``antlr``, ``cn.hutool.core.codec``, ``com.alibaba.druid.sql``, ``com.esotericsoftware.kryo.io``, ``com.esotericsoftware.kryo5.io``, ``com.fasterxml.jackson.core``, ``com.fasterxml.jackson.databind``, ``com.google.gson``, ``com.hubspot.jinjava``, ``com.jcraft.jsch``, ``com.mitchellbosecke.pebble``, ``com.opensymphony.xwork2.ognl``, ``com.rabbitmq.client``, ``com.thoughtworks.xstream``, ``com.unboundid.ldap.sdk``, ``com.zaxxer.hikari``, ``flexjson``, ``freemarker.cache``, ``freemarker.template``, ``groovy.lang``, ``groovy.text``, ``groovy.util``, ``hudson``, ``io.jsonwebtoken``, ``io.netty.bootstrap``, ``io.netty.buffer``, ``io.netty.channel``, ``io.netty.handler.codec``, ``io.netty.handler.ssl``, ``io.netty.handler.stream``, ``io.netty.resolver``, ``io.netty.util``, ``javafx.scene.web``, ``jenkins``, ``jodd.json``, ``net.sf.json``, ``net.sf.saxon.s9api``, ``ognl``, ``okhttp3``, ``org.acegisecurity``, ``org.antlr.runtime``, ``org.apache.commons.codec``, ``org.apache.commons.compress.archivers.tar``, ``org.apache.commons.exec``, ``org.apache.commons.httpclient.util``, ``org.apache.commons.jelly``, ``org.apache.commons.jexl2``, ``org.apache.commons.jexl3``, ``org.apache.commons.lang``, ``org.apache.commons.logging``, ``org.apache.commons.net``, ``org.apache.commons.ognl``, ``org.apache.directory.ldap.client.api``, ``org.apache.hadoop.fs``, ``org.apache.hadoop.hive.metastore``, ``org.apache.hc.client5.http.async.methods``, ``org.apache.hc.client5.http.classic.methods``, ``org.apache.hc.client5.http.fluent``, ``org.apache.hive.hcatalog.templeton``, ``org.apache.ibatis.jdbc``, ``org.apache.log4j``, ``org.apache.shiro.codec``, ``org.apache.shiro.jndi``, ``org.apache.tools.ant``, ``org.apache.tools.zip``, ``org.apache.velocity.app``, ``org.apache.velocity.runtime``, ``org.codehaus.cargo.container.installer``, ``org.codehaus.groovy.control``, ``org.dom4j``, ``org.eclipse.jetty.client``, ``org.fusesource.leveldbjni``, ``org.geogebra.web.full.main``, ``org.gradle.api.file``, ``org.hibernate``, ``org.influxdb``, ``org.jdbi.v3.core``, ``org.jenkins.ui.icon``, ``org.jenkins.ui.symbol``, ``org.jooq``, ``org.kohsuke.stapler``, ``org.mvel2``, ``org.openjdk.jmh.runner.options``, ``org.scijava.log``, ``org.slf4j``, ``org.thymeleaf``, ``org.xml.sax``, ``org.xmlpull.v1``, ``org.yaml.snakeyaml``, ``play.libs.ws``, ``play.mvc``, ``ratpack.core.form``, ``ratpack.core.handling``, ``ratpack.core.http``, ``ratpack.exec``, ``ratpack.form``, ``ratpack.func``, ``ratpack.handling``, ``ratpack.http``, ``ratpack.util``, ``retrofit2``",126,5237,577,89,6,18,18,,200 - Totals,,283,13606,2067,290,16,122,33,1,391 + Totals,,283,13607,2067,290,16,122,33,1,391 From c44507cc42c0b74fc541db6bf2b11b5a01440c6f Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 24 Jul 2023 10:57:25 +0200 Subject: [PATCH 33/33] C++: 'sizeAddend' instead of 'extra'. --- .../AllocationToInvalidPointer.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll index 0228f0c74ff..efa307527cb 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -172,7 +172,7 @@ private module Config implements ProductFlow::StateConfigSig { class FlowState2 = int; predicate isSourcePair( - DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 extra + DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 sizeAddend ) { // In the case of an allocation like // ```cpp @@ -181,16 +181,16 @@ private module Config implements ProductFlow::StateConfigSig { // we use `state2` to remember that there was an offset (in this case an offset of `1`) added // to the size of the allocation. This state is then checked in `isSinkPair`. exists(unit) and - hasSize(allocSource.asConvertedExpr(), sizeSource, extra) + hasSize(allocSource.asConvertedExpr(), sizeSource, sizeAddend) } predicate isSinkPair( - DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 extra + DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 sizeAddend ) { exists(unit) and // We check that the delta computed by the range analysis matches the // state value that we set in `isSourcePair`. - pointerAddInstructionHasBounds0(_, allocSink, sizeSink, extra) + pointerAddInstructionHasBounds0(_, allocSink, sizeSink, sizeAddend) } predicate isBarrier2(DataFlow::Node node, FlowState2 state) {