From 9826e7df1d63a85cc1ff7ce3dbecb0944c8ca2ee Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Wed, 13 Oct 2021 21:53:37 +0200 Subject: [PATCH] Add codeql-ruby sources (236643fc43b8ae09e15dfa13e86bfdb61a106668) --- repo-tests/codeql-ruby.txt | 1 + .../ql/consistency-queries/AstConsistency.ql | 25 + .../ql/consistency-queries/CfgConsistency.ql | 1 + .../DataFlowConsistency.ql | 1 + .../ql/consistency-queries/SsaConsistency.ql | 22 + .../VariablesConsistency.ql | 6 + .../ql/consistency-queries/qlpack.yml | 5 + repo-tests/codeql-ruby/ql/examples/qlpack.yml | 4 + .../ql/examples/snippets/emptythen.ql | 18 + .../ql/lib/codeql/IDEContextual.qll | 19 + .../codeql-ruby/ql/lib/codeql/Locations.qll | 66 + .../ql/lib/codeql/files/FileSystem.qll | 173 + .../codeql-ruby/ql/lib/codeql/ruby/AST.qll | 141 + .../ql/lib/codeql/ruby/ApiGraphs.qll | 408 ++ .../codeql-ruby/ql/lib/codeql/ruby/CFG.qll | 5 + .../ql/lib/codeql/ruby/Concepts.qll | 585 +++ .../ql/lib/codeql/ruby/DataFlow.qll | 7 + .../ql/lib/codeql/ruby/DataFlow2.qll | 7 + .../ql/lib/codeql/ruby/Diagnostics.qll | 52 + .../ql/lib/codeql/ruby/Frameworks.qll | 11 + .../ql/lib/codeql/ruby/TaintTracking.qll | 7 + .../ql/lib/codeql/ruby/ast/Call.qll | 215 + .../ql/lib/codeql/ruby/ast/Constant.qll | 210 + .../ql/lib/codeql/ruby/ast/Control.qll | 611 +++ .../ql/lib/codeql/ruby/ast/Erb.qll | 313 ++ .../ql/lib/codeql/ruby/ast/Expr.qll | 456 ++ .../ql/lib/codeql/ruby/ast/Literal.qll | 892 ++++ .../ql/lib/codeql/ruby/ast/Method.qll | 228 + .../ql/lib/codeql/ruby/ast/Module.qll | 365 ++ .../ql/lib/codeql/ruby/ast/Operation.qll | 620 +++ .../ql/lib/codeql/ruby/ast/Parameter.qll | 248 + .../ql/lib/codeql/ruby/ast/Pattern.qll | 96 + .../ql/lib/codeql/ruby/ast/Scope.qll | 22 + .../ql/lib/codeql/ruby/ast/Statement.qll | 248 + .../ql/lib/codeql/ruby/ast/Variable.qll | 187 + .../ql/lib/codeql/ruby/ast/internal/AST.qll | 704 +++ .../ql/lib/codeql/ruby/ast/internal/Call.qll | 186 + .../ql/lib/codeql/ruby/ast/internal/Erb.qll | 43 + 
.../lib/codeql/ruby/ast/internal/Module.qll | 409 ++ .../codeql/ruby/ast/internal/Operation.qll | 198 + .../codeql/ruby/ast/internal/Parameter.qll | 19 + .../lib/codeql/ruby/ast/internal/Pattern.qll | 32 + .../ql/lib/codeql/ruby/ast/internal/Scope.qll | 109 + .../codeql/ruby/ast/internal/Synthesis.qll | 797 +++ .../codeql/ruby/ast/internal/TreeSitter.qll | 2000 ++++++++ .../lib/codeql/ruby/ast/internal/Variable.qll | 604 +++ .../codeql/ruby/controlflow/BasicBlocks.qll | 414 ++ .../lib/codeql/ruby/controlflow/CfgNodes.qll | 484 ++ .../ruby/controlflow/ControlFlowGraph.qll | 341 ++ .../ruby/controlflow/internal/Completion.qll | 507 ++ .../internal/ControlFlowGraphImpl.qll | 1164 +++++ .../internal/ControlFlowGraphImplShared.qll | 945 ++++ .../internal/ControlFlowGraphImplSpecific.qll | 74 + .../controlflow/internal/NonReturning.qll | 22 + .../ruby/controlflow/internal/Splitting.qll | 336 ++ .../codeql/ruby/dataflow/BarrierGuards.qll | 75 + .../lib/codeql/ruby/dataflow/FlowSummary.qll | 125 + .../ruby/dataflow/RemoteFlowSources.qll | 37 + .../ql/lib/codeql/ruby/dataflow/SSA.qll | 385 ++ .../dataflow/internal/DataFlowDispatch.qll | 459 ++ .../ruby/dataflow/internal/DataFlowImpl.qll | 4559 +++++++++++++++++ .../ruby/dataflow/internal/DataFlowImpl2.qll | 4559 +++++++++++++++++ .../dataflow/internal/DataFlowImplCommon.qll | 1294 +++++ .../internal/DataFlowImplConsistency.qll | 181 + .../internal/DataFlowImplSpecific.qll | 11 + .../dataflow/internal/DataFlowPrivate.qll | 799 +++ .../ruby/dataflow/internal/DataFlowPublic.qll | 210 + .../dataflow/internal/FlowSummaryImpl.qll | 964 ++++ .../internal/FlowSummaryImplSpecific.qll | 117 + .../codeql/ruby/dataflow/internal/SsaImpl.qll | 289 ++ .../ruby/dataflow/internal/SsaImplCommon.qll | 637 +++ .../dataflow/internal/SsaImplSpecific.qll | 34 + .../internal/TaintTrackingPrivate.qll | 41 + .../dataflow/internal/TaintTrackingPublic.qll | 31 + .../tainttracking1/TaintTrackingImpl.qll | 120 + 
.../tainttracking1/TaintTrackingParameter.qll | 6 + .../lib/codeql/ruby/filters/GeneratedCode.qll | 43 + .../ruby/frameworks/ActionController.qll | 259 + .../lib/codeql/ruby/frameworks/ActionView.qll | 138 + .../codeql/ruby/frameworks/ActiveRecord.qll | 319 ++ .../ql/lib/codeql/ruby/frameworks/Files.qll | 299 ++ .../codeql/ruby/frameworks/HttpClients.qll | 12 + .../ruby/frameworks/StandardLibrary.qll | 337 ++ .../lib/codeql/ruby/frameworks/XmlParsing.qll | 182 + .../ruby/frameworks/http_clients/Excon.qll | 130 + .../ruby/frameworks/http_clients/Faraday.qll | 140 + .../frameworks/http_clients/HttpClient.qll | 55 + .../ruby/frameworks/http_clients/Httparty.qll | 95 + .../ruby/frameworks/http_clients/NetHttp.qll | 69 + .../ruby/frameworks/http_clients/OpenURI.qll | 113 + .../frameworks/http_clients/RestClient.qll | 71 + .../ruby/frameworks/http_clients/Typhoeus.qll | 74 + .../ql/lib/codeql/ruby/printAst.qll | 203 + .../ruby/regexp/ExponentialBackTracking.qll | 343 ++ .../ql/lib/codeql/ruby/regexp/ParseRegExp.qll | 891 ++++ .../regexp/PolynomialReDoSCustomizations.qll | 131 + .../ruby/regexp/PolynomialReDoSQuery.qll | 37 + .../ql/lib/codeql/ruby/regexp/ReDoSUtil.qll | 1186 +++++ .../lib/codeql/ruby/regexp/RegExpTreeView.qll | 724 +++ .../ruby/regexp/SuperlinearBackTracking.qll | 420 ++ .../security/CodeInjectionCustomizations.qll | 40 + .../ruby/security/CodeInjectionQuery.qll | 29 + .../CommandInjectionCustomizations.qll | 54 + .../ruby/security/CommandInjectionQuery.qll | 32 + .../ruby/security/ReflectedXSSQuery.qll | 39 + .../codeql/ruby/security/StoredXSSQuery.qll | 40 + .../UnsafeDeserializationCustomizations.qll | 190 + .../security/UnsafeDeserializationQuery.qll | 34 + .../security/UrlRedirectCustomizations.qll | 127 + .../codeql/ruby/security/UrlRedirectQuery.qll | 34 + .../ql/lib/codeql/ruby/security/XSS.qll | 369 ++ .../codeql/ruby/typetracking/TypeTracker.qll | 470 ++ .../ruby/typetracking/TypeTrackerSpecific.qll | 146 + 
repo-tests/codeql-ruby/ql/lib/qlpack.yml | 6 + repo-tests/codeql-ruby/ql/lib/ruby.dbscheme | 1318 +++++ repo-tests/codeql-ruby/ql/lib/ruby.qll | 1 + repo-tests/codeql-ruby/ql/lib/tutorial.qll | 1207 +++++ .../codeql-ruby/ql/src/AlertSuppression.ql | 82 + .../src/experimental/performance/UseDetect.ql | 64 + .../ql/src/filters/ClassifyFiles.ql | 20 + .../localDefinitions.ql | 20 + .../ide-contextual-queries/localReferences.ql | 21 + .../ql/src/ide-contextual-queries/printAst.ql | 27 + repo-tests/codeql-ruby/ql/src/qlpack.yml | 7 + .../ql/src/queries/analysis/Definitions.ql | 81 + .../queries/diagnostics/ExtractionErrors.ql | 18 + .../diagnostics/SuccessfullyExtractedFiles.ql | 16 + .../ql/src/queries/metrics/FLines.ql | 13 + .../ql/src/queries/metrics/FLinesOfCode.ql | 14 + .../src/queries/metrics/FLinesOfComments.ql | 13 + .../security/cwe-078/CommandInjection.ql | 25 + .../queries/security/cwe-078/KernelOpen.ql | 76 + .../queries/security/cwe-079/ReflectedXSS.ql | 24 + .../src/queries/security/cwe-079/StoredXSS.ql | 23 + .../queries/security/cwe-089/SqlInjection.ql | 39 + .../queries/security/cwe-094/CodeInjection.ql | 27 + .../security/cwe-1333/PolynomialReDoS.ql | 31 + .../ql/src/queries/security/cwe-1333/ReDoS.ql | 25 + .../cwe-295/RequestWithoutValidation.ql | 20 + .../security/cwe-502/UnsafeDeserialization.ql | 21 + .../queries/security/cwe-601/UrlRedirect.ql | 22 + .../ql/src/queries/security/cwe-611/Xxe.ql | 43 + .../security/cwe-732/WeakFilePermissions.ql | 64 + .../security/cwe-798/HardcodedCredentials.ql | 155 + .../ql/src/queries/summary/LinesOfCode.ql | 15 + .../ql/src/queries/summary/LinesOfUserCode.ql | 19 + .../NumberOfFilesExtractedWithErrors.ql | 15 + .../NumberOfSuccessfullyExtractedFiles.ql | 15 + .../src/queries/variables/DeadStoreOfLocal.ql | 28 + .../queries/variables/UninitializedLocal.ql | 32 + .../src/queries/variables/UnusedParameter.ql | 27 + 151 files changed, 42145 insertions(+) create mode 100644 repo-tests/codeql-ruby.txt create mode 
100644 repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql create mode 100644 repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql create mode 100644 repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql create mode 100644 repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql create mode 100644 repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql create mode 100644 repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml create mode 100644 repo-tests/codeql-ruby/ql/examples/qlpack.yml create mode 100644 repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll create mode 100755 repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll create mode 100644 
repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Method.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll create mode 100644 
repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll create mode 100644 
repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll create mode 100755 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll create mode 100755 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll create mode 100644 
repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/SuperlinearBackTracking.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll create mode 100644 
repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/qlpack.yml create mode 100644 repo-tests/codeql-ruby/ql/lib/ruby.dbscheme create mode 100644 repo-tests/codeql-ruby/ql/lib/ruby.qll create mode 100644 repo-tests/codeql-ruby/ql/lib/tutorial.qll create mode 100644 repo-tests/codeql-ruby/ql/src/AlertSuppression.ql create mode 100644 repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql create mode 100644 repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql create mode 100644 repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql create mode 100644 repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql create mode 100644 repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql create mode 100644 repo-tests/codeql-ruby/ql/src/qlpack.yml create mode 100644 repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql create mode 100644 
repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql create mode 100644 repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql diff --git a/repo-tests/codeql-ruby.txt b/repo-tests/codeql-ruby.txt new file mode 100644 index 00000000000..a4f62379f8a --- /dev/null +++ b/repo-tests/codeql-ruby.txt @@ -0,0 +1 @@ +236643fc43b8ae09e15dfa13e86bfdb61a106668 diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql new file mode 100644 index 00000000000..8a5ebcdcda7 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql @@ -0,0 +1,25 @@ +import codeql.ruby.AST +import 
codeql.ruby.ast.internal.Synthesis + +query predicate missingParent(AstNode node, string cls) { + not exists(node.getParent()) and + node.getLocation().getFile().getExtension() != "erb" and + not node instanceof Toplevel and + cls = node.getPrimaryQlClasses() +} + +pragma[noinline] +private AstNode parent(AstNode child, int desugarLevel) { + result = child.getParent() and + desugarLevel = desugarLevel(result) +} + +query predicate multipleParents(AstNode node, AstNode parent, string cls) { + parent = node.getParent() and + cls = parent.getPrimaryQlClasses() and + exists(AstNode one, AstNode two, int desugarLevel | + one = parent(node, desugarLevel) and + two = parent(node, desugarLevel) and + one != two + ) +} diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql new file mode 100644 index 00000000000..c2aaaad0ac1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql @@ -0,0 +1 @@ +import codeql.ruby.controlflow.internal.ControlFlowGraphImplShared::Consistency diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql new file mode 100644 index 00000000000..f5bc9552ab6 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql @@ -0,0 +1 @@ +import codeql.ruby.dataflow.internal.DataFlowImplConsistency::Consistency diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql new file mode 100644 index 00000000000..79289273f95 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql @@ -0,0 +1,22 @@ +import ruby +import codeql.ruby.dataflow.SSA +import codeql.ruby.controlflow.ControlFlowGraph + +query predicate nonUniqueDef(CfgNode read, Ssa::Definition def) { + read = def.getARead() and + exists(Ssa::Definition other | 
read = other.getARead() and other != def) +} + +query predicate readWithoutDef(LocalVariableReadAccess read) { + exists(CfgNode node | + node = read.getAControlFlowNode() and + not node = any(Ssa::Definition def).getARead() + ) +} + +query predicate deadDef(Ssa::Definition def, LocalVariable v) { + v = def.getSourceVariable() and + not v.isCaptured() and + not exists(def.getARead()) and + not def = any(Ssa::PhiNode phi).getAnInput() +} diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql new file mode 100644 index 00000000000..ed2183340d9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql @@ -0,0 +1,6 @@ +import codeql.ruby.ast.Variable + +query predicate ambiguousVariable(VariableAccess access, Variable variable) { + access.getVariable() = variable and + count(access.getVariable()) > 1 +} diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml b/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml new file mode 100644 index 00000000000..fa76023b646 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml @@ -0,0 +1,5 @@ +name: codeql/ruby-consistency-queries +version: 0.0.1 +dependencies: + codeql/ruby-all: 0.0.1 + diff --git a/repo-tests/codeql-ruby/ql/examples/qlpack.yml b/repo-tests/codeql-ruby/ql/examples/qlpack.yml new file mode 100644 index 00000000000..87a6ffae9c1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/examples/qlpack.yml @@ -0,0 +1,4 @@ +name: codeql/ruby-examples +version: 0.0.2 +dependencies: + codeql/ruby-all: ^0.0.2 diff --git a/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql b/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql new file mode 100644 index 00000000000..531556fc7fa --- /dev/null +++ b/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql @@ -0,0 +1,18 @@ +/** + * @name If statements with empty then branch + * @description Finds 'if' 
statements where the 'then' branch is + * an empty block statement + * @id ruby/examples/emptythen + * @tags if + * then + * empty + * conditional + * branch + * statement + */ + +import ruby + +from IfExpr i +where not exists(i.getThen().getAChild()) +select i diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll b/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll new file mode 100644 index 00000000000..0e58b1d878b --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll @@ -0,0 +1,19 @@ +private import codeql.files.FileSystem + +/** + * Returns an appropriately encoded version of a filename `name` + * passed by the VS Code extension in order to coincide with the + * output of `.getFile()` on locatable entities. + */ +cached +File getFileBySourceArchiveName(string name) { + // The name provided for a file in the source archive by the VS Code extension + // has some differences from the absolute path in the database: + // 1. colons are replaced by underscores + // 2. there's a leading slash, even for Windows paths: "C:/foo/bar" -> + // "/C_/foo/bar" + // 3. double slashes in UNC prefixes are replaced with a single slash + // We can handle 2 and 3 together by unconditionally adding a leading slash + // before replacing double slashes. + name = ("/" + result.getAbsolutePath().replaceAll(":", "_")).replaceAll("//", "/") +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll new file mode 100644 index 00000000000..bd43633d49a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll @@ -0,0 +1,66 @@ +/** Provides classes for working with locations. */ + +import files.FileSystem + +/** + * A location as given by a file, a start line, a start column, + * an end line, and an end column. + * + * For more information about locations see [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ +class Location extends @location { + /** Gets the file for this location. */ + File getFile() { locations_default(this, result, _, _, _, _) } + + /** Gets the 1-based line number (inclusive) where this location starts. */ + int getStartLine() { locations_default(this, _, result, _, _, _) } + + /** Gets the 1-based column number (inclusive) where this location starts. */ + int getStartColumn() { locations_default(this, _, _, result, _, _) } + + /** Gets the 1-based line number (inclusive) where this location ends. */ + int getEndLine() { locations_default(this, _, _, _, result, _) } + + /** Gets the 1-based column number (inclusive) where this location ends. */ + int getEndColumn() { locations_default(this, _, _, _, _, result) } + + /** Gets the number of lines covered by this location. */ + int getNumLines() { result = getEndLine() - getStartLine() + 1 } + + /** Gets a textual representation of this element. */ + string toString() { + exists(string filepath, int startline, int startcolumn, int endline, int endcolumn | + hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) and + result = filepath + "@" + startline + ":" + startcolumn + ":" + endline + ":" + endcolumn + ) + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(File f | + locations_default(this, f, startline, startcolumn, endline, endcolumn) and + filepath = f.getAbsolutePath() + ) + } + + /** Holds if this location starts strictly before the specified location. 
*/ + pragma[inline] + predicate strictlyBefore(Location other) { + this.getStartLine() < other.getStartLine() + or + this.getStartLine() = other.getStartLine() and this.getStartColumn() < other.getStartColumn() + } +} + +/** An entity representing an empty location. */ +class EmptyLocation extends Location { + EmptyLocation() { this.hasLocationInfo("", 0, 0, 0, 0) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll b/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll new file mode 100644 index 00000000000..e8b6a8ff691 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll @@ -0,0 +1,173 @@ +/** Provides classes for working with files and folders. */ + +private import codeql.Locations + +/** A file or folder. */ +abstract class Container extends @container { + /** Gets a file or sub-folder in this container. */ + Container getAChildContainer() { this = result.getParentContainer() } + + /** Gets a file in this container. */ + File getAFile() { result = getAChildContainer() } + + /** Gets a sub-folder in this container. */ + Folder getAFolder() { result = getAChildContainer() } + + /** + * Gets the absolute, canonical path of this container, using forward slashes + * as path separator. + * + * The path starts with a _root prefix_ followed by zero or more _path + * segments_ separated by forward slashes. + * + * The root prefix is of one of the following forms: + * + * 1. A single forward slash `/` (Unix-style) + * 2. An upper-case drive letter followed by a colon and a forward slash, + * such as `C:/` (Windows-style) + * 3. Two forward slashes, a computer name, and then another forward slash, + * such as `//FileServer/` (UNC-style) + * + * Path segments are never empty (that is, absolute paths never contain two + * contiguous slashes, except as part of a UNC-style root prefix). Also, path + * segments never contain forward slashes, and no path segment is of the + * form `.` (one dot) or `..` (two dots). 
+ * + * Note that an absolute path never ends with a forward slash, except if it is + * a bare root prefix, that is, the path has no path segments. A container + * whose absolute path has no segments is always a `Folder`, not a `File`. + */ + abstract string getAbsolutePath(); + + /** + * Gets the base name of this container including extension, that is, the last + * segment of its absolute path, or the empty string if it has no segments. + * + * Here are some examples of absolute paths and the corresponding base names + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + * + *
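<tr><th>Absolute path</th><th>Base name</th></tr>
<tr><td>"/tmp/tst.go"</td><td>"tst.go"</td></tr>
<tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
<tr><td>"/"</td><td>""</td></tr>
<tr><td>"C:/"</td><td>""</td></tr>
<tr><td>"D:/"</td><td>""</td></tr>
<tr><td>"//FileServer/"</td><td>""</td></tr>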
+ */ + string getBaseName() { + result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1) + } + + /** + * Gets the extension of this container, that is, the suffix of its base name + * after the last dot character, if any. + * + * In particular, + * + * - if the name does not include a dot, there is no extension, so this + * predicate has no result; + * - if the name ends in a dot, the extension is the empty string; + * - if the name contains multiple dots, the extension follows the last dot. + * + * Here are some examples of absolute paths and the corresponding extensions + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
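<tr><th>Absolute path</th><th>Extension</th></tr>
<tr><td>"/tmp/tst.go"</td><td>"go"</td></tr>
<tr><td>"/tmp/.classpath"</td><td>"classpath"</td></tr>
<tr><td>"/bin/bash"</td><td>not defined</td></tr>
<tr><td>"/tmp/tst2."</td><td>""</td></tr>
<tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>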
+ */ + string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) } + + /** Gets the file in this container that has the given `baseName`, if any. */ + File getFile(string baseName) { + result = getAFile() and + result.getBaseName() = baseName + } + + /** Gets the sub-folder in this container that has the given `baseName`, if any. */ + Folder getFolder(string baseName) { + result = getAFolder() and + result.getBaseName() = baseName + } + + /** Gets the parent container of this file or folder, if any. */ + Container getParentContainer() { containerparent(result, this) } + + /** + * Gets the relative path of this file or folder from the root folder of the + * analyzed source location. The relative path of the root folder itself is + * the empty string. + * + * This has no result if the container is outside the source root, that is, + * if the root folder is not a reflexive, transitive parent of this container. + */ + string getRelativePath() { + exists(string absPath, string pref | + absPath = getAbsolutePath() and sourceLocationPrefix(pref) + | + absPath = pref and result = "" + or + absPath = pref.regexpReplaceAll("/$", "") + "/" + result and + not result.matches("/%") + ) + } + + /** + * Gets the stem of this container, that is, the prefix of its base name up to + * (but not including) the last dot character if there is one, or the entire + * base name if there is not. + * + * Here are some examples of absolute paths and the corresponding stems + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
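<tr><th>Absolute path</th><th>Stem</th></tr>
<tr><td>"/tmp/tst.go"</td><td>"tst"</td></tr>
<tr><td>"/tmp/.classpath"</td><td>""</td></tr>
<tr><td>"/bin/bash"</td><td>"bash"</td></tr>
<tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
<tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>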
+ */ + string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) } + + /** + * Gets a URL representing the location of this container. + * + * For more information see https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls. + */ + abstract string getURL(); + + /** + * Gets a textual representation of the path of this container. + * + * This is the absolute path of the container. + */ + string toString() { result = getAbsolutePath() } +} + +/** A folder. */ +class Folder extends Container, @folder { + override string getAbsolutePath() { folders(this, result) } + + /** Gets the URL of this folder. */ + override string getURL() { result = "folder://" + getAbsolutePath() } +} + +/** A file. */ +class File extends Container, @file { + override string getAbsolutePath() { files(this, result) } + + /** Gets the URL of this file. */ + override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" } + + /** Holds if this file was extracted from ordinary source code. */ + predicate fromSource() { any() } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll new file mode 100644 index 00000000000..2d006b6312a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll @@ -0,0 +1,141 @@ +import codeql.Locations +import ast.Call +import ast.Control +import ast.Constant +import ast.Erb +import ast.Expr +import ast.Literal +import ast.Method +import ast.Module +import ast.Parameter +import ast.Operation +import ast.Pattern +import ast.Scope +import ast.Statement +import ast.Variable +private import ast.internal.AST +private import ast.internal.Scope +private import ast.internal.Synthesis +private import ast.internal.TreeSitter + +/** + * A node in the abstract syntax tree. This class is the base class for all Ruby + * program elements. 
+ */ +class AstNode extends TAstNode { + /** + * Gets the name of a primary CodeQL class to which this node belongs. + * + * This predicate always has a result. If no primary class can be + * determined, the result is `"???"`. If multiple primary classes match, + * this predicate can have multiple results. + */ + string getAPrimaryQlClass() { result = "???" } + + /** + * Gets a comma-separated list of the names of the primary CodeQL classes to + * which this element belongs. + */ + final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") } + + /** Gets the enclosing module, if any. */ + ModuleBase getEnclosingModule() { + exists(Scope::Range s | + s = scopeOf(toGeneratedInclSynth(this)) and + toGeneratedInclSynth(result) = s.getEnclosingModule() + ) + } + + /** Gets the enclosing method, if any. */ + MethodBase getEnclosingMethod() { + exists(Scope::Range s | + s = scopeOf(toGeneratedInclSynth(this)) and + toGeneratedInclSynth(result) = s.getEnclosingMethod() + ) + } + + /** Gets a textual representation of this node. */ + cached + string toString() { none() } + + /** Gets the location of this node. */ + Location getLocation() { result = getLocation(this) } + + /** Gets the file of this node. */ + final File getFile() { result = this.getLocation().getFile() } + + /** Gets a child node of this `AstNode`. */ + final AstNode getAChild() { result = this.getAChild(_) } + + /** Gets the parent of this `AstNode`, if this node is not a root node. */ + final AstNode getParent() { result.getAChild() = this } + + /** + * Gets a child of this node, which can also be retrieved using a predicate + * named `pred`. + */ + cached + AstNode getAChild(string pred) { + pred = "getDesugared" and + result = this.getDesugared() + } + + /** + * Holds if this node was synthesized to represent an implicit AST node not + * present in the source code. 
In the following example method call, the + * receiver is an implicit `self` reference, for which there is a synthesized + * `Self` node. + * + * ```rb + * foo(123) + * ``` + */ + final predicate isSynthesized() { this = getSynthChild(_, _) } + + /** + * Gets the desugared version of this AST node, if any. + * + * For example, the desugared version of + * + * ```rb + * x += y + * ``` + * + * is + * + * ```rb + * x = x + y + * ``` + * + * when `x` is a variable. Whenever an AST node can be desugared, + * then the desugared version is used in the control-flow graph. + */ + final AstNode getDesugared() { result = getSynthChild(this, -1) } +} + +/** A Ruby source file */ +class RubyFile extends File { + RubyFile() { ruby_ast_node_parent(_, this, _) } + + /** Gets a token in this file. */ + private Ruby::Token getAToken() { result.getLocation().getFile() = this } + + /** Holds if `line` contains a token. */ + private predicate line(int line, boolean comment) { + exists(Ruby::Token token, Location l | + token = this.getAToken() and + l = token.getLocation() and + line in [l.getStartLine() .. l.getEndLine()] and + if token instanceof @ruby_token_comment then comment = true else comment = false + ) + } + + /** Gets the number of lines in this file. */ + int getNumberOfLines() { result = max([0, this.getAToken().getLocation().getEndLine()]) } + + /** Gets the number of lines of code in this file. */ + int getNumberOfLinesOfCode() { result = count(int line | this.line(line, false)) } + + /** Gets the number of lines of comments in this file. 
*/ + int getNumberOfLinesOfComments() { result = count(int line | this.line(line, true)) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll new file mode 100644 index 00000000000..f260251cd24 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -0,0 +1,408 @@ +/** + * Provides an implementation of _API graphs_, which are an abstract representation of the API + * surface used and/or defined by a code base. + * + * The nodes of the API graph represent definitions and uses of API components. The edges are + * directed and labeled; they specify how the components represented by nodes relate to each other. + */ + +private import ruby +import codeql.ruby.DataFlow +import codeql.ruby.typetracking.TypeTracker +import codeql.ruby.ast.internal.Module +private import codeql.ruby.controlflow.CfgNodes + +/** + * Provides classes and predicates for working with APIs used in a database. + */ +module API { + /** + * An abstract representation of a definition or use of an API component such as a Ruby module, + * or the result of a method call. + */ + class Node extends Impl::TApiNode { + /** + * Gets a data-flow node corresponding to a use of the API component represented by this node. + * + * For example, `Kernel.format "%s world!", "Hello"` is a use of the return of the `format` function of + * the `Kernel` module. + * + * This includes indirect uses found via data flow. + */ + DataFlow::Node getAUse() { + exists(DataFlow::LocalSourceNode src | Impl::use(this, src) | + Impl::trackUseNode(src).flowsTo(result) + ) + } + + /** + * Gets an immediate use of the API component represented by this node. + * + * Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses + * found via data flow. 
+ */ + DataFlow::LocalSourceNode getAnImmediateUse() { Impl::use(this, result) } + + /** + * Gets a call to a method on the receiver represented by this API component. + */ + DataFlow::CallNode getAMethodCall(string method) { + result = getReturn(method).getAnImmediateUse() + } + + /** + * Gets a node representing member `m` of this API component. + * + * For example, a member can be: + * + * - A submodule of a module + * - An attribute of an object + */ + bindingset[m] + bindingset[result] + Node getMember(string m) { result = getASuccessor(Label::member(m)) } + + /** + * Gets a node representing a member of this API component where the name of the member is + * not known statically. + */ + Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) } + + /** + * Gets a node representing a member of this API component where the name of the member may + * or may not be known statically. + */ + Node getAMember() { + result = getASuccessor(Label::member(_)) or + result = getUnknownMember() + } + + /** + * Gets a node representing an instance of this API component, that is, an object + * created by calling the `new` method on the class represented by this node. + * + * For example, if this node represents a use of some class `A`, then there might be a node + * representing instances of `A`, typically corresponding to expressions `A.new` at the + * source level. + * + * This predicate may have multiple results when there are multiple `new` calls on this API component. + * Consider using `getAnInstantiation()` if there is a need to distinguish between individual `new` calls. + */ + Node getInstance() { result = getASuccessor(Label::instance()) } + + /** + * Gets a node representing the result of calling a method on the receiver represented by this node. + */ + Node getReturn(string method) { result = getASuccessor(Label::return(method)) } + + /** + * Gets a call to the `new` method on the receiver represented by this API component.
+ */ + DataFlow::Node getAnInstantiation() { result = getInstance().getAnImmediateUse() } + + /** + * Gets a node representing a subclass of the class represented by this node. + */ + Node getASubclass() { result = getASuccessor(Label::subclass()) } + + /** + * Gets a string representation of the lexicographically least among all shortest access paths + * from the root to this node. + */ + string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) } + + /** + * Gets a node such that there is an edge in the API graph between this node and the other + * one, and that edge is labeled with `lbl`. + */ + Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) } + + /** + * Gets a node such that there is an edge in the API graph between that other node and + * this one, and that edge is labeled with `lbl`. + */ + Node getAPredecessor(string lbl) { this = result.getASuccessor(lbl) } + + /** + * Gets a node such that there is an edge in the API graph between that other node and + * this one. + */ + Node getAPredecessor() { result = getAPredecessor(_) } + + /** + * Gets a node such that there is an edge in the API graph between this node and the other + * one. + */ + Node getASuccessor() { result = getASuccessor(_) } + + /** + * Gets the data-flow node that gives rise to this node, if any. + */ + DataFlow::Node getInducingNode() { this = Impl::MkUse(result) } + + /** Gets the location of this node. */ + Location getLocation() { + result = this.getInducingNode().getLocation() + or + // For nodes that do not have a meaningful location, `path` is the empty string and all other + // parameters are zero. + not exists(getInducingNode()) and + result instanceof EmptyLocation + } + + /** + * Gets a textual representation of this element. + */ + abstract string toString(); + + /** + * Gets a path of the given `length` from the root to this node.
+ */ + private string getAPath(int length) { + this instanceof Impl::MkRoot and + length = 0 and + result = "" + or + exists(Node pred, string lbl, string predpath | + Impl::edge(pred, lbl, this) and + lbl != "" and + predpath = pred.getAPath(length - 1) and + exists(string dot | if length = 1 then dot = "" else dot = "." | + result = predpath + dot + lbl and + // avoid producing strings longer than 1MB + result.length() < 1000 * 1000 + ) + ) and + length in [1 .. Impl::distanceFromRoot(this)] + } + + /** Gets the shortest distance from the root to this node in the API graph. */ + int getDepth() { result = Impl::distanceFromRoot(this) } + } + + /** The root node of an API graph. */ + class Root extends Node, Impl::MkRoot { + override string toString() { result = "root" } + } + + /** A node corresponding to the use of an API component. */ + class Use extends Node, Impl::MkUse { + override string toString() { + exists(string type | type = "Use " | + result = type + getPath() + or + not exists(this.getPath()) and result = type + "with no path" + ) + } + } + + /** Gets the root node. */ + Root root() { any() } + + /** + * Gets a node corresponding to a top-level member `m` (typically a module). + * + * This is equivalent to `root().getMember(m)`. + * + * Note: You should only use this predicate for top-level modules or classes. If you want nodes corresponding to a nested module or class, + * you should use `.getMember` on the parent module/class. For example, for nodes corresponding to the class `Gem::Version`, + * use `getTopLevelMember("Gem").getMember("Version")`. + */ + Node getTopLevelMember(string m) { result = root().getMember(m) } + + /** + * Provides the actual implementation of API graphs, cached for performance. + * + * Ideally, we'd like nodes to correspond to (global) access paths, with edge labels + * corresponding to extending the access path by one element.
We also want to be able to map + * nodes to their definitions and uses in the data-flow graph, and this should happen modulo + * (inter-procedural) data flow. + * + * This, however, is not easy to implement, since access paths can have unbounded length + * and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing + * a condition like "this node hasn't been involved in constructing any predecessor of + * this node in the API graph" without negative recursion is tricky. + * + * So instead most nodes are directly associated with a data-flow node, representing + * either a use or a definition of an API component. This ensures that we only have a finite + * number of nodes. However, we can now have multiple nodes with the same access + * path, which are essentially indistinguishable for a client of the API. + * + * On the other hand, a single node can have multiple access paths (which is, of + * course, unavoidable). We pick as canonical the alphabetically least access path with + * shortest length. + */ + cached + private module Impl { + cached + newtype TApiNode = + /** The root of the API graph. */ + MkRoot() or + /** A use of an API member at the node `nd`. */ + MkUse(DataFlow::Node nd) { isUse(nd) } + + private string resolveTopLevel(ConstantReadAccess read) { + TResolved(result) = resolveScopeExpr(read) and + not result.matches("%::%") + } + + /** + * Holds if `ref` is a use of a node that should have an incoming edge from the root + * node labeled `lbl` in the API graph. 
+ */ + cached + predicate useRoot(string lbl, DataFlow::Node ref) { + exists(string name, ExprNodes::ConstantAccessCfgNode access, ConstantReadAccess read | + access = ref.asExpr() and + lbl = Label::member(read.getName()) and + read = access.getExpr() + | + name = resolveTopLevel(read) + or + name = read.getName() and + not exists(resolveTopLevel(read)) and + not exists(read.getScopeExpr()) + ) + } + + /** + * Holds if `ref` is a use of a node that should have an incoming edge from use node + * `base` labeled `lbl` in the API graph. + */ + cached + predicate useUse(DataFlow::LocalSourceNode base, string lbl, DataFlow::Node ref) { + exists(ExprCfgNode node | + // First, we find a predecessor of the node `ref` that we want to determine. The predecessor + // is any node that is a type-tracked use of a data flow node (`src`), which is itself a + // reference to the API node `base`. Thus, `pred` and `src` both represent uses of `base`. + // + // Once we have identified the predecessor, we define its relation to the successor `ref` as + // well as the label on the edge from `pred` to `ref`. This label describes the nature of + // the relationship between `pred` and `ref`. 
+ useExpr(node, base) + | + // // Referring to an attribute on a node that is a use of `base`: + // pred = `Rails` part of `Rails::Whatever` + // lbl = `Whatever` + // ref = `Rails::Whatever` + exists(ExprNodes::ConstantAccessCfgNode c, ConstantReadAccess read | + not exists(resolveTopLevel(read)) and + node = c.getScopeExpr() and + lbl = Label::member(read.getName()) and + ref.asExpr() = c and + read = c.getExpr() + ) + or + // Calling a method on a node that is a use of `base` + exists(ExprNodes::MethodCallCfgNode call, string name | + node = call.getReceiver() and + name = call.getExpr().getMethodName() and + lbl = Label::return(name) and + name != "new" and + ref.asExpr() = call + ) + or + // Calling the `new` method on a node that is a use of `base`, which creates a new instance + exists(ExprNodes::MethodCallCfgNode call | + node = call.getReceiver() and + lbl = Label::instance() and + call.getExpr().getMethodName() = "new" and + ref.asExpr() = call + ) + ) + } + + pragma[nomagic] + private predicate isUse(DataFlow::Node nd) { + useRoot(_, nd) + or + useUse(_, _, nd) + } + + pragma[nomagic] + private predicate useExpr(ExprCfgNode node, DataFlow::LocalSourceNode src) { + exists(DataFlow::LocalSourceNode pred | + pred = trackUseNode(src) and + pred.flowsTo(any(DataFlow::ExprNode n | n.getExprNode() = node)) + ) + } + + /** + * Holds if `ref` is a use of node `nd`. + */ + cached + predicate use(TApiNode nd, DataFlow::Node ref) { nd = MkUse(ref) } + + /** + * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. + * + * The flow from `src` to that node may be inter-procedural. + */ + private DataFlow::LocalSourceNode trackUseNode(DataFlow::Node src, TypeTracker t) { + // Declaring `src` to be a `LocalSourceNode` currently causes a redundant check in the + // recursive case, so instead we check it explicitly here. 
+ src instanceof DataFlow::LocalSourceNode and + t.start() and + isUse(src) and + result = src + or + exists(TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t)) + } + + /** + * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. + * + * The flow from `src` to that node may be inter-procedural. + */ + cached + DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) { + result = trackUseNode(src, TypeTracker::end()) + } + + /** + * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. + */ + cached + predicate edge(TApiNode pred, string lbl, TApiNode succ) { + /* Every node that is a use of an API component is itself added to the API graph. */ + exists(DataFlow::LocalSourceNode ref | succ = MkUse(ref) | + pred = MkRoot() and + useRoot(lbl, ref) + or + exists(DataFlow::Node nd | + pred = MkUse(nd) and + useUse(nd, lbl, ref) + ) + ) + } + + /** + * Holds if there is an edge from `pred` to `succ` in the API graph. + */ + private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) } + + /** Gets the shortest distance from the root to `nd` in the API graph. */ + cached + int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result) + } +} + +private module Label { + /** Gets the `member` edge label for member `m`. */ + bindingset[m] + bindingset[result] + string member(string m) { result = "getMember(\"" + m + "\")" } + + /** Gets the `member` edge label for the unknown member. */ + string unknownMember() { result = "getUnknownMember()" } + + /** Gets the `instance` edge label. */ + string instance() { result = "instance" } + + /** Gets the `return` edge label. 
*/ + bindingset[m] + bindingset[result] + string return(string m) { result = "getReturn(\"" + m + "\")" } + + string subclass() { result = "getASubclass()" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll new file mode 100644 index 00000000000..77507b05a7f --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll @@ -0,0 +1,5 @@ +/** Provides classes representing the control flow graph. */ + +import controlflow.ControlFlowGraph +import controlflow.CfgNodes as CfgNodes +import controlflow.BasicBlocks diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll new file mode 100644 index 00000000000..f06995d1d36 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll @@ -0,0 +1,585 @@ +/** + * Provides abstract classes representing generic concepts such as file system + * access or system command execution, for which individual framework libraries + * provide concrete subclasses. + */ + +private import codeql.ruby.AST +private import codeql.ruby.CFG +private import codeql.ruby.DataFlow +private import codeql.ruby.Frameworks +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.ApiGraphs + +/** + * A data-flow node that executes SQL statements. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `SqlExecution::Range` instead. + */ +class SqlExecution extends DataFlow::Node instanceof SqlExecution::Range { + /** Gets the argument that specifies the SQL statements to be executed. */ + DataFlow::Node getSql() { result = super.getSql() } +} + +/** Provides a class for modeling new SQL execution APIs. */ +module SqlExecution { + /** + * A data-flow node that executes SQL statements. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SqlExecution` instead. 
+ */ + abstract class Range extends DataFlow::Node { + /** Gets the argument that specifies the SQL statements to be executed. */ + abstract DataFlow::Node getSql(); + } +} + +/** + * A data flow node that performs a file system access, including reading and writing data, + * creating and deleting files and folders, checking and updating permissions, and so on. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `FileSystemAccess::Range` instead. + */ +class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range { + /** Gets an argument to this file system access that is interpreted as a path. */ + DataFlow::Node getAPathArgument() { result = super.getAPathArgument() } +} + +/** Provides a class for modeling new file system access APIs. */ +module FileSystemAccess { + /** + * A data-flow node that performs a file system access, including reading and writing data, + * creating and deleting files and folders, checking and updating permissions, and so on. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `FileSystemAccess` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets an argument to this file system access that is interpreted as a path. */ + abstract DataFlow::Node getAPathArgument(); + } +} + +/** + * A data flow node that reads data from the file system. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `FileSystemReadAccess::Range` instead. + */ +class FileSystemReadAccess extends FileSystemAccess instanceof FileSystemReadAccess::Range { + /** + * Gets a node that represents data read from the file system access. + */ + DataFlow::Node getADataNode() { result = FileSystemReadAccess::Range.super.getADataNode() } +} + +/** Provides a class for modeling new file system reads. */ +module FileSystemReadAccess { + /** + * A data flow node that reads data from the file system. 
+ * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `FileSystemReadAccess` instead. + */ + abstract class Range extends FileSystemAccess::Range { + /** + * Gets a node that represents data read from the file system. + */ + abstract DataFlow::Node getADataNode(); + } +} + +/** + * A data-flow node that sets the permissions for one or more files. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `FileSystemPermissionModification::Range` instead. + */ +class FileSystemPermissionModification extends DataFlow::Node instanceof FileSystemPermissionModification::Range { + /** + * Gets an argument to this permission modification that is interpreted as a + * set of permissions. + */ + DataFlow::Node getAPermissionNode() { result = super.getAPermissionNode() } +} + +/** Provides a class for modeling new file system permission modifications. */ +module FileSystemPermissionModification { + /** + * A data-flow node that sets permissions for one or more files. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `FileSystemPermissionModification` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets an argument to this permission modification that is interpreted as a + * set of permissions. + */ + abstract DataFlow::Node getAPermissionNode(); + } +} + +/** + * A data-flow node that contains a file name or an array of file names from the local file system. + */ +abstract class FileNameSource extends DataFlow::Node { } + +/** + * A data-flow node that escapes meta-characters, which could be used to prevent + * injection attacks. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `Escaping::Range` instead.
+ */ +class Escaping extends DataFlow::Node instanceof Escaping::Range { + Escaping() { + // escapes that don't have _both_ input/output defined are not valid + exists(super.getAnInput()) and + exists(super.getOutput()) + } + + /** Gets an input that will be escaped. */ + DataFlow::Node getAnInput() { result = super.getAnInput() } + + /** Gets the output that contains the escaped data. */ + DataFlow::Node getOutput() { result = super.getOutput() } + + /** + * Gets the context that this function escapes for, such as `html`, or `url`. + */ + string getKind() { result = super.getKind() } +} + +/** Provides a class for modeling new escaping APIs. */ +module Escaping { + /** + * A data-flow node that escapes meta-characters, which could be used to prevent + * injection attacks. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `Escaping` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets an input that will be escaped. */ + abstract DataFlow::Node getAnInput(); + + /** Gets the output that contains the escaped data. */ + abstract DataFlow::Node getOutput(); + + /** + * Gets the context that this function escapes for. + * + * While kinds are represented as strings, this should not be relied upon. Use the + * predicates in the `Escaping` module, such as `getHtmlKind`. + */ + abstract string getKind(); + } + + /** Gets the escape-kind for escaping a string so it can safely be included in HTML. */ + string getHtmlKind() { result = "html" } +} + +/** + * An escape of a string so it can be safely included in + * the body of an HTML element, for example, replacing `{}` in + * `

<div>{}</div>

`. + */ +class HtmlEscaping extends Escaping { + HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() } +} + +/** Provides classes for modeling HTTP-related APIs. */ +module HTTP { + /** Provides classes for modeling HTTP servers. */ + module Server { + /** + * A data-flow node that sets up a route on a server. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RouteSetup::Range` instead. + */ + class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range { + /** Gets the URL pattern for this route, if it can be statically determined. */ + string getUrlPattern() { result = super.getUrlPattern() } + + /** + * Gets a function that will handle incoming requests for this route, if any. + * + * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`. + */ + Method getARequestHandler() { result = super.getARequestHandler() } + + /** + * Gets a parameter that will receive parts of the url when handling incoming + * requests for this route, if any. These automatically become a `RemoteFlowSource`. + */ + Parameter getARoutedParameter() { result = super.getARoutedParameter() } + + /** Gets a string that identifies the framework used for this route setup. */ + string getFramework() { result = super.getFramework() } + } + + /** Provides a class for modeling new HTTP routing APIs. */ + module RouteSetup { + /** + * A data-flow node that sets up a route on a server. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RouteSetup` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument used to set the URL pattern. */ + abstract DataFlow::Node getUrlPatternArg(); + + /** Gets the URL pattern for this route, if it can be statically determined. 
*/ + string getUrlPattern() { + exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode | + this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(strNode) and + result = strNode.getExpr().getValueText() + ) + } + + /** + * Gets a function that will handle incoming requests for this route, if any. + * + * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`. + */ + abstract Method getARequestHandler(); + + /** + * Gets a parameter that will receive parts of the URL when handling incoming + * requests for this route, if any. These automatically become a `RemoteFlowSource`. + */ + abstract Parameter getARoutedParameter(); + + /** Gets a string that identifies the framework used for this route setup. */ + abstract string getFramework(); + } + } + + /** + * A function that will handle incoming HTTP requests. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RequestHandler::Range` instead. + */ + class RequestHandler extends Method instanceof RequestHandler::Range { + /** + * Gets a parameter that could receive parts of the URL when handling incoming + * requests, if any. These automatically become a `RemoteFlowSource`. + */ + Parameter getARoutedParameter() { result = super.getARoutedParameter() } + + /** Gets a string that identifies the framework used for this request handler. */ + string getFramework() { result = super.getFramework() } + } + + /** Provides a class for modeling new HTTP request handlers. */ + module RequestHandler { + /** + * A function that will handle incoming HTTP requests. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RequestHandler` instead. + * + * Only extend this class if you can't provide a `RouteSetup`, since we handle that case automatically.
+ */ + abstract class Range extends Method { + /** + * Gets a parameter that could receive parts of the url when handling incoming + * requests, if any. These automatically become a `RemoteFlowSource`. + */ + abstract Parameter getARoutedParameter(); + + /** Gets a string that identifies the framework used for this request handler. */ + abstract string getFramework(); + } + } + + private class RequestHandlerFromRouteSetup extends RequestHandler::Range { + RouteSetup rs; + + RequestHandlerFromRouteSetup() { this = rs.getARequestHandler() } + + override Parameter getARoutedParameter() { + result = rs.getARoutedParameter() and + result = this.getAParameter() + } + + override string getFramework() { result = rs.getFramework() } + } + + /** A parameter that will receive parts of the url when handling an incoming request. */ + private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode { + RequestHandler handler; + + RoutedParameter() { this.getParameter() = handler.getARoutedParameter() } + + override string getSourceType() { result = handler.getFramework() + " RoutedParameter" } + } + + /** + * A data-flow node that creates a HTTP response on a server. + * + * Note: we don't require that this response must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HttpResponse::Range` instead. + */ + class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range { + /** Gets the data-flow node that specifies the body of this HTTP response. */ + DataFlow::Node getBody() { result = super.getBody() } + + /** Gets the mimetype of this HTTP response, if it can be statically determined. */ + string getMimetype() { result = super.getMimetype() } + } + + /** Provides a class for modeling new HTTP response APIs. */ + module HttpResponse { + /** + * A data-flow node that creates a HTTP response on a server. 
+ * + * Note: we don't require that this response must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HttpResponse` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the data-flow node that specifies the body of this HTTP response. */ + abstract DataFlow::Node getBody(); + + /** Gets the data-flow node that specifies the content-type/mimetype of this HTTP response, if any. */ + abstract DataFlow::Node getMimetypeOrContentTypeArg(); + + /** Gets the default mimetype that should be used if `getMimetypeOrContentTypeArg` has no results. */ + abstract string getMimetypeDefault(); + + /** Gets the mimetype of this HTTP response, if it can be statically determined. */ + string getMimetype() { + exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode | + this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(strNode) and + result = strNode.getExpr().getValueText().splitAt(";", 0) + ) + or + not exists(this.getMimetypeOrContentTypeArg()) and + result = this.getMimetypeDefault() + } + } + } + + /** + * A data-flow node that creates a HTTP redirect response on a server. + * + * Note: we don't require that this redirect must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HttpRedirectResponse::Range` instead. + */ + class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range { + /** Gets the data-flow node that specifies the location of this HTTP redirect response. */ + DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() } + } + + /** Provides a class for modeling new HTTP redirect response APIs. 
*/ + module HttpRedirectResponse { + /** + * A data-flow node that creates a HTTP redirect response on a server. + * + * Note: we don't require that this redirect must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HttpRedirectResponse` instead. + */ + abstract class Range extends HTTP::Server::HttpResponse::Range { + /** Gets the data-flow node that specifies the location of this HTTP redirect response. */ + abstract DataFlow::Node getRedirectLocation(); + } + } + } + + /** Provides classes for modeling HTTP clients. */ + module Client { + /** + * A method call that makes an outgoing HTTP request. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `Request::Range` instead. + */ + class Request extends MethodCall instanceof Request::Range { + /** Gets a node which returns the body of the response */ + DataFlow::Node getResponseBody() { result = super.getResponseBody() } + + /** Gets a string that identifies the framework used for this request. */ + string getFramework() { result = super.getFramework() } + + /** + * Holds if this request is made using a mode that disables SSL/TLS + * certificate validation, where `disablingNode` represents the point at + * which the validation was disabled. + */ + predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + super.disablesCertificateValidation(disablingNode) + } + } + + /** Provides a class for modeling new HTTP requests. */ + module Request { + /** + * A method call that makes an outgoing HTTP request. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `Request` instead.
+ */ + abstract class Range extends MethodCall { + /** Gets a node which returns the body of the response */ + abstract DataFlow::Node getResponseBody(); + + /** Gets a string that identifies the framework used for this request. */ + abstract string getFramework(); + + /** + * Holds if this request is made using a mode that disables SSL/TLS + * certificate validation, where `disablingNode` represents the point at + * which the validation was disabled. + */ + abstract predicate disablesCertificateValidation(DataFlow::Node disablingNode); + } + } + + /** The response body from an outgoing HTTP request, considered as a remote flow source */ + private class RequestResponseBody extends RemoteFlowSource::Range, DataFlow::Node { + Request request; + + RequestResponseBody() { this = request.getResponseBody() } + + override string getSourceType() { result = request.getFramework() } + } + } +} + +/** + * A data flow node that executes an operating system command, + * for instance by spawning a new process. + */ +class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range { + /** Holds if a shell interprets `arg`. */ + predicate isShellInterpreted(DataFlow::Node arg) { super.isShellInterpreted(arg) } + + /** Gets an argument to this execution that specifies the command or an argument to it. */ + DataFlow::Node getAnArgument() { result = super.getAnArgument() } +} + +/** Provides a class for modeling new operating system command APIs. */ +module SystemCommandExecution { + /** + * A data flow node that executes an operating system command, for instance by spawning a new + * process. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SystemCommandExecution` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets an argument to this execution that specifies the command or an argument to it. */ + abstract DataFlow::Node getAnArgument(); + + /** Holds if a shell interprets `arg`. 
*/ + predicate isShellInterpreted(DataFlow::Node arg) { none() } + } +} + +/** + * A data-flow node that dynamically executes Ruby code. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `CodeExecution::Range` instead. + */ +class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range { + /** Gets the argument that specifies the code to be executed. */ + DataFlow::Node getCode() { result = super.getCode() } +} + +/** Provides a class for modeling new dynamic code execution APIs. */ +module CodeExecution { + /** + * A data-flow node that dynamically executes Ruby code. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `CodeExecution` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument that specifies the code to be executed. */ + abstract DataFlow::Node getCode(); + } +} + +/** + * A data-flow node that parses XML content. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `XmlParserCall::Range` instead. + */ +class XmlParserCall extends DataFlow::Node { + XmlParserCall::Range range; + + XmlParserCall() { this = range } + + /** Gets the argument that specifies the XML content to be parsed. */ + DataFlow::Node getInput() { result = range.getInput() } + + /** Holds if this XML parser call is configured to process external entities */ + predicate externalEntitiesEnabled() { range.externalEntitiesEnabled() } +} + +/** Provides a class for modeling new XML parsing APIs. */ +module XmlParserCall { + /** + * A data-flow node that parses XML content. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XmlParserCall` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument that specifies the XML content to be parsed.
*/ + abstract DataFlow::Node getInput(); + + /** Holds if this XML parser call is configured to process external entities */ + abstract predicate externalEntitiesEnabled(); + } +} + +/** + * A data-flow node that may represent a database object in an ORM system. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `OrmInstantiation::Range` instead. + */ +class OrmInstantiation extends DataFlow::Node instanceof OrmInstantiation::Range { + /** Holds if a call to `methodName` on this instance may return a field of this ORM object. */ + bindingset[methodName] + predicate methodCallMayAccessField(string methodName) { + super.methodCallMayAccessField(methodName) + } +} + +/** Provides a class for modeling new ORM object instantiation APIs. */ +module OrmInstantiation { + /** + * A data-flow node that may represent a database object in an ORM system. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `OrmInstantiation` instead. + */ + abstract class Range extends DataFlow::Node { + /** Holds if a call to `methodName` on this instance may return a field of this ORM object. */ + bindingset[methodName] + abstract predicate methodCallMayAccessField(string methodName); + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll new file mode 100644 index 00000000000..e7645ce0c10 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. 
+ */ +module DataFlow { + import codeql.ruby.dataflow.internal.DataFlowImpl +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll new file mode 100644 index 00000000000..7486f52052d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlow2 { + import codeql.ruby.dataflow.internal.DataFlowImpl2 +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll new file mode 100644 index 00000000000..b8995c01bc2 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll @@ -0,0 +1,52 @@ +private import codeql.Locations + +/** A diagnostic emitted during extraction, such as a parse error */ +class Diagnostic extends @diagnostic { + int severity; + string tag; + string message; + string fullMessage; + Location location; + + Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) } + + /** + * Gets the numerical severity level associated with this diagnostic. + */ + int getSeverity() { result = severity } + + /** Gets a string representation of the severity of this diagnostic. */ + string getSeverityText() { + severity = 10 and result = "Debug" + or + severity = 20 and result = "Info" + or + severity = 30 and result = "Warning" + or + severity = 40 and result = "Error" + } + + /** Gets the error code associated with this diagnostic, e.g. parse_error. */ + string getTag() { result = tag } + + /** + * Gets the error message text associated with this diagnostic. + */ + string getMessage() { result = message } + + /** + * Gets the full error message text associated with this diagnostic. + */ + string getFullMessage() { result = fullMessage } + + /** Gets the source location of this diagnostic. 
*/ + Location getLocation() { result = location } + + /** Gets a textual representation of this diagnostic. */ + string toString() { result = this.getMessage() } +} + +/** A diagnostic relating to a particular error in extracting a file. */ +class ExtractionError extends Diagnostic, @diagnostic_error { + ExtractionError() { this.getTag() = "parse_error" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll new file mode 100644 index 00000000000..bd75177c401 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll @@ -0,0 +1,11 @@ +/** + * Helper file that imports all framework modeling. + */ + +private import codeql.ruby.frameworks.ActionController +private import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.frameworks.ActionView +private import codeql.ruby.frameworks.StandardLibrary +private import codeql.ruby.frameworks.Files +private import codeql.ruby.frameworks.HttpClients +private import codeql.ruby.frameworks.XmlParsing diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll new file mode 100755 index 00000000000..e443b294273 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. 
+ */ +module TaintTracking { + import codeql.ruby.dataflow.internal.tainttracking1.TaintTrackingImpl +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll new file mode 100644 index 00000000000..d34034f14cd --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll @@ -0,0 +1,215 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Call +private import internal.TreeSitter +private import codeql.ruby.dataflow.internal.DataFlowDispatch +private import codeql.ruby.dataflow.internal.DataFlowImplCommon + +/** + * A call. + */ +class Call extends Expr instanceof CallImpl { + override string getAPrimaryQlClass() { result = "Call" } + + /** + * Gets the `n`th argument of this method call. In the following example, the + * result for n=0 is the `IntegerLiteral` 0, while for n=1 the result is a + * `Pair` (whose `getKey` returns the `SymbolLiteral` for `bar`, and + * `getValue` returns the `IntegerLiteral` 1). Keyword arguments like this + * can be accessed more naturally using the + * `getKeywordArgument(string keyword)` predicate. + * ```rb + * foo(0, bar: 1) + * yield 0, bar: 1 + * ``` + */ + final Expr getArgument(int n) { result = super.getArgumentImpl(n) } + + /** + * Gets an argument of this method call. + */ + final Expr getAnArgument() { result = this.getArgument(_) } + + /** + * Gets the value of the keyword argument whose key is `keyword`, if any. For + * example, the result for `getKeywordArgument("qux")` in the following + * example is the `IntegerLiteral` 123. + * ```rb + * foo :bar "baz", qux: 123 + * ``` + */ + final Expr getKeywordArgument(string keyword) { + exists(Pair p | + p = this.getAnArgument() and + p.getKey().(SymbolLiteral).getValueText() = keyword and + result = p.getValue() + ) + } + + /** + * Gets the number of arguments of this method call. 
+ */ + final int getNumberOfArguments() { result = super.getNumberOfArgumentsImpl() } + + /** Gets a potential target of this call, if any. */ + final Callable getATarget() { + exists(DataFlowCall c | this = c.asCall().getExpr() | + TCfgScope(result) = [viableCallable(c), viableCallableLambda(c, _)] + ) + } + + override AstNode getAChild(string pred) { + result = Expr.super.getAChild(pred) + or + pred = "getArgument" and result = this.getArgument(_) + } +} + +/** + * A method call. + */ +class MethodCall extends Call instanceof MethodCallImpl { + override string getAPrimaryQlClass() { result = "MethodCall" } + + /** + * Gets the receiver of this call, if any. For example: + * + * ```rb + * foo.bar + * Baz::qux + * corge() + * ``` + * + * The result for the call to `bar` is the `Expr` for `foo`; the result for + * the call to `qux` is the `Expr` for `Baz`; for the call to `corge` there + * is no result. + */ + final Expr getReceiver() { result = super.getReceiverImpl() } + + /** + * Gets the name of the method being called. For example, in: + * + * ```rb + * foo.bar x, y + * ``` + * + * the result is `"bar"`. + */ + final string getMethodName() { result = super.getMethodNameImpl() } + + /** + * Gets the block of this method call, if any. + * ```rb + * foo.each { |x| puts x } + * ``` + */ + final Block getBlock() { result = super.getBlockImpl() } + + override string toString() { result = "call to " + this.getMethodName() } + + override AstNode getAChild(string pred) { + result = Call.super.getAChild(pred) + or + pred = "getReceiver" and result = this.getReceiver() + or + pred = "getBlock" and result = this.getBlock() + } +} + +/** + * A call to a setter method. 
+ * ```rb + * self.foo = 10 + * a[0] = 10 + * ``` + */ +class SetterMethodCall extends MethodCall, TMethodCallSynth { + SetterMethodCall() { this = TMethodCallSynth(_, _, _, true, _) } + + final override string getAPrimaryQlClass() { result = "SetterMethodCall" } +} + +/** + * An element reference; a call to the `[]` method. + * ```rb + * a[0] + * ``` + */ +class ElementReference extends MethodCall instanceof ElementReferenceImpl { + final override string getAPrimaryQlClass() { result = "ElementReference" } + + final override string toString() { result = "...[...]" } +} + +/** + * A call to `yield`. + * ```rb + * yield x, y + * ``` + */ +class YieldCall extends Call instanceof YieldCallImpl { + final override string getAPrimaryQlClass() { result = "YieldCall" } + + final override string toString() { result = "yield ..." } +} + +/** + * A call to `super`. + * ```rb + * class Foo < Bar + * def baz + * super + * end + * end + * ``` + */ +class SuperCall extends MethodCall instanceof SuperCallImpl { + final override string getAPrimaryQlClass() { result = "SuperCall" } +} + +/** + * A block argument in a method call. + * ```rb + * foo(&block) + * ``` + */ +class BlockArgument extends Expr, TBlockArgument { + private Ruby::BlockArgument g; + + BlockArgument() { this = TBlockArgument(g) } + + final override string getAPrimaryQlClass() { result = "BlockArgument" } + + /** + * Gets the underlying expression representing the block. In the following + * example, the result is the `Expr` for `bar`: + * ```rb + * foo(&bar) + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "&..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + } +} + +/** + * A `...` expression that contains forwarded arguments. + * ```rb + * foo(...) 
+ * ``` + */ +class ForwardedArguments extends Expr, TForwardArgument { + private Ruby::ForwardArgument g; + + ForwardedArguments() { this = TForwardArgument(g) } + + final override string getAPrimaryQlClass() { result = "ForwardedArguments" } + + final override string toString() { result = "..." } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll new file mode 100644 index 00000000000..11683d694b7 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll @@ -0,0 +1,210 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Module +private import internal.Variable +private import internal.TreeSitter + +/** An access to a constant. */ +class ConstantAccess extends Expr, TConstantAccess { + /** Gets the name of the constant being accessed. */ + string getName() { none() } + + /** Holds if the name of the constant being accessed is `name`. */ + final predicate hasName(string name) { this.getName() = name } + + /** + * Gets the expression used in the access's scope resolution operation, if + * any. In the following example, the result is the `Call` expression for + * `foo()`. + * + * ```rb + * foo()::MESSAGE + * ``` + * + * However, there is no result for the following example, since there is no + * scope resolution operation. 
+ * + * ```rb + * MESSAGE + * ``` + */ + Expr getScopeExpr() { none() } + + /** + * Holds if the access uses the scope resolution operator to refer to the + * global scope, as in this example: + * + * ```rb + * ::MESSAGE + * ``` + */ + predicate hasGlobalScope() { none() } + + override string toString() { result = this.getName() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getScopeExpr" and result = this.getScopeExpr() + } +} + +private class TokenConstantAccess extends ConstantAccess, TTokenConstantAccess { + private Ruby::Constant g; + + TokenConstantAccess() { this = TTokenConstantAccess(g) } + + final override string getName() { result = g.getValue() } +} + +private class ScopeResolutionConstantAccess extends ConstantAccess, TScopeResolutionConstantAccess { + private Ruby::ScopeResolution g; + private Ruby::Constant constant; + + ScopeResolutionConstantAccess() { this = TScopeResolutionConstantAccess(g, constant) } + + final override string getName() { result = constant.getValue() } + + final override Expr getScopeExpr() { toGenerated(result) = g.getScope() } + + final override predicate hasGlobalScope() { not exists(g.getScope()) } +} + +private class ConstantReadAccessSynth extends ConstantAccess, TConstantReadAccessSynth { + private string value; + + ConstantReadAccessSynth() { this = TConstantReadAccessSynth(_, _, value) } + + final override string getName() { + if this.hasGlobalScope() then result = value.suffix(2) else result = value + } + + final override Expr getScopeExpr() { synthChild(this, 0, result) } + + final override predicate hasGlobalScope() { value.matches("::%") } +} + +/** + * A use (read) of a constant. 
+ * + * For example, the right-hand side of the assignment in: + * + * ```rb + * x = Foo + * ``` + * + * Or the superclass `Bar` in this example: + * + * ```rb + * class Foo < Bar + * end + * ``` + */ +class ConstantReadAccess extends ConstantAccess { + ConstantReadAccess() { + not this instanceof ConstantWriteAccess + or + // `X` in `X ||= 10` is considered both a read and a write + this = any(AssignOperation a).getLeftOperand() + or + this instanceof TConstantReadAccessSynth + } + + /** + * Gets the value being read, if any. For example, in + * + * ```rb + * module M + * CONST = "const" + * end + * + * puts M::CONST + * ``` + * + * the value being read at `M::CONST` is `"const"`. + */ + Expr getValue() { + not exists(this.getScopeExpr()) and + result = lookupConst(this.getEnclosingModule+().getModule(), this.getName()) and + // For now, we restrict the scope of top-level declarations to their file. + // This may remove some plausible targets, but also removes a lot of + // implausible targets + if result.getEnclosingModule() instanceof Toplevel + then result.getFile() = this.getFile() + else any() + or + this.hasGlobalScope() and + result = lookupConst(TResolved("Object"), this.getName()) + or + result = lookupConst(resolveScopeExpr(this.getScopeExpr()), this.getName()) + } + + override string getValueText() { result = this.getValue().getValueText() } + + final override string getAPrimaryQlClass() { result = "ConstantReadAccess" } +} + +/** + * A definition of a constant. 
+ * + * Examples: + * + * ```rb + * Foo = 1 # defines constant Foo as an integer + * M::Foo = 1 # defines constant Foo as an integer in module M + * + * class Bar; end # defines constant Bar as a class + * class M::Bar; end # defines constant Bar as a class in module M + * + * module Baz; end # defines constant Baz as a module + * module M::Baz; end # defines constant Baz as a module in module M + * ``` + */ +class ConstantWriteAccess extends ConstantAccess { + ConstantWriteAccess() { + explicitAssignmentNode(toGenerated(this), _) or this instanceof TNamespace + } + + override string getAPrimaryQlClass() { result = "ConstantWriteAccess" } + + /** + * Gets the fully qualified name for this constant, based on the context in + * which it is defined. + * + * For example, given + * ```rb + * module Foo + * module Bar + * class Baz + * end + * end + * CONST_A = "a" + * end + * ``` + * + * the constant `Baz` has the fully qualified name `Foo::Bar::Baz`, and + * `CONST_A` has the fully qualified name `Foo::CONST_A`. + */ + string getQualifiedName() { + /* get the qualified name for the parent module, then append `::` and this constant's name */ + exists(ConstantWriteAccess parent | parent = this.getEnclosingModule() | + result = parent.getQualifiedName() + "::" + this.getName() + ) + or + /* base case - there's no parent module */ + not exists(ConstantWriteAccess parent | parent = this.getEnclosingModule()) and + result = this.getName() + } +} + +/** + * A definition of a constant via assignment.
For example, the left-hand + * operand in the following example: + * + * ```rb + * MAX_SIZE = 100 + * ``` + */ +class ConstantAssignment extends ConstantWriteAccess, LhsExpr { + override string getAPrimaryQlClass() { result = "ConstantAssignment" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll new file mode 100644 index 00000000000..33f52c02413 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll @@ -0,0 +1,611 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.TreeSitter + +/** + * A control expression that can be any of the following: + * - `case` + * - `if`/`unless` (including expression-modifier variants) + * - ternary-if (`?:`) + * - `while`/`until` (including expression-modifier variants) + * - `for` + */ +class ControlExpr extends Expr, TControlExpr { } + +/** + * A conditional expression: `if`/`unless` (including expression-modifier + * variants), and ternary-if (`?:`) expressions. + */ +class ConditionalExpr extends ControlExpr, TConditionalExpr { + /** + * Gets the condition expression. For example, the result is `foo` in the + * following: + * ```rb + * if foo + * bar = 1 + * end + * ``` + */ + Expr getCondition() { none() } + + /** + * Gets the branch of this conditional expression that is taken when the + * condition evaluates to `cond`, if any. + */ + Stmt getBranch(boolean cond) { none() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getCondition" and result = this.getCondition() + or + pred = "getBranch" and result = this.getBranch(_) + } +} + +/** + * An `if` or `elsif` expression. + * ```rb + * if x + * a += 1 + * elsif y + * a += 2 + * end + * ``` + */ +class IfExpr extends ConditionalExpr, TIfExpr { + final override string getAPrimaryQlClass() { result = "IfExpr" } + + /** Holds if this is an `elsif` expression. 
*/ + predicate isElsif() { none() } + + /** Gets the 'then' branch of this `if`/`elsif` expression. */ + Stmt getThen() { none() } + + /** + * Gets the `elsif`/`else` branch of this `if`/`elsif` expression, if any. In + * the following example, the result is a `StmtSequence` containing `b`. + * ```rb + * if foo + * a + * else + * b + * end + * ``` + * But there is no result for the following: + * ```rb + * if foo + * a + * end + * ``` + * There can be at most one result, since `elsif` branches nest. In the + * following example, `ifExpr.getElse()` returns an `ElsifExpr`, and the + * `else` branch is nested inside that. To get the `StmtSequence` for the + * `else` branch, i.e. the one containing `c`, use + * `getElse().(ElsifExpr).getElse()`. + * ```rb + * if foo + * a + * elsif bar + * b + * else + * c + * end + * ``` + */ + Stmt getElse() { none() } + + final override Stmt getBranch(boolean cond) { + cond = true and result = this.getThen() + or + cond = false and result = this.getElse() + } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +private class If extends IfExpr, TIf { + private Ruby::If g; + + If() { this = TIf(g) } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getThen() { toGenerated(result) = g.getConsequence() } + + final override Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override string toString() { result = "if ..." 
} +} + +private class Elsif extends IfExpr, TElsif { + private Ruby::Elsif g; + + Elsif() { this = TElsif(g) } + + final override predicate isElsif() { any() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getThen() { toGenerated(result) = g.getConsequence() } + + final override Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override string toString() { result = "elsif ..." } +} + +/** + * An `unless` expression. + * ```rb + * unless x == 0 + * y /= x + * end + * ``` + */ +class UnlessExpr extends ConditionalExpr, TUnlessExpr { + private Ruby::Unless g; + + UnlessExpr() { this = TUnlessExpr(g) } + + final override string getAPrimaryQlClass() { result = "UnlessExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Gets the 'then' branch of this `unless` expression. In the following + * example, the result is the `StmtSequence` containing `foo`. + * ```rb + * unless a == b then + * foo + * else + * bar + * end + * ``` + */ + final Stmt getThen() { toGenerated(result) = g.getConsequence() } + + /** + * Gets the 'else' branch of this `unless` expression. In the following + * example, the result is the `StmtSequence` containing `bar`. + * ```rb + * unless a == b then + * foo + * else + * bar + * end + * ``` + */ + final Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override Expr getBranch(boolean cond) { + cond = false and result = getThen() + or + cond = true and result = getElse() + } + + final override string toString() { result = "unless ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +/** + * An expression modified using `if`. 
+ * ```rb + * foo if bar + * ``` + */ +class IfModifierExpr extends ConditionalExpr, TIfModifierExpr { + private Ruby::IfModifier g; + + IfModifierExpr() { this = TIfModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "IfModifierExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getBranch(boolean cond) { cond = true and result = this.getBody() } + + /** + * Gets the statement that is conditionally evaluated. In the following + * example, the result is the `Expr` for `foo`. + * ```rb + * foo if bar + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "... if ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * An expression modified using `unless`. + * ```rb + * y /= x unless x == 0 + * ``` + */ +class UnlessModifierExpr extends ConditionalExpr, TUnlessModifierExpr { + private Ruby::UnlessModifier g; + + UnlessModifierExpr() { this = TUnlessModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "UnlessModifierExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getBranch(boolean cond) { cond = false and result = this.getBody() } + + /** + * Gets the statement that is conditionally evaluated. In the following + * example, the result is the `Expr` for `foo`. + * ```rb + * foo unless bar + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "... unless ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * A conditional expression using the ternary (`?:`) operator. + * ```rb + * (a > b) ? 
a : b + * ``` + */ +class TernaryIfExpr extends ConditionalExpr, TTernaryIfExpr { + private Ruby::Conditional g; + + TernaryIfExpr() { this = TTernaryIfExpr(g) } + + final override string getAPrimaryQlClass() { result = "TernaryIfExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** Gets the 'then' branch of this ternary if expression. */ + final Stmt getThen() { toGenerated(result) = g.getConsequence() } + + /** Gets the 'else' branch of this ternary if expression. */ + final Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override Stmt getBranch(boolean cond) { + cond = true and result = getThen() + or + cond = false and result = getElse() + } + + final override string toString() { result = "... ? ... : ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +class CaseExpr extends ControlExpr, TCaseExpr { + private Ruby::Case g; + + CaseExpr() { this = TCaseExpr(g) } + + final override string getAPrimaryQlClass() { result = "CaseExpr" } + + /** + * Gets the expression being compared, if any. For example, `foo` in the following example. + * ```rb + * case foo + * when 0 + * puts 'zero' + * when 1 + * puts 'one' + * end + * ``` + * There is no result for the following example: + * ```rb + * case + * when a then 0 + * when b then 1 + * else 2 + * end + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue() } + + /** + * Gets the `n`th branch of this case expression, either a `WhenExpr` or a + * `StmtSequence`. + */ + final Expr getBranch(int n) { toGenerated(result) = g.getChild(n) } + + /** + * Gets a branch of this case expression, either a `WhenExpr` or a + * `StmtSequence`. + */ + final Expr getABranch() { result = this.getBranch(_) } + + /** Gets a `when` branch of this case expression. 
*/ + final WhenExpr getAWhenBranch() { result = getABranch() } + + /** Gets the `else` branch of this case expression, if any. */ + final StmtSequence getElseBranch() { result = getABranch() } + + /** + * Gets the number of branches of this case expression. + */ + final int getNumberOfBranches() { result = count(this.getBranch(_)) } + + final override string toString() { result = "case ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + or + pred = "getBranch" and result = this.getBranch(_) + } +} + +/** + * A `when` branch of a `case` expression. + * ```rb + * case + * when a > b then x + * end + * ``` + */ +class WhenExpr extends Expr, TWhenExpr { + private Ruby::When g; + + WhenExpr() { this = TWhenExpr(g) } + + final override string getAPrimaryQlClass() { result = "WhenExpr" } + + /** Gets the body of this case-when expression. */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + /** + * Gets the `n`th pattern (or condition) in this case-when expression. In the + * following example, the 0th pattern is `x`, the 1st pattern is `y`, and the + * 2nd pattern is `z`. + * ```rb + * case foo + * when x, y, z + * puts 'x/y/z' + * end + * ``` + */ + final Expr getPattern(int n) { toGenerated(result) = g.getPattern(n).getChild() } + + /** + * Gets a pattern (or condition) in this case-when expression. + */ + final Expr getAPattern() { result = this.getPattern(_) } + + /** + * Gets the number of patterns in this case-when expression. + */ + final int getNumberOfPatterns() { result = count(this.getPattern(_)) } + + final override string toString() { result = "when ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + or + pred = "getPattern" and result = this.getPattern(_) + } +} + +/** + * A loop. 
That is, a `for` loop, a `while` or `until` loop, or their + * expression-modifier variants. + */ +class Loop extends ControlExpr, TLoop { + /** Gets the body of this loop. */ + Stmt getBody() { none() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * A loop using a condition expression. That is, a `while` or `until` loop, or + * their expression-modifier variants. + */ +class ConditionalLoop extends Loop, TConditionalLoop { + /** Gets the condition expression of this loop. */ + Expr getCondition() { none() } + + override AstNode getAChild(string pred) { + result = Loop.super.getAChild(pred) + or + pred = "getCondition" and result = this.getCondition() + } + + /** Holds if the loop body is entered when the condition is `condValue`. */ + predicate entersLoopWhenConditionIs(boolean condValue) { none() } +} + +/** + * A `while` loop. + * ```rb + * while a < b + * p a + * a += 2 + * end + * ``` + */ +class WhileExpr extends ConditionalLoop, TWhileExpr { + private Ruby::While g; + + WhileExpr() { this = TWhileExpr(g) } + + final override string getAPrimaryQlClass() { result = "WhileExpr" } + + /** Gets the body of this `while` loop. */ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `while` loops, this holds when `condValue` is true. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true } + + final override string toString() { result = "while ..." } +} + +/** + * An `until` loop. 
+ * ```rb + * until a >= b + * p a + * a += 1 + * end + * ``` + */ +class UntilExpr extends ConditionalLoop, TUntilExpr { + private Ruby::Until g; + + UntilExpr() { this = TUntilExpr(g) } + + final override string getAPrimaryQlClass() { result = "UntilExpr" } + + /** Gets the body of this `until` loop. */ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `until` loops, this holds when `condValue` is false. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false } + + final override string toString() { result = "until ..." } +} + +/** + * An expression looped using the `while` modifier. + * ```rb + * foo while bar + * ``` + */ +class WhileModifierExpr extends ConditionalLoop, TWhileModifierExpr { + private Ruby::WhileModifier g; + + WhileModifierExpr() { this = TWhileModifierExpr(g) } + + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `while`-modifier loops, this holds when `condValue` is true. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true } + + final override string getAPrimaryQlClass() { result = "WhileModifierExpr" } + + final override string toString() { result = "... while ..." } +} + +/** + * An expression looped using the `until` modifier. 
+ * ```rb + * foo until bar + * ``` + */ +class UntilModifierExpr extends ConditionalLoop, TUntilModifierExpr { + private Ruby::UntilModifier g; + + UntilModifierExpr() { this = TUntilModifierExpr(g) } + + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `until`-modifier loops, this holds when `condValue` is false. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false } + + final override string getAPrimaryQlClass() { result = "UntilModifierExpr" } + + final override string toString() { result = "... until ..." } +} + +/** + * A `for` loop. + * ```rb + * for val in 1..n + * sum += val + * end + * ``` + */ +class ForExpr extends Loop, TForExpr { + private Ruby::For g; + + ForExpr() { this = TForExpr(g) } + + final override string getAPrimaryQlClass() { result = "ForExpr" } + + /** Gets the body of this `for` loop. */ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + /** Gets the pattern representing the iteration argument. */ + final Pattern getPattern() { toGenerated(result) = g.getPattern() } + + /** + * Gets the value being iterated over. In the following example, the result + * is the expression `1..10`: + * ```rb + * for n in 1..10 do + * puts n + * end + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue().getChild() } + + final override string toString() { result = "for ... in ..." 
} + + override AstNode getAChild(string pred) { + result = Loop.super.getAChild(pred) + or + pred = "getPattern" and result = this.getPattern() + or + pred = "getValue" and result = this.getValue() + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll new file mode 100644 index 00000000000..52b14b70aa6 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll @@ -0,0 +1,313 @@ +private import codeql.Locations +private import codeql.ruby.AST +private import internal.Erb +private import internal.TreeSitter + +/** + * A node in the ERB abstract syntax tree. This class is the base class for all + * ERB elements. + */ +class ErbAstNode extends TAstNode { + /** Gets a textual representation of this node. */ + cached + string toString() { none() } + + /** Gets the location of this node. */ + Location getLocation() { result = getLocation(this) } + + /** + * Gets the name of a primary CodeQL class to which this node belongs. + * + * This predicate always has a result. If no primary class can be + * determined, the result is `"???"`. If multiple primary classes match, + * this predicate can have multiple results. + */ + string getAPrimaryQlClass() { result = "???" } +} + +/** + * An ERB template. This can contain multiple directives to be executed when + * the template is compiled. + */ +class ErbTemplate extends TTemplate, ErbAstNode { + private Erb::Template g; + + ErbTemplate() { this = TTemplate(g) } + + override string toString() { result = "erb template" } + + final override string getAPrimaryQlClass() { result = "ErbTemplate" } + + ErbAstNode getAChildNode() { toGenerated(result) = g.getChild(_) } +} + +// Truncate the token string value to 32 char max +bindingset[val] +private string displayToken(string val) { + val.length() <= 32 and result = val + or + val.length() > 32 and result = val.prefix(29) + "..." +} + +/** + * An ERB token. 
This could be embedded code, a comment, or arbitrary text. + */ +class ErbToken extends TTokenNode, ErbAstNode { + override string toString() { result = displayToken(this.getValue()) } + + /** Gets the string value of this token. */ + string getValue() { exists(Erb::Token g | this = fromGenerated(g) | result = g.getValue()) } + + override string getAPrimaryQlClass() { result = "ErbToken" } +} + +/** + * An ERB token appearing within a comment directive. + */ +class ErbComment extends ErbToken { + private Erb::Comment g; + + ErbComment() { this = TComment(g) } + + override string getValue() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "ErbComment" } +} + +/** + * An ERB token appearing within a code directive. This will typically be + * interpreted as Ruby code or a GraphQL query, depending on context. + */ +class ErbCode extends ErbToken { + private Erb::Code g; + + ErbCode() { this = TCode(g) } + + override string getValue() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "ErbCode" } +} + +bindingset[line, col] +private predicate locationIncludesPosition(Location loc, int line, int col) { + // position between start and end line, exclusive + line > loc.getStartLine() and + line < loc.getEndLine() + or + // position on start line, multi line location + line = loc.getStartLine() and + not loc.getStartLine() = loc.getEndLine() and + col >= loc.getStartColumn() + or + // position on end line, multi line location + line = loc.getEndLine() and + not loc.getStartLine() = loc.getEndLine() and + col <= loc.getEndColumn() + or + // single line location, position between start and end column + line = loc.getStartLine() and + loc.getStartLine() = loc.getEndLine() and + col >= loc.getStartColumn() and + col <= loc.getEndColumn() +} + +/** A file containing an ERB directive. 
*/ +private class ErbDirectiveFile extends File { + pragma[nomagic] + ErbDirectiveFile() { this = any(ErbDirective dir).getLocation().getFile() } + + /** Gets a statement in this file that starts at line `startLine` and column `startColumn`. */ + pragma[nomagic] + Stmt getAStmt(int startLine, int startColumn) { + exists(Location loc | + result.getLocation() = loc and + loc.getFile() = this and + loc.getStartLine() = startLine and + loc.getStartColumn() = startColumn + ) + } +} + +/** + * A directive in an ERB template. + */ +class ErbDirective extends TDirectiveNode, ErbAstNode { + /** Holds if this directive spans line `line` in the file `file`. */ + pragma[nomagic] + private predicate spans(ErbDirectiveFile file, int line) { + exists(Location loc | + loc = this.getLocation() and + file = loc.getFile() and + line in [loc.getStartLine() .. loc.getEndLine()] + ) + } + + private predicate containsStmtStart(Stmt s) { + // `Toplevel` statements are not contained within individual directives, + // though their start location may appear within a directive location + not s instanceof Toplevel and + exists(ErbDirectiveFile file, int startLine, int startColumn | + this.spans(file, startLine) and + s = file.getAStmt(startLine, startColumn) and + locationIncludesPosition(this.getLocation(), startLine, startColumn) + ) + } + + /** + * Gets a statement that starts in this directive and is not a child of any other + * statement starting in this directive. + */ + Stmt getAChildStmt() { + this.containsStmtStart(result) and + not this.containsStmtStart(result.getParent()) + } + + /** + * Gets the last child statement in this directive. + * See `getAChildStmt` for more details. + */ + Stmt getTerminalStmt() { + result = this.getAChildStmt() and + forall(Stmt s | s = this.getAChildStmt() and not s = result | + s.getLocation().strictlyBefore(result.getLocation()) + ) + } + + /** Gets the child token of this directive. 
*/ + ErbToken getToken() { + exists(Erb::Directive g | this = fromGenerated(g) | toGenerated(result) = g.getChild()) + } + + override string toString() { result = "erb directive" } + + override string getAPrimaryQlClass() { result = "ErbDirective" } +} + +/** + * A comment directive in an ERB template. + * ```erb + * <%#= 2 + 2 %> + * <%# for x in xs do %> + * ``` + */ +class ErbCommentDirective extends ErbDirective { + private Erb::CommentDirective g; + + ErbCommentDirective() { this = TCommentDirective(g) } + + override ErbComment getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%#" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbCommentDirective" } +} + +/** + * A GraphQL directive in an ERB template. + * ```erb + * <%graphql + * fragment Foo on Bar { + * some { + * queryText + * moreProperties + * } + * } + * %> + * ``` + */ +class ErbGraphqlDirective extends ErbDirective { + private Erb::GraphqlDirective g; + + ErbGraphqlDirective() { this = TGraphqlDirective(g) } + + override ErbCode getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%graphql" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbGraphqlDirective" } +} + +/** + * An output directive in an ERB template. + * ```erb + * <%= + * fragment Foo on Bar { + * some { + * queryText + * moreProperties + * } + * } + * %> + * ``` + */ +class ErbOutputDirective extends ErbDirective { + private Erb::OutputDirective g; + + ErbOutputDirective() { this = TOutputDirective(g) } + + override ErbCode getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%=" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbOutputDirective" } +} + +/** + * An execution directive in an ERB template. 
+ * This code will be executed as Ruby, but not rendered. + * ```erb + * <% books = author.books + * for book in books do %> + * ``` + */ +class ErbExecutionDirective extends ErbDirective { + private Erb::Directive g; + + ErbExecutionDirective() { this = TDirective(g) } + + final override string toString() { result = "<%" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbExecutionDirective" } +} + +/** + * A `File` containing an Embedded Ruby template. + * This is typically a file containing snippets of Ruby code that can be + * evaluated to create a compiled version of the file. + */ +class ErbFile extends File { + private ErbTemplate template; + + ErbFile() { this = template.getLocation().getFile() } + + /** + * Holds if the file represents a partial to be rendered in the context of + * another template. + */ + predicate isPartial() { this.getStem().charAt(0) = "_" } + + /** + * Gets the base template name associated with this ERB file. + * For instance, a file named `foo.html.erb` has a template name of `foo`. + * A partial template file named `_item.html.erb` has a template name of `item`. + */ + string getTemplateName() { none() } + + /** + * Gets the erb template contained within this file. 
+ */ + ErbTemplate getTemplate() { result = template } +} + +private class PartialErbFile extends ErbFile { + PartialErbFile() { this.isPartial() } + + // Drop the leading underscore + override string getTemplateName() { result = this.getStem().splitAt(".", 0).suffix(1) } +} + +private class FullErbFile extends ErbFile { + FullErbFile() { not this.isPartial() } + + override string getTemplateName() { result = this.getStem().splitAt(".", 0) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll new file mode 100644 index 00000000000..46b5bdd3d36 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll @@ -0,0 +1,456 @@ +private import codeql.ruby.AST +private import codeql.ruby.CFG +private import internal.AST +private import internal.TreeSitter + +/** + * An expression. + * + * This is the root QL class for all expressions. + */ +class Expr extends Stmt, TExpr { + /** Gets the textual (constant) value of this expression, if any. */ + string getValueText() { + forex(CfgNodes::ExprCfgNode n | n = this.getAControlFlowNode() | result = n.getValueText()) + } +} + +/** + * A reference to the current object. For example: + * - `self == other` + * - `self.method_name` + * - `def self.method_name ... end` + * + * This also includes implicit references to the current object in method + * calls. For example, the method call `foo(123)` has an implicit `self` + * receiver, and is equivalent to the explicit `self.foo(123)`. + */ +class Self extends Expr, TSelf { + final override string getAPrimaryQlClass() { result = "Self" } + + final override string toString() { result = "self" } +} + +/** + * A sequence of expressions in the right-hand side of an assignment or + * a `return`, `break` or `next` statement. 
+ * ```rb + * x = 1, *items, 3, *more + * return 1, 2 + * next *list + * break **map + * return 1, 2, *items, k: 5, **map + * ``` + */ +class ArgumentList extends Expr, TArgumentList { + private Ruby::AstNode g; + + ArgumentList() { this = TArgumentList(g) } + + /** Gets the `i`th element in this argument list. */ + Expr getElement(int i) { + toGenerated(result) in [ + g.(Ruby::ArgumentList).getChild(i), g.(Ruby::RightAssignmentList).getChild(i) + ] + } + + final override string getAPrimaryQlClass() { result = "ArgumentList" } + + final override string toString() { result = "..., ..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getElement" and result = this.getElement(_) + } +} + +/** A sequence of expressions. */ +class StmtSequence extends Expr, TStmtSequence { + override string getAPrimaryQlClass() { result = "StmtSequence" } + + /** Gets the `n`th statement in this sequence. */ + Stmt getStmt(int n) { none() } + + /** Gets a statement in this sequence. */ + final Stmt getAStmt() { result = this.getStmt(_) } + + /** Gets the last statement in this sequence, if any. */ + final Stmt getLastStmt() { result = this.getStmt(this.getNumberOfStatements() - 1) } + + /** Gets the number of statements in this sequence. */ + final int getNumberOfStatements() { result = count(this.getAStmt()) } + + /** Holds if this sequence has no statements. */ + final predicate isEmpty() { this.getNumberOfStatements() = 0 } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getStmt" and result = this.getStmt(_) + } +} + +private class StmtSequenceSynth extends StmtSequence, TStmtSequenceSynth { + final override Stmt getStmt(int n) { synthChild(this, n, result) } + + final override string toString() { result = "..." 
} +} + +private class Then extends StmtSequence, TThen { + private Ruby::Then g; + + Then() { this = TThen(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "then ..." } +} + +private class Else extends StmtSequence, TElse { + private Ruby::Else g; + + Else() { this = TElse(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "else ..." } +} + +private class Do extends StmtSequence, TDo { + private Ruby::Do g; + + Do() { this = TDo(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "do ..." } +} + +private class Ensure extends StmtSequence, TEnsure { + private Ruby::Ensure g; + + Ensure() { this = TEnsure(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "ensure ..." } +} + +/** + * A sequence of statements representing the body of a method, class, module, + * or do-block. That is, any body that may also include rescue/ensure/else + * statements. 
+ */ +class BodyStmt extends StmtSequence, TBodyStmt { + // Not defined by dispatch, as it should not be exposed + private Ruby::AstNode getChild(int i) { + result = any(Ruby::Method g | this = TMethod(g)).getChild(i) + or + result = any(Ruby::SingletonMethod g | this = TSingletonMethod(g)).getChild(i) + or + exists(Ruby::Lambda g | this = TLambda(g) | + result = g.getBody().(Ruby::DoBlock).getChild(i) or + result = g.getBody().(Ruby::Block).getChild(i) + ) + or + result = any(Ruby::DoBlock g | this = TDoBlock(g)).getChild(i) + or + result = any(Ruby::Program g | this = TToplevel(g)).getChild(i) and + not result instanceof Ruby::BeginBlock + or + result = any(Ruby::Class g | this = TClassDeclaration(g)).getChild(i) + or + result = any(Ruby::SingletonClass g | this = TSingletonClass(g)).getChild(i) + or + result = any(Ruby::Module g | this = TModuleDeclaration(g)).getChild(i) + or + result = any(Ruby::Begin g | this = TBeginExpr(g)).getChild(i) + } + + final override Stmt getStmt(int n) { + result = + rank[n + 1](AstNode node, int i | + toGenerated(node) = this.getChild(i) and + not node instanceof Else and + not node instanceof RescueClause and + not node instanceof Ensure + | + node order by i + ) + } + + /** Gets the `n`th rescue clause in this block. */ + final RescueClause getRescue(int n) { + result = + rank[n + 1](RescueClause node, int i | toGenerated(node) = getChild(i) | node order by i) + } + + /** Gets a rescue clause in this block. */ + final RescueClause getARescue() { result = this.getRescue(_) } + + /** Gets the `else` clause in this block, if any. */ + final StmtSequence getElse() { result = unique(Else s | toGenerated(s) = getChild(_)) } + + /** Gets the `ensure` clause in this block, if any. 
*/ + final StmtSequence getEnsure() { result = unique(Ensure s | toGenerated(s) = getChild(_)) } + + final predicate hasEnsure() { exists(this.getEnsure()) } + + override AstNode getAChild(string pred) { + result = StmtSequence.super.getAChild(pred) + or + pred = "getRescue" and result = this.getRescue(_) + or + pred = "getElse" and result = this.getElse() + or + pred = "getEnsure" and result = this.getEnsure() + } +} + +/** + * A parenthesized expression sequence, typically containing a single expression: + * ```rb + * (x + 1) + * ``` + * However, they can also contain multiple expressions (the value of the parenthesized + * expression is the last expression): + * ```rb + * (foo; bar) + * ``` + * or even an empty sequence (value is `nil`): + * ```rb + * () + * ``` + */ +class ParenthesizedExpr extends StmtSequence, TParenthesizedExpr { + private Ruby::ParenthesizedStatements g; + + ParenthesizedExpr() { this = TParenthesizedExpr(g) } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string getAPrimaryQlClass() { result = "ParenthesizedExpr" } + + final override string toString() { result = "( ... )" } +} + +/** + * A pair expression. For example, in a hash: + * ```rb + * { foo: bar } + * ``` + * Or a keyword argument: + * ```rb + * baz(qux: 1) + * ``` + */ +class Pair extends Expr, TPair { + private Ruby::Pair g; + + Pair() { this = TPair(g) } + + final override string getAPrimaryQlClass() { result = "Pair" } + + /** + * Gets the key expression of this pair. For example, the `SymbolLiteral` + * representing the keyword `foo` in the following example: + * ```rb + * bar(foo: 123) + * ``` + * Or the `StringLiteral` for `'foo'` in the following hash pair: + * ```rb + * { 'foo' => 123 } + * ``` + */ + final Expr getKey() { toGenerated(result) = g.getKey() } + + /** + * Gets the value expression of this pair. 
For example, the `IntegerLiteral` + * 123 in the following hash pair: + * ```rb + * { 'foo' => 123 } + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue() } + + final override string toString() { result = "Pair" } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getKey" and result = this.getKey() + or + pred = "getValue" and result = this.getValue() + } +} + +/** + * A rescue clause. For example: + * ```rb + * begin + * write_file + * rescue StandardError => msg + * puts msg + * end + */ +class RescueClause extends Expr, TRescueClause { + private Ruby::Rescue g; + + RescueClause() { this = TRescueClause(g) } + + final override string getAPrimaryQlClass() { result = "RescueClause" } + + /** + * Gets the `n`th exception to match, if any. For example `FirstError` or `SecondError` in: + * ```rb + * begin + * do_something + * rescue FirstError, SecondError => e + * handle_error(e) + * end + * ``` + */ + final Expr getException(int n) { toGenerated(result) = g.getExceptions().getChild(n) } + + /** + * Gets an exception to match, if any. For example `FirstError` or `SecondError` in: + * ```rb + * begin + * do_something + * rescue FirstError, SecondError => e + * handle_error(e) + * end + * ``` + */ + final Expr getAnException() { result = this.getException(_) } + + /** + * Gets the variable to which to assign the matched exception, if any. + * For example `err` in: + * ```rb + * begin + * do_something + * rescue StandardError => err + * handle_error(err) + * end + * ``` + */ + final LhsExpr getVariableExpr() { toGenerated(result) = g.getVariable().getChild() } + + /** + * Gets the exception handler body. + */ + final StmtSequence getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "rescue ..." 
} + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getException" and result = this.getException(_) + or + pred = "getVariableExpr" and result = this.getVariableExpr() + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * An expression with a `rescue` modifier. For example: + * ```rb + * contents = read_file rescue "" + * ``` + */ +class RescueModifierExpr extends Expr, TRescueModifierExpr { + private Ruby::RescueModifier g; + + RescueModifierExpr() { this = TRescueModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "RescueModifierExpr" } + + /** + * Gets the body of this `RescueModifierExpr`. + * ```rb + * body rescue handler + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + /** + * Gets the exception handler of this `RescueModifierExpr`. + * ```rb + * body rescue handler + * ``` + */ + final Stmt getHandler() { toGenerated(result) = g.getHandler() } + + final override string toString() { result = "... rescue ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + or + pred = "getHandler" and result = this.getHandler() + } +} + +/** + * A concatenation of string literals. + * + * ```rb + * "foo" "bar" "baz" + * ``` + */ +class StringConcatenation extends Expr, TStringConcatenation { + private Ruby::ChainedString g; + + StringConcatenation() { this = TStringConcatenation(g) } + + final override string getAPrimaryQlClass() { result = "StringConcatenation" } + + /** Gets the `n`th string literal in this concatenation. */ + final StringLiteral getString(int n) { toGenerated(result) = g.getChild(n) } + + /** Gets a string literal in this concatenation. */ + final StringLiteral getAString() { result = this.getString(_) } + + /** Gets the number of string literals in this concatenation. 
*/ + final int getNumberOfStrings() { result = count(this.getString(_)) } + + /** + * Gets the result of concatenating all the string literals, if and only if + * they do not contain any interpolations. + * + * For the following example, the result is `"foobar"`: + * + * ```rb + * "foo" 'bar' + * ``` + * + * And for the following example, where one of the string literals includes + * an interpolation, there is no result: + * + * ```rb + * "foo" "bar#{ n }" + * ``` + */ + final string getConcatenatedValueText() { + forall(StringLiteral c | c = this.getString(_) | exists(c.getValueText())) and + result = + concat(string valueText, int i | + valueText = this.getString(i).getValueText() + | + valueText order by i + ) + } + + final override string toString() { result = "\"...\" \"...\"" } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getString" and result = this.getString(_) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll new file mode 100644 index 00000000000..3e9714e3ce6 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll @@ -0,0 +1,892 @@ +private import codeql.ruby.AST +private import codeql.ruby.regexp.RegExpTreeView as RETV +private import internal.AST +private import internal.Scope +private import internal.TreeSitter + +/** + * A literal. + * + * This is the QL root class for all literals. + */ +class Literal extends Expr, TLiteral { + /** + * Gets the source text for this literal, if this is a simple literal. + * + * For complex literals, such as arrays, hashes, and strings with + * interpolations, this predicate has no result. + */ + override string getValueText() { none() } +} + +/** + * A numeric literal, i.e. an integer, floating-point, rational, or complex + * value. 
+ * + * ```rb + * 123 + * 0xff + * 3.14159 + * 1.0E2 + * 7r + * 1i + * ``` + */ +class NumericLiteral extends Literal, TNumericLiteral { } + +/** + * An integer literal. + * + * ```rb + * 123 + * 0xff + * ``` + */ +class IntegerLiteral extends NumericLiteral, TIntegerLiteral { + /** Gets the numerical value of this integer literal. */ + int getValue() { none() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "IntegerLiteral" } +} + +private class IntegerLiteralReal extends IntegerLiteral, TIntegerLiteralReal { + private Ruby::Integer g; + + IntegerLiteralReal() { this = TIntegerLiteralReal(g) } + + final override string getValueText() { result = g.getValue() } + + final override int getValue() { + exists(string s, string values, string str | + s = this.getValueText().toLowerCase() and + ( + s.matches("0b%") and + values = "01" and + str = s.suffix(2) + or + s.matches("0x%") and + values = "0123456789abcdef" and + str = s.suffix(2) + or + s.charAt(0) = "0" and + not s.charAt(1) = ["b", "x", "o"] and + values = "01234567" and + str = s.suffix(1) + or + s.matches("0o%") and + values = "01234567" and + str = s.suffix(2) + or + s.charAt(0) != "0" and values = "0123456789" and str = s + ) + | + result = + sum(int index, string c, int v, int exp | + c = str.replaceAll("_", "").charAt(index) and + v = values.indexOf(c.toLowerCase()) and + exp = str.replaceAll("_", "").length() - index - 1 + | + v * values.length().pow(exp) + ) + ) + } +} + +private class IntegerLiteralSynth extends IntegerLiteral, TIntegerLiteralSynth { + private int value; + + IntegerLiteralSynth() { this = TIntegerLiteralSynth(_, _, value) } + + final override string getValueText() { result = value.toString() } + + final override int getValue() { result = value } +} + +/** + * A floating-point literal. 
+ * + * ```rb + * 1.3 + * 2.7e+5 + * ``` + */ +class FloatLiteral extends NumericLiteral, TFloatLiteral { + private Ruby::Float g; + + FloatLiteral() { this = TFloatLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "FloatLiteral" } +} + +/** + * A rational literal. + * + * ```rb + * 123r + * ``` + */ +class RationalLiteral extends NumericLiteral, TRationalLiteral { + private Ruby::Rational g; + + RationalLiteral() { this = TRationalLiteral(g) } + + /** Gets the source text, built from the child token's text with an `r` suffix appended. */ + final override string getValueText() { result = g.getChild().(Ruby::Token).getValue() + "r" } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "RationalLiteral" } +} + +/** + * A complex literal. + * + * ```rb + * 1i + * ``` + */ +class ComplexLiteral extends NumericLiteral, TComplexLiteral { + private Ruby::Complex g; + + ComplexLiteral() { this = TComplexLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "ComplexLiteral" } +} + +/** A `nil` literal. */ +class NilLiteral extends Literal, TNilLiteral { + private Ruby::Nil g; + + NilLiteral() { this = TNilLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "NilLiteral" } +} + +/** + * A Boolean literal. + * + * ```rb + * true + * false + * TRUE + * FALSE + * ``` + */ +class BooleanLiteral extends Literal, TBooleanLiteral { + final override string getAPrimaryQlClass() { result = "BooleanLiteral" } + + final override string toString() { result = this.getValueText() } + + /** Holds if the Boolean literal is `true` or `TRUE`.
*/ + predicate isTrue() { none() } + + /** Holds if the Boolean literal is `false` or `FALSE`. */ + predicate isFalse() { none() } + + /** + * Gets the value of this Boolean literal: `true` if `isTrue()` holds and + * `false` if `isFalse()` holds. + */ + boolean getValue() { + this.isTrue() and result = true + or + this.isFalse() and result = false + } +} + +/** A Boolean literal for which `isTrue()` holds. */ +private class TrueLiteral extends BooleanLiteral, TTrueLiteral { + private Ruby::True g; + + TrueLiteral() { this = TTrueLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override predicate isTrue() { any() } +} + +/** A Boolean literal for which `isFalse()` holds. */ +private class FalseLiteral extends BooleanLiteral, TFalseLiteral { + private Ruby::False g; + + FalseLiteral() { this = TFalseLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override predicate isFalse() { any() } +} + +/** + * The base class for a component of a string: `StringTextComponent`, + * `StringEscapeSequenceComponent`, or `StringInterpolationComponent`. + */ +class StringComponent extends AstNode, TStringComponent { + /** + * Gets the source text for this string component. Has no result if this is + * a `StringInterpolationComponent`. + */ + string getValueText() { none() } +} + +/** + * A component of a string (or string-like) literal that is simply text. + * + * For example, the following string literals all contain `StringTextComponent` + * components whose `getValueText()` returns `"foo"`: + * + * ```rb + * 'foo' + * "#{ bar() }foo" + * "foo#{ bar() } baz" + * ``` + */ +class StringTextComponent extends StringComponent, TStringTextComponent { + private Ruby::Token g; + + StringTextComponent() { this = TStringTextComponent(g) } + + final override string toString() { result = g.getValue() } + + final override string getValueText() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "StringTextComponent" } +} + +/** + * An escape sequence component of a string or string-like literal.
+ */ +class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent { + private Ruby::EscapeSequence g; + + StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) } + + final override string toString() { result = g.getValue() } + + final override string getValueText() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" } +} + +/** + * An interpolation expression component of a string or string-like literal, + * such as `#{ Time.now }` in `"foo_#{ Time.now }"`. + */ +class StringInterpolationComponent extends StringComponent, StmtSequence, + TStringInterpolationComponent { + private Ruby::Interpolation g; + + StringInterpolationComponent() { this = TStringInterpolationComponent(g) } + + final override string toString() { result = "#{...}" } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string getValueText() { none() } + + final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" } +} + +/** + * A string, symbol, regexp, subshell, or here document literal. + */ +class StringlikeLiteral extends Literal, TStringlikeLiteral { + /** + * Gets the `n`th component of this string or string-like literal. The result + * will be one of `StringTextComponent`, `StringInterpolationComponent`, or + * `StringEscapeSequenceComponent`. + * + * In the following example, the result for `n = 0` is the + * `StringTextComponent` for `foo_`, and the result for `n = 1` is the + * `StringInterpolationComponent` for `Time.now`. + * + * ```rb + * "foo_#{ Time.now }" + * ``` + */ + StringComponent getComponent(int n) { none() } + + /** + * Gets the number of components in this string or string-like literal. + * + * For the empty string `""`, the result is 0. + * + * For the string `"foo"`, the result is 1: there is a single + * `StringTextComponent`.
+ * + * For the following example, the result is 3: there is a + * `StringTextComponent` for the substring `"foo"`; a + * `StringEscapeSequenceComponent` for the escaped quote; and a + * `StringInterpolationComponent` for the interpolation. + * + * ```rb + * "foo\"#{bar}" + * ``` + */ + final int getNumberOfComponents() { result = count(this.getComponent(_)) } + + /** Gets the text that `toString` places before the summary for this kind of literal. */ + private string getStartDelimiter() { + this instanceof TStringLiteral and + result = "\"" + or + this instanceof TRegExpLiteral and + result = "/" + or + this instanceof TSimpleSymbolLiteral and + result = ":" + or + this instanceof TComplexSymbolLiteral and + result = ":\"" + or + this instanceof THashKeySymbolLiteral and + result = "" + or + this instanceof TSubshellLiteral and + result = "`" + or + this instanceof THereDoc and + result = "" + } + + /** Gets the text that `toString` places after the summary for this kind of literal. */ + private string getEndDelimiter() { + this instanceof TStringLiteral and + result = "\"" + or + this instanceof TRegExpLiteral and + result = "/" + or + this instanceof TSimpleSymbolLiteral and + result = "" + or + this instanceof TComplexSymbolLiteral and + result = "\"" + or + this instanceof THashKeySymbolLiteral and + result = "" + or + this instanceof TSubshellLiteral and + result = "`" + or + this instanceof THereDoc and + result = "" + } + + override string getValueText() { + // 0 components should result in the empty string + // if there are any interpolations, there should be no result + // otherwise, concatenate all the components + forall(StringComponent c | c = this.getComponent(_) | + not c instanceof StringInterpolationComponent + ) and + result = + concat(StringComponent c, int i | c = this.getComponent(i) | c.getValueText() order by i) + } + + override string toString() { + exists(string full, string summary | + full = + concat(StringComponent c, int i, string s | + c = this.getComponent(i) and + ( + s = toGenerated(c).(Ruby::Token).getValue() + or + not toGenerated(c) instanceof Ruby::Token and + s = "#{...}" + ) + | + s order by i + ) and +
( + // summary should be 32 chars max (incl. ellipsis) + full.length() > 32 and summary = full.substring(0, 29) + "..." + or + full.length() <= 32 and summary = full + ) and + result = this.getStartDelimiter() + summary + this.getEndDelimiter() + ) + } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getComponent" and result = this.getComponent(_) + } +} + +/** + * A string literal. + * + * ```rb + * 'hello' + * "hello, #{name}" + * ``` + */ +class StringLiteral extends StringlikeLiteral, TStringLiteral { + final override string getAPrimaryQlClass() { result = "StringLiteral" } +} + +private class RegularStringLiteral extends StringLiteral, TRegularStringLiteral { + private Ruby::String g; + + RegularStringLiteral() { this = TRegularStringLiteral(g) } + + final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) } +} + +private class BareStringLiteral extends StringLiteral, TBareStringLiteral { + private Ruby::BareString g; + + BareStringLiteral() { this = TBareStringLiteral(g) } + + final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) } +} + +/** + * A regular expression literal. + * + * ```rb + * /[a-z]+/ + * ``` + */ +class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral { + private Ruby::Regex g; + + RegExpLiteral() { this = TRegExpLiteral(g) } + + final override string getAPrimaryQlClass() { result = "RegExpLiteral" } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } + + /** + * Gets the regexp flags as a string. + * + * ```rb + * /foo/ # => "" + * /foo/i # => "i" + * /foo/imxo # => "imxo" + * ``` + */ + final string getFlagString() { + // For `/foo/i`, there should be an `/i` token in the database with `this` + // as its parent. Strip the delimiter, which can vary.
+ result = + max(Ruby::Token t | t.getParent() = g | t.getValue().suffix(1) order by t.getParentIndex()) + } + + /** + * Holds if the regexp was specified using the `i` flag to indicate case + * insensitivity, as in the following example: + * + * ```rb + * /foo/i + * ``` + */ + final predicate hasCaseInsensitiveFlag() { this.getFlagString().charAt(_) = "i" } + + /** + * Holds if the regexp was specified using the `m` flag to indicate multiline + * mode. For example: + * + * ```rb + * /foo/m + * ``` + */ + final predicate hasMultilineFlag() { this.getFlagString().charAt(_) = "m" } + + /** + * Holds if the regexp was specified using the `x` flag to indicate + * 'free-spacing' mode (also known as 'extended' mode), meaning that + * whitespace and comments in the pattern are ignored. For example: + * + * ```rb + * %r{ + * [a-zA-Z_] # starts with a letter or underscore + * \w* # and then zero or more letters/digits/underscores + * }x + * ``` + */ + final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" } + + /** Gets the root node of the parse tree of this regular expression. */ + final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) } +} + +/** + * A symbol literal. + * + * ```rb + * :foo + * :"foo bar" + * :"foo bar #{baz}" + * ``` + */ +class SymbolLiteral extends StringlikeLiteral, TSymbolLiteral { + final override string getAPrimaryQlClass() { + not this instanceof MethodName and result = "SymbolLiteral" + } +} + +private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral { + private Ruby::SimpleSymbol g; + + SimpleSymbolLiteral() { this = TSimpleSymbolLiteral(g) } + + // Tree-sitter gives us value text including the colon, which we skip.
+ final override string getValueText() { result = g.getValue().suffix(1) } + + /** Gets the token text unchanged, i.e. with the leading colon that `getValueText` skips. */ + final override string toString() { result = g.getValue() } +} + +/** + * The common base of `DelimitedSymbolLiteral` and `BareSymbolLiteral`, which + * expose their content through `getComponent`. + */ +private class ComplexSymbolLiteral extends SymbolLiteral, TComplexSymbolLiteral { } + +private class DelimitedSymbolLiteral extends ComplexSymbolLiteral, TDelimitedSymbolLiteral { + private Ruby::DelimitedSymbol g; + + DelimitedSymbolLiteral() { this = TDelimitedSymbolLiteral(g) } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +private class BareSymbolLiteral extends ComplexSymbolLiteral, TBareSymbolLiteral { + private Ruby::BareSymbol g; + + BareSymbolLiteral() { this = TBareSymbolLiteral(g) } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +private class HashKeySymbolLiteral extends SymbolLiteral, THashKeySymbolLiteral { + private Ruby::HashKeySymbol g; + + HashKeySymbolLiteral() { this = THashKeySymbolLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + /** Gets the value text prefixed with a colon. */ + final override string toString() { result = ":" + this.getValueText() } +} + +/** + * A subshell literal. + * + * ```rb + * `ls -l` + * %x(/bin/sh foo.sh) + * ``` + */ +class SubshellLiteral extends StringlikeLiteral, TSubshellLiteral { + private Ruby::Subshell g; + + SubshellLiteral() { this = TSubshellLiteral(g) } + + final override string getAPrimaryQlClass() { result = "SubshellLiteral" } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +/** + * A character literal.
+ * + * ```rb + * ?a + * ?\u{61} + * ``` + */ +class CharacterLiteral extends Literal, TCharacterLiteral { + private Ruby::Character g; + + CharacterLiteral() { this = TCharacterLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "CharacterLiteral" } +} + +/** + * A "here document". For example: + * ```rb + * query = <<SQL + * SELECT * FROM person + * WHERE age > 21 + * SQL + * ``` + */ +class HereDoc extends StringlikeLiteral, THereDoc { + private Ruby::HeredocBeginning g; + + HereDoc() { this = THereDoc(g) } + + final override string getAPrimaryQlClass() { result = "HereDoc" } + + /** + * Holds if this here document is executed in a subshell. + * ```rb + * <<`COMMAND` + * echo "Hello world!" + * COMMAND + * ``` + */ + final predicate isSubShell() { getQuoteStyle() = "`" } + + /** + * Gets the quotation mark (`"`, `'` or `` ` ``) that surrounds the here document identifier, if any. + * ```rb + * <<"IDENTIFIER" + * <<'IDENTIFIER' + * <<`IDENTIFIER` + * ``` + */ + final string getQuoteStyle() { + exists(string s | + s = g.getValue() and + s.charAt(s.length() - 1) = result and + result = ["'", "`", "\""] + ) + } + + /** + * Gets the indentation modifier (`-` or `~`) of the here document identifier, if any. + * ```rb + * <<~IDENTIFIER + * <<-IDENTIFIER + * <<IDENTIFIER + * ``` + * + * NOTE(review): the text that followed here (the rest of this comment and of `HereDoc`, up to the `Lambda` example below) appears to be missing from this patch; restore it from the upstream source. + * + * ```rb + * -> (x) { x + 1 } + * ``` + */ +class Lambda extends Callable, BodyStmt, TLambda { + private Ruby::Lambda g; + + Lambda() { this = TLambda(g) } + + final override string getAPrimaryQlClass() { result = "Lambda" } + + final override Parameter getParameter(int n) { + toGenerated(result) = g.getParameters().getChild(n) + } + + final override string toString() { result = "-> { ... }" } + + final override AstNode getAChild(string pred) { + result = Callable.super.getAChild(pred) + or + result = BodyStmt.super.getAChild(pred) + } +} + +/** A block.
*/ +class Block extends Callable, StmtSequence, Scope, TBlock { + override AstNode getAChild(string pred) { + result = Callable.super.getAChild(pred) + or + result = StmtSequence.super.getAChild(pred) + } +} + +/** + * A block enclosed within `do` and `end`, e.g. in the following code: + * ```rb + * names.each do |name| + * puts name + * end + * ``` + */ +class DoBlock extends Block, BodyStmt, TDoBlock { + private Ruby::DoBlock g; + + DoBlock() { this = TDoBlock(g) } + + final override Parameter getParameter(int n) { + toGenerated(result) = g.getParameters().getChild(n) + } + + final override string toString() { result = "do ... end" } + + final override AstNode getAChild(string pred) { + result = Block.super.getAChild(pred) + or + result = BodyStmt.super.getAChild(pred) + } + + final override string getAPrimaryQlClass() { result = "DoBlock" } +} + +/** + * A block defined using curly braces, e.g. in the following code: + * ```rb + * names.each { |name| puts name } + * ``` + */ +class BraceBlock extends Block, TBraceBlock { + private Ruby::Block g; + + BraceBlock() { this = TBraceBlock(g) } + + final override Parameter getParameter(int n) { + toGenerated(result) = g.getParameters().getChild(n) + } + + final override Stmt getStmt(int i) { toGenerated(result) = g.getChild(i) } + + final override string toString() { result = "{ ... }" } + + final override string getAPrimaryQlClass() { result = "BraceBlock" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll new file mode 100644 index 00000000000..6a67c35a30d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll @@ -0,0 +1,365 @@ +private import codeql.ruby.AST +private import codeql.ruby.ast.Constant +private import internal.AST +private import internal.Module +private import internal.TreeSitter + +/** + * A representation of a run-time `module` or `class` value. + */ +class Module extends TModule { + /** Gets a declaration of this module, if any.
*/ + ModuleBase getADeclaration() { result.getModule() = this } + + /** Gets the super class of this module, if any. */ + Module getSuperClass() { result = getSuperClass(this) } + + /** Gets a `prepend`ed module. */ + Module getAPrependedModule() { result = getAPrependedModule(this) } + + /** Gets an `include`d module. */ + Module getAnIncludedModule() { result = getAnIncludedModule(this) } + + /** Holds if this module is a class. */ + pragma[noinline] + predicate isClass() { this.getADeclaration() instanceof ClassDeclaration } + + /** + * Gets a textual representation of this module: the string that identifies a + * resolved module, or `...::` followed by the namespace's text for an + * unresolved one. + */ + string toString() { + this = TResolved(result) + or + exists(Namespace n | this = TUnresolved(n) and result = "...::" + n.toString()) + } + + /** + * Gets the location of this module. + * + * An unresolved module is located at its namespace declaration. For a resolved + * module a single declaration is chosen: declarations in files that have a + * relative path come first, then those with the most statements, with ties + * broken by absolute file path, start line, and start column. + */ + Location getLocation() { + exists(Namespace n | this = TUnresolved(n) and result = n.getLocation()) + or + result = + min(Namespace n, string qName, Location loc, int weight | + this = TResolved(qName) and + qName = namespaceDeclaration(n) and + loc = n.getLocation() and + if exists(loc.getFile().getRelativePath()) then weight = 0 else weight = 1 + | + loc + order by + weight, count(n.getAStmt()) desc, loc.getFile().getAbsolutePath(), loc.getStartLine(), + loc.getStartColumn() + ) + } +} + +/** + * The base class for classes, singleton classes, and modules. + */ +class ModuleBase extends BodyStmt, Scope, TModuleBase { + /** Gets a method defined in this module/class. */ + MethodBase getAMethod() { result = this.getAStmt() } + + /** Gets the method named `name` in this module/class, if any. */ + MethodBase getMethod(string name) { result = this.getAMethod() and result.getName() = name } + + /** Gets a class defined in this module/class. */ + ClassDeclaration getAClass() { result = this.getAStmt() } + + /** Gets the class named `name` in this module/class, if any.
*/ + ClassDeclaration getClass(string name) { result = this.getAClass() and result.getName() = name } + + /** Gets a module defined in this module/class. */ + ModuleDeclaration getAModule() { result = this.getAStmt() } + + /** Gets the module named `name` in this module/class, if any. */ + ModuleDeclaration getModule(string name) { + result = this.getAModule() and result.getName() = name + } + + /** + * Gets the value of the constant named `name`, if any. + * + * For example, the value of `CONST` is `"const"` in + * ```rb + * module M + * CONST = "const" + * end + * ``` + * + * Only assignments that are statements of this module/class and whose + * left-hand side has no scope expression are considered. + */ + Expr getConstant(string name) { + exists(AssignExpr ae, ConstantWriteAccess w | + ae = this.getAStmt() and + w = ae.getLeftOperand() and + w.getName() = name and + not exists(w.getScopeExpr()) and + result = ae.getRightOperand() + ) + } + + /** Gets the representation of the run-time value of this module or class. */ + Module getModule() { none() } +} + +/** + * A Ruby source file. + * + * ```rb + * def main + * puts "hello world!" + * end + * main + * ``` + */ +class Toplevel extends ModuleBase, TToplevel { + private Ruby::Program g; + + Toplevel() { this = TToplevel(g) } + + final override string getAPrimaryQlClass() { result = "Toplevel" } + + /** + * Gets the `n`th `BEGIN` block, counting from 0 in child-index order. + */ + final BeginBlock getBeginBlock(int n) { + toGenerated(result) = rank[n + 1](int i, Ruby::BeginBlock b | b = g.getChild(i) | b order by i) + } + + /** + * Gets a `BEGIN` block. + */ + final BeginBlock getABeginBlock() { result = getBeginBlock(_) } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBeginBlock" and result = this.getBeginBlock(_) + } + + final override Module getModule() { result = TResolved("Object") } + + final override string toString() { result = g.getLocation().getFile().getBaseName() } +} + +/** + * A class or module definition.
+ * + * ```rb + * class Foo + * def bar + * end + * end + * module Bar + * class Baz + * end + * end + * ``` + */ +class Namespace extends ModuleBase, ConstantWriteAccess, TNamespace { + override string getAPrimaryQlClass() { result = "Namespace" } + + /** + * Gets the name of the module/class. In the following example, the result is + * `"Foo"`. + * ```rb + * class Foo + * end + * ``` + * + * N.B. in the following example, where the module/class name uses the scope + * resolution operator, the result is the name being resolved, i.e. `"Bar"`. + * Use `getScopeExpr` to get the `Expr` for `Foo`. + * ```rb + * module Foo::Bar + * end + * ``` + */ + override string getName() { none() } + + /** + * Gets the scope expression used in the module/class name's scope resolution + * operation, if any. + * + * In the following example, the result is the `Expr` for `Foo`. + * + * ```rb + * module Foo::Bar + * end + * ``` + * + * However, there is no result for the following example, since there is no + * scope resolution operation. + * + * ```rb + * module Baz + * end + * ``` + */ + override Expr getScopeExpr() { none() } + + /** + * Holds if the module/class name uses the scope resolution operator to access the + * global scope, as in this example: + * + * ```rb + * class ::Foo + * end + * ``` + */ + override predicate hasGlobalScope() { none() } + + final override Module getModule() { + result = any(string qName | qName = namespaceDeclaration(this) | TResolved(qName)) + or + result = TUnresolved(this) + } + + override AstNode getAChild(string pred) { + result = ModuleBase.super.getAChild(pred) or + result = ConstantWriteAccess.super.getAChild(pred) + } + + final override string toString() { result = ConstantWriteAccess.super.toString() } +} + +/** + * A class definition.
+ * + * ```rb + * class Foo + * def bar + * end + * end + * ``` + */ +class ClassDeclaration extends Namespace, TClassDeclaration { + private Ruby::Class g; + + ClassDeclaration() { this = TClassDeclaration(g) } + + final override string getAPrimaryQlClass() { result = "ClassDeclaration" } + + /** + * Gets the `Expr` used as the superclass in the class definition, if any. + * + * In the following example, the result is a `ConstantReadAccess`. + * ```rb + * class Foo < Bar + * end + * ``` + * + * In the following example, where the superclass is a call expression, the + * result is a `Call`. + * ```rb + * class C < foo() + * end + * ``` + */ + final Expr getSuperclassExpr() { toGenerated(result) = g.getSuperclass().getChild() } + + /** + * Gets the class name: the token's text when the name is a plain token, or + * the text of the final name when it is a scope resolution. + */ + final override string getName() { + result = g.getName().(Ruby::Token).getValue() or + result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue() + } + + final override Expr getScopeExpr() { + toGenerated(result) = g.getName().(Ruby::ScopeResolution).getScope() + } + + /** Holds if the class name is a scope resolution that has no scope expression. */ + final override predicate hasGlobalScope() { + exists(Ruby::ScopeResolution sr | + sr = g.getName() and + not exists(sr.getScope()) + ) + } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getSuperclassExpr" and result = this.getSuperclassExpr() + } +} + +/** + * A definition of a singleton class on an object. + * + * ```rb + * class << foo + * def bar + * p 'bar' + * end + * end + * ``` + */ +class SingletonClass extends ModuleBase, TSingletonClass { + private Ruby::SingletonClass g; + + SingletonClass() { this = TSingletonClass(g) } + + final override string getAPrimaryQlClass() { result = "SingletonClass" } + + /** + * Gets the expression resulting in the object on which the singleton class + * is defined.
 In the following example, the result is the `Expr` for `foo`: + * + * ```rb + * class << foo + * end + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue() } + + final override string toString() { result = "class << ..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + } +} + +/** + * A module definition. + * + * ```rb + * module Foo + * class Bar + * end + * end + * ``` + * + * N.B. this class represents a single instance of a module definition. In the + * following example, classes `Bar` and `Baz` are both defined in the module + * `Foo`, but in two syntactically distinct definitions, meaning that there + * will be two instances of `ModuleDeclaration` in the database. + * + * ```rb + * module Foo + * class Bar; end + * end + * + * module Foo + * class Baz; end + * end + * ``` + */ +class ModuleDeclaration extends Namespace, TModuleDeclaration { + private Ruby::Module g; + + ModuleDeclaration() { this = TModuleDeclaration(g) } + + final override string getAPrimaryQlClass() { result = "ModuleDeclaration" } + + /** + * Gets the module name: the token's text when the name is a plain token, or + * the text of the final name when it is a scope resolution. + */ + final override string getName() { + result = g.getName().(Ruby::Token).getValue() or + result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue() + } + + final override Expr getScopeExpr() { + toGenerated(result) = g.getName().(Ruby::ScopeResolution).getScope() + } + + /** Holds if the module name is a scope resolution that has no scope expression. */ + final override predicate hasGlobalScope() { + exists(Ruby::ScopeResolution sr | + sr = g.getName() and + not exists(sr.getScope()) + ) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll new file mode 100644 index 00000000000..236439700ce --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll @@ -0,0 +1,620 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.TreeSitter +private import internal.Operation + +/** + * 
An operation. + * + * This is the QL root class for all operations. + */ +class Operation extends Expr instanceof OperationImpl { + /** Gets the operator of this operation. */ + final string getOperator() { result = super.getOperatorImpl() } + + /** Gets an operand of this operation. */ + final Expr getAnOperand() { result = super.getAnOperandImpl() } + + override AstNode getAChild(string pred) { + result = Expr.super.getAChild(pred) + or + pred = "getAnOperand" and result = this.getAnOperand() + } +} + +/** A unary operation. */ +class UnaryOperation extends Operation, MethodCall instanceof UnaryOperationImpl { + /** Gets the operand of this unary operation. */ + final Expr getOperand() { result = super.getOperandImpl() } + + final override AstNode getAChild(string pred) { + result = Operation.super.getAChild(pred) + or + result = MethodCall.super.getAChild(pred) + or + pred = "getOperand" and result = this.getOperand() + } + + final override string toString() { result = this.getOperator() + " ..." } +} + +/** A unary logical operation, such as `!x.nil?`. */ +class UnaryLogicalOperation extends UnaryOperation, TUnaryLogicalOperation { } + +/** + * A logical NOT operation, using either `!` or `not`. + * ```rb + * !x.nil? + * not params.empty? + * ``` + */ +class NotExpr extends UnaryLogicalOperation, TNotExpr { + final override string getAPrimaryQlClass() { result = "NotExpr" } +} + +/** A unary arithmetic operation, such as `+ a` or `- a`. */ +class UnaryArithmeticOperation extends UnaryOperation, TUnaryArithmeticOperation { } + +/** + * A unary plus expression. + * ```rb + * + a + * ``` + */ +class UnaryPlusExpr extends UnaryArithmeticOperation, TUnaryPlusExpr { + final override string getAPrimaryQlClass() { result = "UnaryPlusExpr" } +} + +/** + * A unary minus expression. + * ```rb + * - a + * ``` + */ +class UnaryMinusExpr extends UnaryArithmeticOperation, TUnaryMinusExpr { + final override string getAPrimaryQlClass() { result = "UnaryMinusExpr" } +} + +/** + * A splat expression.
+ * ```rb + * foo(*args) + * ``` + */ +class SplatExpr extends UnaryOperation, TSplatExpr { + final override string getAPrimaryQlClass() { result = "SplatExpr" } +} + +/** + * A hash-splat (or 'double-splat') expression. + * ```rb + * foo(**options) + * ``` + */ +class HashSplatExpr extends UnaryOperation, THashSplatExpr { + private Ruby::HashSplatArgument g; + + HashSplatExpr() { this = THashSplatExpr(g) } + + final override string getAPrimaryQlClass() { result = "HashSplatExpr" } +} + +/** A unary bitwise operation, such as `~x`. */ +class UnaryBitwiseOperation extends UnaryOperation, TUnaryBitwiseOperation { } + +/** + * A complement (bitwise NOT) expression. + * ```rb + * ~x + * ``` + */ +class ComplementExpr extends UnaryBitwiseOperation, TComplementExpr { + final override string getAPrimaryQlClass() { result = "ComplementExpr" } +} + +/** + * A call to the special `defined?` operator. + * ```rb + * defined? some_method + * ``` + */ +class DefinedExpr extends UnaryOperation, TDefinedExpr { + final override string getAPrimaryQlClass() { result = "DefinedExpr" } +} + +/** A binary operation. */ +class BinaryOperation extends Operation, MethodCall instanceof BinaryOperationImpl { + final override string toString() { result = "... " + this.getOperator() + " ..." } + + override AstNode getAChild(string pred) { + result = Operation.super.getAChild(pred) + or + result = MethodCall.super.getAChild(pred) + or + pred = "getLeftOperand" and result = this.getLeftOperand() + or + pred = "getRightOperand" and result = this.getRightOperand() + } + + /** Gets the left operand of this binary operation. */ + final Stmt getLeftOperand() { result = super.getLeftOperandImpl() } + + /** Gets the right operand of this binary operation. */ + final Stmt getRightOperand() { result = super.getRightOperandImpl() } +} + +/** + * A binary arithmetic operation, such as `x + 1`. + */ +class BinaryArithmeticOperation extends BinaryOperation, TBinaryArithmeticOperation { } + +/** + * An add expression.
+ * ```rb + * x + 1 + * ``` + */ +class AddExpr extends BinaryArithmeticOperation, TAddExpr { + final override string getAPrimaryQlClass() { result = "AddExpr" } +} + +/** + * A subtract expression. + * ```rb + * x - 3 + * ``` + */ +class SubExpr extends BinaryArithmeticOperation, TSubExpr { + final override string getAPrimaryQlClass() { result = "SubExpr" } +} + +/** + * A multiply expression. + * ```rb + * x * 10 + * ``` + */ +class MulExpr extends BinaryArithmeticOperation, TMulExpr { + final override string getAPrimaryQlClass() { result = "MulExpr" } +} + +/** + * A divide expression. + * ```rb + * x / y + * ``` + */ +class DivExpr extends BinaryArithmeticOperation, TDivExpr { + final override string getAPrimaryQlClass() { result = "DivExpr" } +} + +/** + * A modulo expression. + * ```rb + * x % 2 + * ``` + */ +class ModuloExpr extends BinaryArithmeticOperation, TModuloExpr { + final override string getAPrimaryQlClass() { result = "ModuloExpr" } +} + +/** + * An exponent expression. + * ```rb + * x ** 2 + * ``` + */ +class ExponentExpr extends BinaryArithmeticOperation, TExponentExpr { + final override string getAPrimaryQlClass() { result = "ExponentExpr" } +} + +/** + * A binary logical operation, such as `x and y` or `a || b`. + */ +class BinaryLogicalOperation extends BinaryOperation, TBinaryLogicalOperation { } + +/** + * A logical AND operation, using either `and` or `&&`. + * ```rb + * x and y + * a && b + * ``` + */ +class LogicalAndExpr extends BinaryLogicalOperation, TLogicalAndExpr { + final override string getAPrimaryQlClass() { result = "LogicalAndExpr" } +} + +/** + * A logical OR operation, using either `or` or `||`. + * ```rb + * x or y + * a || b + * ``` + */ +class LogicalOrExpr extends BinaryLogicalOperation, TLogicalOrExpr { + final override string getAPrimaryQlClass() { result = "LogicalOrExpr" } +} + +/** + * A binary bitwise operation, such as a shift or a bitwise AND, OR, or XOR. + */ +class BinaryBitwiseOperation extends BinaryOperation, TBinaryBitwiseOperation { } + +/** + * A left-shift operation.
+ * ```rb + * x << n + * ``` + */ +class LShiftExpr extends BinaryBitwiseOperation, TLShiftExpr { + final override string getAPrimaryQlClass() { result = "LShiftExpr" } +} + +/** + * A right-shift operation. + * ```rb + * x >> n + * ``` + */ +class RShiftExpr extends BinaryBitwiseOperation, TRShiftExpr { + final override string getAPrimaryQlClass() { result = "RShiftExpr" } +} + +/** + * A bitwise AND operation. + * ```rb + * x & 0xff + * ``` + */ +class BitwiseAndExpr extends BinaryBitwiseOperation, TBitwiseAndExpr { + final override string getAPrimaryQlClass() { result = "BitwiseAndExpr" } +} + +/** + * A bitwise OR operation. + * ```rb + * x | 0x01 + * ``` + */ +class BitwiseOrExpr extends BinaryBitwiseOperation, TBitwiseOrExpr { + final override string getAPrimaryQlClass() { result = "BitwiseOrExpr" } +} + +/** + * An XOR (exclusive OR) operation. + * ```rb + * x ^ y + * ``` + */ +class BitwiseXorExpr extends BinaryBitwiseOperation, TBitwiseXorExpr { + final override string getAPrimaryQlClass() { result = "BitwiseXorExpr" } +} + +/** + * A comparison operation. That is, either an equality operation or a + * relational operation. + */ +class ComparisonOperation extends BinaryOperation, TComparisonOperation { } + +/** + * An equality operation. + */ +class EqualityOperation extends ComparisonOperation, TEqualityOperation { } + +/** + * An equals expression. + * ```rb + * x == y + * ``` + */ +class EqExpr extends EqualityOperation, TEqExpr { + final override string getAPrimaryQlClass() { result = "EqExpr" } +} + +/** + * A not-equals expression. + * ```rb + * x != y + * ``` + */ +class NEExpr extends EqualityOperation, TNEExpr { + final override string getAPrimaryQlClass() { result = "NEExpr" } +} + +/** + * A case-equality (or 'threequals') expression. 
+ * ```rb + * String === "foo" + * ``` + */ +class CaseEqExpr extends EqualityOperation, TCaseEqExpr { + final override string getAPrimaryQlClass() { result = "CaseEqExpr" } +} + +/** + * A relational operation, that is, one of `<=`, `<`, `>`, or `>=`. + */ +class RelationalOperation extends ComparisonOperation, TRelationalOperation { + /** Gets the greater operand. */ + Expr getGreaterOperand() { none() } + + /** Gets the lesser operand. */ + Expr getLesserOperand() { none() } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getGreaterOperand" and result = this.getGreaterOperand() + or + pred = "getLesserOperand" and result = this.getLesserOperand() + } +} + +/** + * A greater-than expression. + * ```rb + * x > 0 + * ``` + */ +class GTExpr extends RelationalOperation, TGTExpr { + final override string getAPrimaryQlClass() { result = "GTExpr" } + + final override Expr getGreaterOperand() { result = this.getLeftOperand() } + + final override Expr getLesserOperand() { result = this.getRightOperand() } +} + +/** + * A greater-than-or-equal expression. + * ```rb + * x >= 0 + * ``` + */ +class GEExpr extends RelationalOperation, TGEExpr { + final override string getAPrimaryQlClass() { result = "GEExpr" } + + final override Expr getGreaterOperand() { result = this.getLeftOperand() } + + final override Expr getLesserOperand() { result = this.getRightOperand() } +} + +/** + * A less-than expression. + * ```rb + * x < 10 + * ``` + */ +class LTExpr extends RelationalOperation, TLTExpr { + final override string getAPrimaryQlClass() { result = "LTExpr" } + + final override Expr getGreaterOperand() { result = this.getRightOperand() } + + final override Expr getLesserOperand() { result = this.getLeftOperand() } +} + +/** + * A less-than-or-equal expression. 
+ * ```rb + * x <= 10 + * ``` + */ +class LEExpr extends RelationalOperation, TLEExpr { + final override string getAPrimaryQlClass() { result = "LEExpr" } + + final override Expr getGreaterOperand() { result = this.getRightOperand() } + + final override Expr getLesserOperand() { result = this.getLeftOperand() } +} + +/** + * A three-way comparison ('spaceship') expression. + * ```rb + * a <=> b + * ``` + */ +class SpaceshipExpr extends BinaryOperation, TSpaceshipExpr { + final override string getAPrimaryQlClass() { result = "SpaceshipExpr" } +} + +/** + * A regexp match expression. + * ```rb + * input =~ /\d/ + * ``` + */ +class RegExpMatchExpr extends BinaryOperation, TRegExpMatchExpr { + final override string getAPrimaryQlClass() { result = "RegExpMatchExpr" } +} + +/** + * A regexp-doesn't-match expression. + * ```rb + * input !~ /\d/ + * ``` + */ +class NoRegExpMatchExpr extends BinaryOperation, TNoRegExpMatchExpr { + final override string getAPrimaryQlClass() { result = "NoRegExpMatchExpr" } +} + +/** + * A binary assignment operation, including `=`, `+=`, `&=`, etc. + * + * This is a QL base class for all assignments. + */ +class Assignment extends Operation instanceof AssignmentImpl { + /** Gets the left hand side of this assignment. */ + final Pattern getLeftOperand() { result = super.getLeftOperandImpl() } + + /** Gets the right hand side of this assignment. */ + final Expr getRightOperand() { result = super.getRightOperandImpl() } + + final override string toString() { result = "... " + this.getOperator() + " ..." } + + override AstNode getAChild(string pred) { + result = Operation.super.getAChild(pred) + or + pred = "getLeftOperand" and result = getLeftOperand() + or + pred = "getRightOperand" and result = getRightOperand() + } +} + +/** + * An assignment operation with the operator `=`. 
+ * ```rb + * x = 123 + * ``` + */ +class AssignExpr extends Assignment, TAssignExpr { + final override string getAPrimaryQlClass() { result = "AssignExpr" } +} + +/** + * A binary assignment operation other than `=`. + */ +class AssignOperation extends Assignment instanceof AssignOperationImpl { } + +/** + * An arithmetic assignment operation: `+=`, `-=`, `*=`, `/=`, `**=`, and `%=`. + */ +class AssignArithmeticOperation extends AssignOperation, TAssignArithmeticOperation { } + +/** + * A `+=` assignment expression. + * ```rb + * x += 1 + * ``` + */ +class AssignAddExpr extends AssignArithmeticOperation, TAssignAddExpr { + final override string getAPrimaryQlClass() { result = "AssignAddExpr" } +} + +/** + * A `-=` assignment expression. + * ```rb + * x -= 3 + * ``` + */ +class AssignSubExpr extends AssignArithmeticOperation, TAssignSubExpr { + final override string getAPrimaryQlClass() { result = "AssignSubExpr" } +} + +/** + * A `*=` assignment expression. + * ```rb + * x *= 10 + * ``` + */ +class AssignMulExpr extends AssignArithmeticOperation, TAssignMulExpr { + final override string getAPrimaryQlClass() { result = "AssignMulExpr" } +} + +/** + * A `/=` assignment expression. + * ```rb + * x /= y + * ``` + */ +class AssignDivExpr extends AssignArithmeticOperation, TAssignDivExpr { + final override string getAPrimaryQlClass() { result = "AssignDivExpr" } +} + +/** + * A `%=` assignment expression. + * ```rb + * x %= 4 + * ``` + */ +class AssignModuloExpr extends AssignArithmeticOperation, TAssignModuloExpr { + final override string getAPrimaryQlClass() { result = "AssignModuloExpr" } +} + +/** + * A `**=` assignment expression. + * ```rb + * x **= 2 + * ``` + */ +class AssignExponentExpr extends AssignArithmeticOperation, TAssignExponentExpr { + final override string getAPrimaryQlClass() { result = "AssignExponentExpr" } +} + +/** + * A logical assignment operation: `&&=` and `||=`. 
+ */ +class AssignLogicalOperation extends AssignOperation, TAssignLogicalOperation { } + +/** + * A logical AND assignment operation. + * ```rb + * x &&= y.even? + * ``` + */ +class AssignLogicalAndExpr extends AssignLogicalOperation, TAssignLogicalAndExpr { + final override string getAPrimaryQlClass() { result = "AssignLogicalAndExpr" } +} + +/** + * A logical OR assignment operation. + * ```rb + * x ||= y + * ``` + */ +class AssignLogicalOrExpr extends AssignLogicalOperation, TAssignLogicalOrExpr { + final override string getAPrimaryQlClass() { result = "AssignLogicalOrExpr" } +} + +/** + * A bitwise assignment operation: `<<=`, `>>=`, `&=`, `|=` and `^=`. + */ +class AssignBitwiseOperation extends AssignOperation, TAssignBitwiseOperation { } + +/** + * A left-shift assignment operation. + * ```rb + * x <<= 3 + * ``` + */ +class AssignLShiftExpr extends AssignBitwiseOperation, TAssignLShiftExpr { + final override string getAPrimaryQlClass() { result = "AssignLShiftExpr" } +} + +/** + * A right-shift assignment operation. + * ```rb + * x >>= 3 + * ``` + */ +class AssignRShiftExpr extends AssignBitwiseOperation, TAssignRShiftExpr { + final override string getAPrimaryQlClass() { result = "AssignRShiftExpr" } +} + +/** + * A bitwise AND assignment operation. + * ```rb + * x &= 0xff + * ``` + */ +class AssignBitwiseAndExpr extends AssignBitwiseOperation, TAssignBitwiseAndExpr { + final override string getAPrimaryQlClass() { result = "AssignBitwiseAndExpr" } +} + +/** + * A bitwise OR assignment operation. + * ```rb + * x |= 0x01 + * ``` + */ +class AssignBitwiseOrExpr extends AssignBitwiseOperation, TAssignBitwiseOrExpr { + final override string getAPrimaryQlClass() { result = "AssignBitwiseOrExpr" } +} + +/** + * An XOR (exclusive OR) assignment operation. 
+ * ```rb + * x ^= y + * ``` + */ +class AssignBitwiseXorExpr extends AssignBitwiseOperation, TAssignBitwiseXorExpr { + final override string getAPrimaryQlClass() { result = "AssignBitwiseXorExpr" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll new file mode 100644 index 00000000000..6e6b5395d43 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll @@ -0,0 +1,248 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Variable +private import internal.Parameter +private import internal.TreeSitter + +/** A parameter. */ +class Parameter extends AstNode, TParameter { + /** Gets the callable that this parameter belongs to. */ + final Callable getCallable() { result.getAParameter() = this } + + /** Gets the zero-based position of this parameter. */ + final int getPosition() { this = any(Callable c).getParameter(result) } + + /** Gets a variable introduced by this parameter. */ + LocalVariable getAVariable() { none() } + + /** Gets the variable named `name` introduced by this parameter. */ + final LocalVariable getVariable(string name) { + result = this.getAVariable() and + result.getName() = name + } +} + +/** + * A parameter defined using a pattern. + * + * This includes both simple parameters and tuple parameters. + */ +class PatternParameter extends Parameter, Pattern, TPatternParameter { + override LocalVariable getAVariable() { result = Pattern.super.getAVariable() } +} + +/** A parameter defined using a tuple pattern. */ +class TuplePatternParameter extends PatternParameter, TuplePattern, TTuplePatternParameter { + final override LocalVariable getAVariable() { result = TuplePattern.super.getAVariable() } + + final override string getAPrimaryQlClass() { result = "TuplePatternParameter" } + + override AstNode getAChild(string pred) { result = TuplePattern.super.getAChild(pred) } +} + +/** A named parameter. 
*/ +class NamedParameter extends Parameter, TNamedParameter { + /** Gets the name of this parameter. */ + string getName() { none() } + + /** Holds if the name of this parameter is `name`. */ + final predicate hasName(string name) { this.getName() = name } + + /** Gets the variable introduced by this parameter. */ + LocalVariable getVariable() { none() } + + override LocalVariable getAVariable() { result = this.getVariable() } + + /** Gets an access to this parameter. */ + final VariableAccess getAnAccess() { result = this.getVariable().getAnAccess() } + + /** Gets the access that defines the underlying local variable. */ + final VariableAccess getDefiningAccess() { result = this.getVariable().getDefiningAccess() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getDefiningAccess" and + result = this.getDefiningAccess() + } +} + +/** A simple (normal) parameter. */ +class SimpleParameter extends NamedParameter, PatternParameter, VariablePattern, TSimpleParameter { + private Ruby::Identifier g; + + SimpleParameter() { this = TSimpleParameter(g) } + + final override string getName() { result = g.getValue() } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g) } + + final override LocalVariable getAVariable() { result = this.getVariable() } + + final override string getAPrimaryQlClass() { result = "SimpleParameter" } + + final override string toString() { result = this.getName() } +} + +/** + * A parameter that is a block. For example, `&bar` in the following code: + * ```rb + * def foo(&bar) + * bar.call if block_given? 
+ * end + * ``` + */ +class BlockParameter extends NamedParameter, TBlockParameter { + private Ruby::BlockParameter g; + + BlockParameter() { this = TBlockParameter(g) } + + final override string getName() { result = g.getName().getValue() } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + + final override string toString() { result = "&" + this.getName() } + + final override string getAPrimaryQlClass() { result = "BlockParameter" } +} + +/** + * A hash-splat (or double-splat) parameter. For example, `**options` in the + * following code: + * ```rb + * def foo(bar, **options) + * ... + * end + * ``` + */ +class HashSplatParameter extends NamedParameter, THashSplatParameter { + private Ruby::HashSplatParameter g; + + HashSplatParameter() { this = THashSplatParameter(g) } + + final override string getAPrimaryQlClass() { result = "HashSplatParameter" } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + + final override string toString() { result = "**" + this.getName() } + + final override string getName() { result = g.getName().getValue() } +} + +/** + * A keyword parameter, including a default value if the parameter is optional. + * For example, in the following code, `foo` is a keyword parameter with a + * default value of `0`, and `bar` is a mandatory keyword parameter with no + * default value. + * ```rb + * def f(foo: 0, bar:) + * foo * 10 + bar + * end + * ``` + */ +class KeywordParameter extends NamedParameter, TKeywordParameter { + private Ruby::KeywordParameter g; + + KeywordParameter() { this = TKeywordParameter(g) } + + final override string getAPrimaryQlClass() { result = "KeywordParameter" } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + + /** + * Gets the default value, i.e. the value assigned to the parameter when one + * is not provided by the caller. 
If the parameter is mandatory and does not + * have a default value, this predicate has no result. + */ + final Expr getDefaultValue() { toGenerated(result) = g.getValue() } + + /** + * Holds if the parameter is optional. That is, there is a default value that + * is used when the caller omits this parameter. + */ + final predicate isOptional() { exists(this.getDefaultValue()) } + + final override string toString() { result = this.getName() } + + final override string getName() { result = g.getName().getValue() } + + final override Location getLocation() { result = g.getName().getLocation() } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getDefaultValue" and result = this.getDefaultValue() + } +} + +/** + * An optional parameter. For example, the parameter `name` in the following + * code: + * ```rb + * def say_hello(name = 'Anon') + * puts "hello #{name}" + * end + * ``` + */ +class OptionalParameter extends NamedParameter, TOptionalParameter { + private Ruby::OptionalParameter g; + + OptionalParameter() { this = TOptionalParameter(g) } + + final override string getAPrimaryQlClass() { result = "OptionalParameter" } + + /** + * Gets the default value, i.e. the value assigned to the parameter when one + * is not provided by the caller. + */ + final Expr getDefaultValue() { toGenerated(result) = g.getValue() } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + + final override string toString() { result = this.getName() } + + final override string getName() { result = g.getName().getValue() } + + final override Location getLocation() { result = g.getName().getLocation() } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getDefaultValue" and result = this.getDefaultValue() + } +} + +/** + * A splat parameter. For example, `*values` in the following code: + * ```rb + * def foo(bar, *values) + * ... 
+ * end + * ``` + */ +class SplatParameter extends NamedParameter, TSplatParameter { + private Ruby::SplatParameter g; + + SplatParameter() { this = TSplatParameter(g) } + + final override string getAPrimaryQlClass() { result = "SplatParameter" } + + final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + + final override string toString() { result = "*" + this.getName() } + + final override string getName() { result = g.getName().getValue() } +} + +/** + * A special `...` parameter that forwards positional/keyword/block arguments: + * ```rb + * def foo(...) + * end + * ``` + */ +class ForwardParameter extends Parameter, TForwardParameter { + final override string getAPrimaryQlClass() { result = "ForwardParameter" } + + final override string toString() { result = "..." } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll new file mode 100644 index 00000000000..7275894b57d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll @@ -0,0 +1,96 @@ +private import codeql.ruby.AST +private import codeql.Locations +private import internal.AST +private import internal.Pattern +private import internal.TreeSitter +private import internal.Variable + +/** A pattern. */ +class Pattern extends AstNode { + Pattern() { + explicitAssignmentNode(toGenerated(this), _) + or + implicitAssignmentNode(toGenerated(this)) + or + implicitParameterAssignmentNode(toGenerated(this), _) + or + this = getSynthChild(any(AssignExpr ae), 0) + } + + /** Gets a variable used in (or introduced by) this pattern. */ + Variable getAVariable() { none() } +} + +private class LhsExpr_ = + TVariableAccess or TTokenConstantAccess or TScopeResolutionConstantAccess or TMethodCall or + TSimpleParameter; + +/** + * A "left-hand-side" expression. 
An `LhsExpr` can occur on the left-hand side of + * operator assignments (`AssignOperation`), in patterns (`Pattern`) on the left-hand side of + * an assignment (`AssignExpr`) or for loop (`ForExpr`), and as the exception + * variable of a `rescue` clause (`RescueClause`). + * + * An `LhsExpr` can be a simple variable, a constant, a call, or an element reference: + * ```rb + * var = 1 + * var += 1 + * E = 1 + * foo.bar = 1 + * foo[0] = 1 + * rescue E => var + * ``` + */ +class LhsExpr extends Pattern, LhsExpr_, Expr { + override Variable getAVariable() { result = this.(VariableAccess).getVariable() } +} + +private class TVariablePattern = TVariableAccess or TSimpleParameter; + +/** A simple variable pattern. */ +class VariablePattern extends Pattern, LhsExpr, TVariablePattern { } + +/** + * A tuple pattern. + * + * This includes both tuple patterns in parameters and assignments. Example patterns: + * ```rb + * a, self.b = value + * (a, b), c[3] = value + * a, b, *rest, c, d = value + * ``` + */ +class TuplePattern extends Pattern, TTuplePattern { + override string getAPrimaryQlClass() { result = "TuplePattern" } + + private TuplePatternImpl getImpl() { result = toGenerated(this) } + + private Ruby::AstNode getChild(int i) { result = this.getImpl().getChildNode(i) } + + /** Gets the `i`th pattern in this tuple pattern. */ + final Pattern getElement(int i) { + exists(Ruby::AstNode c | c = this.getChild(i) | + toGenerated(result) = c.(Ruby::RestAssignment).getChild() + or + toGenerated(result) = c + ) + } + + /** Gets a sub pattern in this tuple pattern. */ + final Pattern getAnElement() { result = this.getElement(_) } + + /** + * Gets the index of the pattern with the `*` marker on it, if it exists. + * In the example below the index is `2`. 
+ * ```rb + * a, b, *rest, c, d = value + * ``` + */ + final int getRestIndex() { result = this.getImpl().getRestIndex() } + + override Variable getAVariable() { result = this.getElement(_).getAVariable() } + + override string toString() { result = "(..., ...)" } + + override AstNode getAChild(string pred) { pred = "getElement" and result = getElement(_) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll new file mode 100644 index 00000000000..45fb00ae731 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll @@ -0,0 +1,22 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Scope +private import internal.TreeSitter + +class Scope extends AstNode, TScopeType { + private Scope::Range range; + + Scope() { range = toGenerated(this) } + + /** Gets the scope in which this scope is nested, if any. */ + Scope getOuterScope() { toGenerated(result) = range.getOuterScope() } + + /** Gets a variable that is declared in this scope. */ + final Variable getAVariable() { result.getDeclaringScope() = this } + + /** Gets the variable declared in this scope with the given name, if any. */ + final Variable getVariable(string name) { + result = this.getAVariable() and + result.getName() = name + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll new file mode 100644 index 00000000000..e3d77c2010c --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll @@ -0,0 +1,248 @@ +private import codeql.ruby.AST +private import codeql.ruby.CFG +private import internal.AST +private import internal.TreeSitter +private import internal.Variable +private import codeql.ruby.controlflow.internal.ControlFlowGraphImpl + +/** + * A statement. + * + * This is the root QL class for all statements. 
+ */ +class Stmt extends AstNode, TStmt { + /** Gets a control-flow node for this statement, if any. */ + CfgNodes::AstCfgNode getAControlFlowNode() { result.getNode() = this } + + /** Gets the control-flow scope of this statement, if any. */ + CfgScope getCfgScope() { result = getCfgScope(this) } + + /** Gets the enclosing callable, if any. */ + Callable getEnclosingCallable() { result = this.getCfgScope() } +} + +/** + * An empty statement (`;`). + */ +class EmptyStmt extends Stmt, TEmptyStmt { + final override string getAPrimaryQlClass() { result = "EmptyStmt" } + + final override string toString() { result = ";" } +} + +/** + * A `begin` statement. + * ```rb + * begin + * puts "hello world" + * end + * ``` + */ +class BeginExpr extends BodyStmt, TBeginExpr { + final override string getAPrimaryQlClass() { result = "BeginExpr" } + + final override string toString() { result = "begin ... " } +} + +/** + * A `BEGIN` block. + * ```rb + * BEGIN { puts "starting ..." } + * ``` + */ +class BeginBlock extends StmtSequence, TBeginBlock { + private Ruby::BeginBlock g; + + BeginBlock() { this = TBeginBlock(g) } + + final override string getAPrimaryQlClass() { result = "BeginBlock" } + + final override string toString() { result = "BEGIN { ... }" } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } +} + +/** + * An `END` block. + * ```rb + * END { puts "shutting down" } + * ``` + */ +class EndBlock extends StmtSequence, TEndBlock { + private Ruby::EndBlock g; + + EndBlock() { this = TEndBlock(g) } + + final override string getAPrimaryQlClass() { result = "EndBlock" } + + final override string toString() { result = "END { ... }" } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } +} + +/** + * An `undef` statement. 
For example: + * ```rb + * - undef method_name + * - undef &&, :method_name + * - undef :"method_#{ name }" + * ``` + */ +class UndefStmt extends Stmt, TUndefStmt { + private Ruby::Undef g; + + UndefStmt() { this = TUndefStmt(g) } + + /** Gets the `n`th method name to undefine. */ + final MethodName getMethodName(int n) { toGenerated(result) = g.getChild(n) } + + /** Gets a method name to undefine. */ + final MethodName getAMethodName() { result = getMethodName(_) } + + final override string getAPrimaryQlClass() { result = "UndefStmt" } + + final override string toString() { result = "undef ..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getMethodName" and result = this.getMethodName(_) + } +} + +/** + * An `alias` statement. For example: + * ```rb + * - alias alias_name method_name + * - alias foo :method_name + * - alias bar :"method_#{ name }" + * ``` + */ +class AliasStmt extends Stmt, TAliasStmt { + private Ruby::Alias g; + + AliasStmt() { this = TAliasStmt(g) } + + /** Gets the new method name. */ + final MethodName getNewName() { toGenerated(result) = g.getName() } + + /** Gets the original method name. */ + final MethodName getOldName() { toGenerated(result) = g.getAlias() } + + final override string getAPrimaryQlClass() { result = "AliasStmt" } + + final override string toString() { result = "alias ..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getNewName" and result = this.getNewName() + or + pred = "getOldName" and result = this.getOldName() + } +} + +/** + * A statement that may return a value: `return`, `break` and `next`. 
+ * + * ```rb + * return + * return value + * break + * break value + * next + * next value + * ``` + */ +class ReturningStmt extends Stmt, TReturningStmt { + private Ruby::ArgumentList getArgumentList() { + result = any(Ruby::Return g | this = TReturnStmt(g)).getChild() + or + result = any(Ruby::Break g | this = TBreakStmt(g)).getChild() + or + result = any(Ruby::Next g | this = TNextStmt(g)).getChild() + } + + /** Gets the returned value, if any: the sole argument when there is exactly one, or the whole argument list when there are several. */ + final Expr getValue() { + toGenerated(result) = + any(Ruby::AstNode res | + exists(Ruby::ArgumentList a, int c | + a = this.getArgumentList() and c = count(a.getChild(_)) + | + res = a.getChild(0) and c = 1 + or + res = a and c > 1 + ) + ) + } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + } +} + +/** + * A `return` statement. + * ```rb + * return + * return value + * ``` + */ +class ReturnStmt extends ReturningStmt, TReturnStmt { + final override string getAPrimaryQlClass() { result = "ReturnStmt" } + + final override string toString() { result = "return" } +} + +/** + * A `break` statement. + * ```rb + * break + * break value + * ``` + */ +class BreakStmt extends ReturningStmt, TBreakStmt { + final override string getAPrimaryQlClass() { result = "BreakStmt" } + + final override string toString() { result = "break" } +} + +/** + * A `next` statement. + * ```rb + * next + * next value + * ``` + */ +class NextStmt extends ReturningStmt, TNextStmt { + final override string getAPrimaryQlClass() { result = "NextStmt" } + + final override string toString() { result = "next" } +} + +/** + * A `redo` statement. + * ```rb + * redo + * ``` + */ +class RedoStmt extends Stmt, TRedoStmt { + final override string getAPrimaryQlClass() { result = "RedoStmt" } + + final override string toString() { result = "redo" } +} + +/** + * A `retry` statement. 
+ * ```rb + * retry + * ``` + */ +class RetryStmt extends Stmt, TRetryStmt { + final override string getAPrimaryQlClass() { result = "RetryStmt" } + + final override string toString() { result = "retry" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll new file mode 100644 index 00000000000..b16d046d886 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll @@ -0,0 +1,187 @@ +/** Provides classes for modeling program variables. */ + +private import codeql.ruby.AST +private import codeql.Locations +private import internal.AST +private import internal.TreeSitter +private import internal.Variable + +/** A variable declared in a scope. */ +class Variable instanceof VariableImpl { + /** Gets the name of this variable. */ + final string getName() { result = super.getNameImpl() } + + /** Holds if the name of this variable is `name`. */ + final predicate hasName(string name) { this.getName() = name } + + /** Gets a textual representation of this variable. */ + final string toString() { result = this.getName() } + + /** Gets the location of this variable. */ + final Location getLocation() { result = super.getLocationImpl() } + + /** Gets the scope this variable is declared in. */ + final Scope getDeclaringScope() { + toGenerated(result) = this.(VariableReal).getDeclaringScopeImpl() + } + + /** Gets an access to this variable. */ + VariableAccess getAnAccess() { result.getVariable() = this } +} + +/** A local variable. */ +class LocalVariable extends Variable, TLocalVariable { + override LocalVariableAccess getAnAccess() { result.getVariable() = this } + + /** Gets the access where this local variable is first introduced. */ + VariableAccess getDefiningAccess() { result = this.(LocalVariableReal).getDefiningAccessImpl() } + + /** + * Holds if this variable is captured. 
For example in + * + * ```rb + * def m x + * x.times do |y| + * puts x + * end + * puts x + * end + * ``` + * + * `x` is a captured variable, whereas `y` is not. + */ + predicate isCaptured() { this.getAnAccess().isCapturedAccess() } +} + +/** A global variable. */ +class GlobalVariable extends Variable instanceof GlobalVariableImpl { + final override GlobalVariableAccess getAnAccess() { result.getVariable() = this } +} + +/** An instance variable. */ +class InstanceVariable extends Variable instanceof InstanceVariableImpl { + /** Holds if this variable is a class instance variable. */ + final predicate isClassInstanceVariable() { super.isClassInstanceVariable() } + + final override InstanceVariableAccess getAnAccess() { result.getVariable() = this } +} + +/** A class variable. */ +class ClassVariable extends Variable instanceof ClassVariableImpl { + final override ClassVariableAccess getAnAccess() { result.getVariable() = this } +} + +/** An access to a variable. */ +class VariableAccess extends Expr instanceof VariableAccessImpl { + /** Gets the variable this identifier refers to. */ + final Variable getVariable() { result = super.getVariableImpl() } + + /** + * Holds if this access is a write access belonging to the explicit + * assignment `assignment`. For example, in + * + * ```rb + * a, b = foo + * ``` + * + * both `a` and `b` are write accesses belonging to the same assignment. + */ + predicate isExplicitWrite(AstNode assignment) { + explicitWriteAccess(toGenerated(this), toGenerated(assignment)) + or + this = assignment.(AssignExpr).getLeftOperand() + } + + /** + * Holds if this access is a write access belonging to an implicit assignment. + * For example, in + * + * ```rb + * def m elements + * for e in elements do + * puts e + * end + * end + * ``` + * + * the access to `elements` in the parameter list is an implicit assignment, + * as is the first access to `e`. 
+ */ + predicate isImplicitWrite() { implicitWriteAccess(toGenerated(this)) } + + final override string toString() { result = VariableAccessImpl.super.toString() } +} + +/** An access to a variable where the value is updated. */ +class VariableWriteAccess extends VariableAccess { + VariableWriteAccess() { + this.isExplicitWrite(_) or + this.isImplicitWrite() + } +} + +/** An access to a variable where the value is read. */ +class VariableReadAccess extends VariableAccess { + VariableReadAccess() { not this instanceof VariableWriteAccess } +} + +/** An access to a local variable. */ +class LocalVariableAccess extends VariableAccess instanceof LocalVariableAccessImpl { + final override string getAPrimaryQlClass() { result = "LocalVariableAccess" } + + /** + * Holds if this access is a captured variable access. For example in + * + * ```rb + * def m x + * x.times do |y| + * puts x + * end + * puts x + * end + * ``` + * + * the access to `x` in the first `puts x` is a captured access, while + * the access to `x` in the second `puts x` is not. + */ + final predicate isCapturedAccess() { isCapturedAccess(this) } +} + +/** An access to a local variable where the value is updated. */ +class LocalVariableWriteAccess extends LocalVariableAccess, VariableWriteAccess { } + +/** An access to a local variable where the value is read. */ +class LocalVariableReadAccess extends LocalVariableAccess, VariableReadAccess { } + +/** An access to a global variable. */ +class GlobalVariableAccess extends VariableAccess instanceof GlobalVariableAccessImpl { + final override string getAPrimaryQlClass() { result = "GlobalVariableAccess" } +} + +/** An access to a global variable where the value is updated. */ +class GlobalVariableWriteAccess extends GlobalVariableAccess, VariableWriteAccess { } + +/** An access to a global variable where the value is read. */ +class GlobalVariableReadAccess extends GlobalVariableAccess, VariableReadAccess { } + +/** An access to an instance variable. 
*/ +class InstanceVariableAccess extends VariableAccess instanceof InstanceVariableAccessImpl { + final override string getAPrimaryQlClass() { result = "InstanceVariableAccess" } +} + +/** An access to an instance variable where the value is updated. */ +class InstanceVariableWriteAccess extends InstanceVariableAccess, VariableWriteAccess { } + +/** An access to an instance variable where the value is read. */ +class InstanceVariableReadAccess extends InstanceVariableAccess, VariableReadAccess { } + +/** An access to a class variable. */ +class ClassVariableAccess extends VariableAccess instanceof ClassVariableAccessRealImpl { + final override string getAPrimaryQlClass() { result = "ClassVariableAccess" } +} + +/** An access to a class variable where the value is updated. */ +class ClassVariableWriteAccess extends ClassVariableAccess, VariableWriteAccess { } + +/** An access to a class variable where the value is read. */ +class ClassVariableReadAccess extends ClassVariableAccess, VariableReadAccess { } diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll new file mode 100644 index 00000000000..7df09c9b5d8 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll @@ -0,0 +1,704 @@ +import codeql.Locations +private import TreeSitter +private import codeql.ruby.ast.internal.Call +private import codeql.ruby.ast.internal.Parameter +private import codeql.ruby.ast.internal.Variable +private import codeql.ruby.AST as AST +private import Synthesis + +module MethodName { + predicate range(Ruby::UnderscoreMethodName g) { + exists(Ruby::Undef u | u.getChild(_) = g) + or + exists(Ruby::Alias a | a.getName() = g or a.getAlias() = g) + } + + class Token = + @ruby_setter or @ruby_token_class_variable or @ruby_token_constant or + @ruby_token_global_variable or @ruby_token_identifier or @ruby_token_instance_variable or + @ruby_token_operator; +} + +private predicate 
mkSynthChild(SynthKind kind, AST::AstNode parent, int i) { + any(Synthesis s).child(parent, i, SynthChild(kind)) +} + +cached +private module Cached { + cached + newtype TAstNode = + TAddExprReal(Ruby::Binary g) { g instanceof @ruby_binary_plus } or + TAddExprSynth(AST::AstNode parent, int i) { mkSynthChild(AddExprKind(), parent, i) } or + TAliasStmt(Ruby::Alias g) or + TArgumentList(Ruby::AstNode g) { + ( + g.getParent() instanceof Ruby::Break or + g.getParent() instanceof Ruby::Return or + g.getParent() instanceof Ruby::Next or + g.getParent() instanceof Ruby::Assignment or + g.getParent() instanceof Ruby::OperatorAssignment + ) and + ( + strictcount(g.(Ruby::ArgumentList).getChild(_)) > 1 + or + g instanceof Ruby::RightAssignmentList + ) + } or + TAssignAddExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_plusequal } or + TAssignBitwiseAndExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_ampersandequal + } or + TAssignBitwiseOrExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_pipeequal + } or + TAssignBitwiseXorExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_caretequal + } or + TAssignDivExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_slashequal } or + TAssignExponentExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_starstarequal + } or + TAssignExprReal(Ruby::Assignment g) or + TAssignExprSynth(AST::AstNode parent, int i) { mkSynthChild(AssignExprKind(), parent, i) } or + TAssignLShiftExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_langlelangleequal + } or + TAssignLogicalAndExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_ampersandampersandequal + } or + TAssignLogicalOrExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_pipepipeequal + } or + TAssignModuloExpr(Ruby::OperatorAssignment g) { + g instanceof 
@ruby_operator_assignment_percentequal + } or + TAssignMulExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_starequal } or + TAssignRShiftExpr(Ruby::OperatorAssignment g) { + g instanceof @ruby_operator_assignment_ranglerangleequal + } or + TAssignSubExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_minusequal } or + TBareStringLiteral(Ruby::BareString g) or + TBareSymbolLiteral(Ruby::BareSymbol g) or + TBeginBlock(Ruby::BeginBlock g) or + TBeginExpr(Ruby::Begin g) or + TBitwiseAndExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ampersand } or + TBitwiseAndExprSynth(AST::AstNode parent, int i) { + mkSynthChild(BitwiseAndExprKind(), parent, i) + } or + TBitwiseOrExprReal(Ruby::Binary g) { g instanceof @ruby_binary_pipe } or + TBitwiseOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(BitwiseOrExprKind(), parent, i) } or + TBitwiseXorExprReal(Ruby::Binary g) { g instanceof @ruby_binary_caret } or + TBitwiseXorExprSynth(AST::AstNode parent, int i) { + mkSynthChild(BitwiseXorExprKind(), parent, i) + } or + TBlockArgument(Ruby::BlockArgument g) or + TBlockParameter(Ruby::BlockParameter g) or + TBraceBlock(Ruby::Block g) { not g.getParent() instanceof Ruby::Lambda } or + TBreakStmt(Ruby::Break g) or + TCaseEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequalequal } or + TCaseExpr(Ruby::Case g) or + TCharacterLiteral(Ruby::Character g) or + TClassDeclaration(Ruby::Class g) or + TClassVariableAccessReal(Ruby::ClassVariable g, AST::ClassVariable v) { + ClassVariableAccess::range(g, v) + } or + TClassVariableAccessSynth(AST::AstNode parent, int i, AST::ClassVariable v) { + mkSynthChild(ClassVariableAccessKind(v), parent, i) + } or + TComplementExpr(Ruby::Unary g) { g instanceof @ruby_unary_tilde } or + TComplexLiteral(Ruby::Complex g) or + TConstantReadAccessSynth(AST::AstNode parent, int i, string value) { + mkSynthChild(ConstantReadAccessKind(value), parent, i) + } or + TDefinedExpr(Ruby::Unary g) { g 
instanceof @ruby_unary_definedquestion } or + TDelimitedSymbolLiteral(Ruby::DelimitedSymbol g) or + TDestructuredLeftAssignment(Ruby::DestructuredLeftAssignment g) { + not strictcount(int i | exists(g.getParent().(Ruby::LeftAssignmentList).getChild(i))) = 1 + } or + TDivExprReal(Ruby::Binary g) { g instanceof @ruby_binary_slash } or + TDivExprSynth(AST::AstNode parent, int i) { mkSynthChild(DivExprKind(), parent, i) } or + TDo(Ruby::Do g) or + TDoBlock(Ruby::DoBlock g) { not g.getParent() instanceof Ruby::Lambda } or + TElementReference(Ruby::ElementReference g) or + TElse(Ruby::Else g) or + TElsif(Ruby::Elsif g) or + TEmptyStmt(Ruby::EmptyStatement g) or + TEndBlock(Ruby::EndBlock g) or + TEnsure(Ruby::Ensure g) or + TEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequal } or + TExponentExprReal(Ruby::Binary g) { g instanceof @ruby_binary_starstar } or + TExponentExprSynth(AST::AstNode parent, int i) { mkSynthChild(ExponentExprKind(), parent, i) } or + TFalseLiteral(Ruby::False g) or + TFloatLiteral(Ruby::Float g) { not any(Ruby::Rational r).getChild() = g } or + TForExpr(Ruby::For g) or + TForIn(Ruby::In g) or // TODO REMOVE + TForwardParameter(Ruby::ForwardParameter g) or + TForwardArgument(Ruby::ForwardArgument g) or + TGEExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangleequal } or + TGTExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangle } or + TGlobalVariableAccessReal(Ruby::GlobalVariable g, AST::GlobalVariable v) { + GlobalVariableAccess::range(g, v) + } or + TGlobalVariableAccessSynth(AST::AstNode parent, int i, AST::GlobalVariable v) { + mkSynthChild(GlobalVariableAccessKind(v), parent, i) + } or + THashKeySymbolLiteral(Ruby::HashKeySymbol g) or + THashLiteral(Ruby::Hash g) or + THashSplatExpr(Ruby::HashSplatArgument g) or + THashSplatParameter(Ruby::HashSplatParameter g) or + THereDoc(Ruby::HeredocBeginning g) or + TIdentifierMethodCall(Ruby::Identifier g) { isIdentifierMethodCall(g) } or + TIf(Ruby::If g) or + 
TIfModifierExpr(Ruby::IfModifier g) or + TInstanceVariableAccessReal(Ruby::InstanceVariable g, AST::InstanceVariable v) { + InstanceVariableAccess::range(g, v) + } or + TInstanceVariableAccessSynth(AST::AstNode parent, int i, AST::InstanceVariable v) { + mkSynthChild(InstanceVariableAccessKind(v), parent, i) + } or + TIntegerLiteralReal(Ruby::Integer g) { not any(Ruby::Rational r).getChild() = g } or + TIntegerLiteralSynth(AST::AstNode parent, int i, int value) { + mkSynthChild(IntegerLiteralKind(value), parent, i) + } or + TKeywordParameter(Ruby::KeywordParameter g) or + TLEExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequal } or + TLShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_langlelangle } or + TLShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(LShiftExprKind(), parent, i) } or + TLTExpr(Ruby::Binary g) { g instanceof @ruby_binary_langle } or + TLambda(Ruby::Lambda g) or + TLeftAssignmentList(Ruby::LeftAssignmentList g) or + TLocalVariableAccessReal(Ruby::Identifier g, AST::LocalVariable v) { + LocalVariableAccess::range(g, v) + } or + TLocalVariableAccessSynth(AST::AstNode parent, int i, AST::LocalVariable v) { + mkSynthChild(LocalVariableAccessRealKind(v), parent, i) + or + mkSynthChild(LocalVariableAccessSynthKind(v), parent, i) + } or + TLogicalAndExprReal(Ruby::Binary g) { + g instanceof @ruby_binary_and or g instanceof @ruby_binary_ampersandampersand + } or + TLogicalAndExprSynth(AST::AstNode parent, int i) { + mkSynthChild(LogicalAndExprKind(), parent, i) + } or + TLogicalOrExprReal(Ruby::Binary g) { + g instanceof @ruby_binary_or or g instanceof @ruby_binary_pipepipe + } or + TLogicalOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(LogicalOrExprKind(), parent, i) } or + TMethod(Ruby::Method g) or + TMethodCallSynth(AST::AstNode parent, int i, string name, boolean setter, int arity) { + mkSynthChild(MethodCallKind(name, setter, arity), parent, i) + } or + TModuleDeclaration(Ruby::Module g) or + 
TModuloExprReal(Ruby::Binary g) { g instanceof @ruby_binary_percent } or + TModuloExprSynth(AST::AstNode parent, int i) { mkSynthChild(ModuloExprKind(), parent, i) } or + TMulExprReal(Ruby::Binary g) { g instanceof @ruby_binary_star } or + TMulExprSynth(AST::AstNode parent, int i) { mkSynthChild(MulExprKind(), parent, i) } or + TNEExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangequal } or + TNextStmt(Ruby::Next g) or + TNilLiteral(Ruby::Nil g) or + TNoRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangtilde } or + TNotExpr(Ruby::Unary g) { g instanceof @ruby_unary_bang or g instanceof @ruby_unary_not } or + TOptionalParameter(Ruby::OptionalParameter g) or + TPair(Ruby::Pair g) or + TParenthesizedExpr(Ruby::ParenthesizedStatements g) or + TRShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ranglerangle } or + TRShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(RShiftExprKind(), parent, i) } or + TRangeLiteralReal(Ruby::Range g) or + TRangeLiteralSynth(AST::AstNode parent, int i, boolean inclusive) { + mkSynthChild(RangeLiteralKind(inclusive), parent, i) + } or + TRationalLiteral(Ruby::Rational g) or + TRedoStmt(Ruby::Redo g) or + TRegExpLiteral(Ruby::Regex g) or + TRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_equaltilde } or + TRegularArrayLiteral(Ruby::Array g) or + TRegularMethodCall(Ruby::Call g) { isRegularMethodCall(g) } or + TRegularStringLiteral(Ruby::String g) or + TRegularSuperCall(Ruby::Call g) { g.getMethod() instanceof Ruby::Super } or + TRescueClause(Ruby::Rescue g) or + TRescueModifierExpr(Ruby::RescueModifier g) or + TRetryStmt(Ruby::Retry g) or + TReturnStmt(Ruby::Return g) or + TScopeResolutionConstantAccess(Ruby::ScopeResolution g, Ruby::Constant constant) { + constant = g.getName() and + ( + // A tree-sitter `scope_resolution` node with a `constant` name field is a + // read of that constant in any context where an identifier would be a + // vcall. 
+ vcall(g) + or + explicitAssignmentNode(g, _) + ) + } or + TScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) { + isScopeResolutionMethodCall(g, i) + } or + TSelfReal(Ruby::Self g) or + TSelfSynth(AST::AstNode parent, int i) { mkSynthChild(SelfKind(), parent, i) } or + TSimpleParameter(Ruby::Identifier g) { g instanceof Parameter::Range } or + TSimpleSymbolLiteral(Ruby::SimpleSymbol g) or + TSingletonClass(Ruby::SingletonClass g) or + TSingletonMethod(Ruby::SingletonMethod g) or + TSpaceshipExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequalrangle } or + TSplatExprReal(Ruby::SplatArgument g) or + TSplatExprSynth(AST::AstNode parent, int i) { mkSynthChild(SplatExprKind(), parent, i) } or + TSplatParameter(Ruby::SplatParameter g) or + TStmtSequenceSynth(AST::AstNode parent, int i) { mkSynthChild(StmtSequenceKind(), parent, i) } or + TStringArrayLiteral(Ruby::StringArray g) or + TStringConcatenation(Ruby::ChainedString g) or + TStringEscapeSequenceComponent(Ruby::EscapeSequence g) or + TStringInterpolationComponent(Ruby::Interpolation g) or + TStringTextComponent(Ruby::Token g) { + g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent + } or + TSubExprReal(Ruby::Binary g) { g instanceof @ruby_binary_minus } or + TSubExprSynth(AST::AstNode parent, int i) { mkSynthChild(SubExprKind(), parent, i) } or + TSubshellLiteral(Ruby::Subshell g) or + TSymbolArrayLiteral(Ruby::SymbolArray g) or + TTernaryIfExpr(Ruby::Conditional g) or + TThen(Ruby::Then g) or + TTokenConstantAccess(Ruby::Constant g) { + // A tree-sitter `constant` token is a read of that constant in any context + // where an identifier would be a vcall. 
+ vcall(g) + or + explicitAssignmentNode(g, _) + } or + TTokenMethodName(MethodName::Token g) { MethodName::range(g) } or + TTokenSuperCall(Ruby::Super g) { vcall(g) } or + TToplevel(Ruby::Program g) or + TTrueLiteral(Ruby::True g) or + TTuplePatternParameter(Ruby::DestructuredParameter g) or + TUnaryMinusExpr(Ruby::Unary g) { g instanceof @ruby_unary_minus } or + TUnaryPlusExpr(Ruby::Unary g) { g instanceof @ruby_unary_plus } or + TUndefStmt(Ruby::Undef g) or + TUnlessExpr(Ruby::Unless g) or + TUnlessModifierExpr(Ruby::UnlessModifier g) or + TUntilExpr(Ruby::Until g) or + TUntilModifierExpr(Ruby::UntilModifier g) or + TWhenExpr(Ruby::When g) or + TWhileExpr(Ruby::While g) or + TWhileModifierExpr(Ruby::WhileModifier g) or + TYieldCall(Ruby::Yield g) + + /** + * Gets the underlying TreeSitter entity for a given AST node. This does not + * include synthesized AST nodes, because they are not the primary AST node + * for any given generated node. + */ + cached + Ruby::AstNode toGenerated(AST::AstNode n) { + n = TAddExprReal(result) or + n = TAliasStmt(result) or + n = TArgumentList(result) or + n = TAssignAddExpr(result) or + n = TAssignBitwiseAndExpr(result) or + n = TAssignBitwiseOrExpr(result) or + n = TAssignBitwiseXorExpr(result) or + n = TAssignDivExpr(result) or + n = TAssignExponentExpr(result) or + n = TAssignExprReal(result) or + n = TAssignLShiftExpr(result) or + n = TAssignLogicalAndExpr(result) or + n = TAssignLogicalOrExpr(result) or + n = TAssignModuloExpr(result) or + n = TAssignMulExpr(result) or + n = TAssignRShiftExpr(result) or + n = TAssignSubExpr(result) or + n = TBareStringLiteral(result) or + n = TBareSymbolLiteral(result) or + n = TBeginBlock(result) or + n = TBeginExpr(result) or + n = TBitwiseAndExprReal(result) or + n = TBitwiseOrExprReal(result) or + n = TBitwiseXorExprReal(result) or + n = TBlockArgument(result) or + n = TBlockParameter(result) or + n = TBraceBlock(result) or + n = TBreakStmt(result) or + n = TCaseEqExpr(result) or + n = 
TCaseExpr(result) or + n = TCharacterLiteral(result) or + n = TClassDeclaration(result) or + n = TClassVariableAccessReal(result, _) or + n = TComplementExpr(result) or + n = TComplexLiteral(result) or + n = TDefinedExpr(result) or + n = TDelimitedSymbolLiteral(result) or + n = TDestructuredLeftAssignment(result) or + n = TDivExprReal(result) or + n = TDo(result) or + n = TDoBlock(result) or + n = TElementReference(result) or + n = TElse(result) or + n = TElsif(result) or + n = TEmptyStmt(result) or + n = TEndBlock(result) or + n = TEnsure(result) or + n = TEqExpr(result) or + n = TExponentExprReal(result) or + n = TFalseLiteral(result) or + n = TFloatLiteral(result) or + n = TForExpr(result) or + n = TForIn(result) or // TODO REMOVE + n = TForwardArgument(result) or + n = TForwardParameter(result) or + n = TGEExpr(result) or + n = TGTExpr(result) or + n = TGlobalVariableAccessReal(result, _) or + n = THashKeySymbolLiteral(result) or + n = THashLiteral(result) or + n = THashSplatExpr(result) or + n = THashSplatParameter(result) or + n = THereDoc(result) or + n = TIdentifierMethodCall(result) or + n = TIf(result) or + n = TIfModifierExpr(result) or + n = TInstanceVariableAccessReal(result, _) or + n = TIntegerLiteralReal(result) or + n = TKeywordParameter(result) or + n = TLEExpr(result) or + n = TLShiftExprReal(result) or + n = TLTExpr(result) or + n = TLambda(result) or + n = TLeftAssignmentList(result) or + n = TLocalVariableAccessReal(result, _) or + n = TLogicalAndExprReal(result) or + n = TLogicalOrExprReal(result) or + n = TMethod(result) or + n = TModuleDeclaration(result) or + n = TModuloExprReal(result) or + n = TMulExprReal(result) or + n = TNEExpr(result) or + n = TNextStmt(result) or + n = TNilLiteral(result) or + n = TNoRegExpMatchExpr(result) or + n = TNotExpr(result) or + n = TOptionalParameter(result) or + n = TPair(result) or + n = TParenthesizedExpr(result) or + n = TRShiftExprReal(result) or + n = TRangeLiteralReal(result) or + n = 
TRationalLiteral(result) or + n = TRedoStmt(result) or + n = TRegExpLiteral(result) or + n = TRegExpMatchExpr(result) or + n = TRegularArrayLiteral(result) or + n = TRegularMethodCall(result) or + n = TRegularStringLiteral(result) or + n = TRegularSuperCall(result) or + n = TRescueClause(result) or + n = TRescueModifierExpr(result) or + n = TRetryStmt(result) or + n = TReturnStmt(result) or + n = TScopeResolutionConstantAccess(result, _) or + n = TScopeResolutionMethodCall(result, _) or + n = TSelfReal(result) or + n = TSimpleParameter(result) or + n = TSimpleSymbolLiteral(result) or + n = TSingletonClass(result) or + n = TSingletonMethod(result) or + n = TSpaceshipExpr(result) or + n = TSplatExprReal(result) or + n = TSplatParameter(result) or + n = TStringArrayLiteral(result) or + n = TStringConcatenation(result) or + n = TStringEscapeSequenceComponent(result) or + n = TStringInterpolationComponent(result) or + n = TStringTextComponent(result) or + n = TSubExprReal(result) or + n = TSubshellLiteral(result) or + n = TSymbolArrayLiteral(result) or + n = TTernaryIfExpr(result) or + n = TThen(result) or + n = TTokenConstantAccess(result) or + n = TTokenMethodName(result) or + n = TTokenSuperCall(result) or + n = TToplevel(result) or + n = TTrueLiteral(result) or + n = TTuplePatternParameter(result) or + n = TUnaryMinusExpr(result) or + n = TUnaryPlusExpr(result) or + n = TUndefStmt(result) or + n = TUnlessExpr(result) or + n = TUnlessModifierExpr(result) or + n = TUntilExpr(result) or + n = TUntilModifierExpr(result) or + n = TWhenExpr(result) or + n = TWhileExpr(result) or + n = TWhileModifierExpr(result) or + n = TYieldCall(result) + } + + /** Gets the `i`th synthesized child of `parent`. 
*/ + cached + AST::AstNode getSynthChild(AST::AstNode parent, int i) { + result = TAddExprSynth(parent, i) + or + result = TAssignExprSynth(parent, i) + or + result = TBitwiseAndExprSynth(parent, i) + or + result = TBitwiseOrExprSynth(parent, i) + or + result = TBitwiseXorExprSynth(parent, i) + or + result = TClassVariableAccessSynth(parent, i, _) + or + result = TConstantReadAccessSynth(parent, i, _) + or + result = TDivExprSynth(parent, i) + or + result = TExponentExprSynth(parent, i) + or + result = TGlobalVariableAccessSynth(parent, i, _) + or + result = TInstanceVariableAccessSynth(parent, i, _) + or + result = TIntegerLiteralSynth(parent, i, _) + or + result = TLShiftExprSynth(parent, i) + or + result = TLocalVariableAccessSynth(parent, i, _) + or + result = TLogicalAndExprSynth(parent, i) + or + result = TLogicalOrExprSynth(parent, i) + or + result = TMethodCallSynth(parent, i, _, _, _) + or + result = TModuloExprSynth(parent, i) + or + result = TMulExprSynth(parent, i) + or + result = TRangeLiteralSynth(parent, i, _) + or + result = TRShiftExprSynth(parent, i) + or + result = TSelfSynth(parent, i) + or + result = TSplatExprSynth(parent, i) + or + result = TStmtSequenceSynth(parent, i) + or + result = TSubExprSynth(parent, i) + } + + /** + * Holds if the `i`th child of `parent` is `child`. Either `parent` or + * `child` (or both) is a synthesized node. + */ + cached + predicate synthChild(AST::AstNode parent, int i, AST::AstNode child) { + child = getSynthChild(parent, i) + or + any(Synthesis s).child(parent, i, RealChild(child)) + } + + /** + * Like `toGenerated`, but also returns generated nodes for synthesized AST + * nodes. 
+ */ + cached + Ruby::AstNode toGeneratedInclSynth(AST::AstNode n) { + result = toGenerated(n) + or + not exists(toGenerated(n)) and + exists(AST::AstNode parent | + synthChild(parent, _, n) and + result = toGeneratedInclSynth(parent) + ) + } + + cached + Location getLocation(AST::AstNode n) { + synthLocation(n, result) + or + n.isSynthesized() and + not synthLocation(n, _) and + result = getLocation(n.getParent()) + or + result = toGenerated(n).getLocation() + } +} + +import Cached + +TAstNode fromGenerated(Ruby::AstNode n) { n = toGenerated(result) } + +class TCall = TMethodCall or TYieldCall; + +class TMethodCall = + TMethodCallSynth or TIdentifierMethodCall or TScopeResolutionMethodCall or TRegularMethodCall or + TElementReference or TSuperCall or TUnaryOperation or TBinaryOperation; + +class TSuperCall = TTokenSuperCall or TRegularSuperCall; + +class TConstantAccess = + TTokenConstantAccess or TScopeResolutionConstantAccess or TNamespace or TConstantReadAccessSynth; + +class TControlExpr = TConditionalExpr or TCaseExpr or TLoop; + +class TConditionalExpr = + TIfExpr or TUnlessExpr or TIfModifierExpr or TUnlessModifierExpr or TTernaryIfExpr; + +class TIfExpr = TIf or TElsif; + +class TConditionalLoop = TWhileExpr or TUntilExpr or TWhileModifierExpr or TUntilModifierExpr; + +class TLoop = TConditionalLoop or TForExpr; + +class TSelf = TSelfReal or TSelfSynth; + +class TExpr = + TSelf or TArgumentList or TRescueClause or TRescueModifierExpr or TPair or TStringConcatenation or + TCall or TBlockArgument or TConstantAccess or TControlExpr or TWhenExpr or TLiteral or + TCallable or TVariableAccess or TStmtSequence or TOperation or TSimpleParameter or + TForwardArgument; + +class TSplatExpr = TSplatExprReal or TSplatExprSynth; + +class TStmtSequence = + TBeginBlock or TEndBlock or TThen or TElse or TDo or TEnsure or TStringInterpolationComponent or + TBlock or TBodyStmt or TParenthesizedExpr or TStmtSequenceSynth; + +class TBodyStmt = TBeginExpr or TModuleBase or 
TMethod or TLambda or TDoBlock or TSingletonMethod; + +class TLiteral = + TNumericLiteral or TNilLiteral or TBooleanLiteral or TStringlikeLiteral or TCharacterLiteral or + TArrayLiteral or THashLiteral or TRangeLiteral or TTokenMethodName; + +class TNumericLiteral = TIntegerLiteral or TFloatLiteral or TRationalLiteral or TComplexLiteral; + +class TIntegerLiteral = TIntegerLiteralReal or TIntegerLiteralSynth; + +class TBooleanLiteral = TTrueLiteral or TFalseLiteral; + +class TStringComponent = + TStringTextComponent or TStringEscapeSequenceComponent or TStringInterpolationComponent; + +class TStringlikeLiteral = + TStringLiteral or TRegExpLiteral or TSymbolLiteral or TSubshellLiteral or THereDoc; + +class TStringLiteral = TRegularStringLiteral or TBareStringLiteral; + +class TSymbolLiteral = TSimpleSymbolLiteral or TComplexSymbolLiteral or THashKeySymbolLiteral; + +class TComplexSymbolLiteral = TDelimitedSymbolLiteral or TBareSymbolLiteral; + +class TArrayLiteral = TRegularArrayLiteral or TStringArrayLiteral or TSymbolArrayLiteral; + +class TCallable = TMethodBase or TLambda or TBlock; + +class TMethodBase = TMethod or TSingletonMethod; + +class TBlock = TDoBlock or TBraceBlock; + +class TModuleBase = TToplevel or TNamespace or TSingletonClass; + +class TNamespace = TClassDeclaration or TModuleDeclaration; + +class TOperation = TUnaryOperation or TBinaryOperation or TAssignment; + +class TUnaryOperation = + TUnaryLogicalOperation or TUnaryArithmeticOperation or TUnaryBitwiseOperation or TDefinedExpr or + TSplatExpr or THashSplatExpr; + +class TUnaryLogicalOperation = TNotExpr; + +class TUnaryArithmeticOperation = TUnaryPlusExpr or TUnaryMinusExpr; + +class TUnaryBitwiseOperation = TComplementExpr; + +class TBinaryOperation = + TBinaryArithmeticOperation or TBinaryLogicalOperation or TBinaryBitwiseOperation or + TComparisonOperation or TSpaceshipExpr or TRegExpMatchExpr or TNoRegExpMatchExpr; + +class TBinaryArithmeticOperation = + TAddExpr or TSubExpr or TMulExpr or 
TDivExpr or TModuloExpr or TExponentExpr; + +class TAddExpr = TAddExprReal or TAddExprSynth; + +class TSubExpr = TSubExprReal or TSubExprSynth; + +class TMulExpr = TMulExprReal or TMulExprSynth; + +class TDivExpr = TDivExprReal or TDivExprSynth; + +class TModuloExpr = TModuloExprReal or TModuloExprSynth; + +class TExponentExpr = TExponentExprReal or TExponentExprSynth; + +class TBinaryLogicalOperation = TLogicalAndExpr or TLogicalOrExpr; + +class TLogicalAndExpr = TLogicalAndExprReal or TLogicalAndExprSynth; + +class TLogicalOrExpr = TLogicalOrExprReal or TLogicalOrExprSynth; + +class TBinaryBitwiseOperation = + TLShiftExpr or TRShiftExpr or TBitwiseAndExpr or TBitwiseOrExpr or TBitwiseXorExpr; + +class TLShiftExpr = TLShiftExprReal or TLShiftExprSynth; + +class TRangeLiteral = TRangeLiteralReal or TRangeLiteralSynth; + +class TRShiftExpr = TRShiftExprReal or TRShiftExprSynth; + +class TBitwiseAndExpr = TBitwiseAndExprReal or TBitwiseAndExprSynth; + +class TBitwiseOrExpr = TBitwiseOrExprReal or TBitwiseOrExprSynth; + +class TBitwiseXorExpr = TBitwiseXorExprReal or TBitwiseXorExprSynth; + +class TComparisonOperation = TEqualityOperation or TRelationalOperation; + +class TEqualityOperation = TEqExpr or TNEExpr or TCaseEqExpr; + +class TRelationalOperation = TGTExpr or TGEExpr or TLTExpr or TLEExpr; + +class TAssignExpr = TAssignExprReal or TAssignExprSynth; + +class TAssignment = TAssignExpr or TAssignOperation; + +class TAssignOperation = + TAssignArithmeticOperation or TAssignLogicalOperation or TAssignBitwiseOperation; + +class TAssignArithmeticOperation = + TAssignAddExpr or TAssignSubExpr or TAssignMulExpr or TAssignDivExpr or TAssignModuloExpr or + TAssignExponentExpr; + +class TAssignLogicalOperation = TAssignLogicalAndExpr or TAssignLogicalOrExpr; + +class TAssignBitwiseOperation = + TAssignLShiftExpr or TAssignRShiftExpr or TAssignBitwiseAndExpr or TAssignBitwiseOrExpr or + TAssignBitwiseXorExpr; + +class TStmt = + TEmptyStmt or TBodyStmt or TStmtSequence or 
TUndefStmt or TAliasStmt or TReturningStmt or + TRedoStmt or TRetryStmt or TExpr; + +class TReturningStmt = TReturnStmt or TBreakStmt or TNextStmt; + +class TParameter = + TPatternParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or + TOptionalParameter or TSplatParameter or TForwardParameter; + +class TPatternParameter = TSimpleParameter or TTuplePatternParameter; + +class TNamedParameter = + TSimpleParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or + TOptionalParameter or TSplatParameter; + +class TTuplePattern = TTuplePatternParameter or TDestructuredLeftAssignment or TLeftAssignmentList; + +class TVariableAccess = + TLocalVariableAccess or TGlobalVariableAccess or TInstanceVariableAccess or TClassVariableAccess; + +class TLocalVariableAccess = TLocalVariableAccessReal or TLocalVariableAccessSynth; + +class TGlobalVariableAccess = TGlobalVariableAccessReal or TGlobalVariableAccessSynth; + +class TInstanceVariableAccess = TInstanceVariableAccessReal or TInstanceVariableAccessSynth; + +class TClassVariableAccess = TClassVariableAccessReal or TClassVariableAccessSynth; diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll new file mode 100644 index 00000000000..43681e1d58f --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll @@ -0,0 +1,186 @@ +private import TreeSitter +private import Variable +private import codeql.ruby.AST +private import codeql.ruby.ast.internal.AST + +predicate isIdentifierMethodCall(Ruby::Identifier g) { vcall(g) and not access(g, _) } + +predicate isRegularMethodCall(Ruby::Call g) { not g.getMethod() instanceof Ruby::Super } + +predicate isScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) { + i = g.getName() and + not exists(Ruby::Call c | c.getMethod() = g) +} + +abstract class CallImpl extends Expr, TCall { + abstract AstNode getArgumentImpl(int n); + + 
/** + * It is not possible to define this predicate as + * + * ```ql + * result = count(this.getArgumentImpl(_)) + * ``` + * + * since that will result in a non-monotonicity error. + */ + abstract int getNumberOfArgumentsImpl(); +} + +abstract class MethodCallImpl extends CallImpl, TMethodCall { + abstract AstNode getReceiverImpl(); + + abstract string getMethodNameImpl(); + + abstract Block getBlockImpl(); +} + +class MethodCallSynth extends MethodCallImpl, TMethodCallSynth { + final override string getMethodNameImpl() { + exists(boolean setter, string name | this = TMethodCallSynth(_, _, name, setter, _) | + setter = true and result = name + "=" + or + setter = false and result = name + ) + } + + final override AstNode getReceiverImpl() { synthChild(this, 0, result) } + + final override AstNode getArgumentImpl(int n) { synthChild(this, n + 1, result) and n >= 0 } + + final override int getNumberOfArgumentsImpl() { this = TMethodCallSynth(_, _, _, _, result) } + + final override Block getBlockImpl() { none() } +} + +class IdentifierMethodCall extends MethodCallImpl, TIdentifierMethodCall { + private Ruby::Identifier g; + + IdentifierMethodCall() { this = TIdentifierMethodCall(g) } + + final override string getMethodNameImpl() { result = g.getValue() } + + final override AstNode getReceiverImpl() { result = TSelfSynth(this, 0) } + + final override Expr getArgumentImpl(int n) { none() } + + final override int getNumberOfArgumentsImpl() { result = 0 } + + final override Block getBlockImpl() { none() } +} + +class ScopeResolutionMethodCall extends MethodCallImpl, TScopeResolutionMethodCall { + private Ruby::ScopeResolution g; + private Ruby::Identifier i; + + ScopeResolutionMethodCall() { this = TScopeResolutionMethodCall(g, i) } + + final override string getMethodNameImpl() { result = i.getValue() } + + final override Expr getReceiverImpl() { toGenerated(result) = g.getScope() } + + final override Expr getArgumentImpl(int n) { none() } + + final override int 
getNumberOfArgumentsImpl() { result = 0 } + + final override Block getBlockImpl() { none() } +} + +class RegularMethodCall extends MethodCallImpl, TRegularMethodCall { + private Ruby::Call g; + + RegularMethodCall() { this = TRegularMethodCall(g) } + + final override Expr getReceiverImpl() { + toGenerated(result) = g.getReceiver() + or + not exists(g.getReceiver()) and + toGenerated(result) = g.getMethod().(Ruby::ScopeResolution).getScope() + or + result = TSelfSynth(this, 0) + } + + final override string getMethodNameImpl() { + isRegularMethodCall(g) and + ( + result = "call" and g.getMethod() instanceof Ruby::ArgumentList + or + result = g.getMethod().(Ruby::Token).getValue() + or + result = g.getMethod().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue() + ) + } + + final override Expr getArgumentImpl(int n) { + toGenerated(result) = g.getArguments().getChild(n) + or + toGenerated(result) = g.getMethod().(Ruby::ArgumentList).getChild(n) + } + + final override int getNumberOfArgumentsImpl() { + result = + count(g.getArguments().getChild(_)) + count(g.getMethod().(Ruby::ArgumentList).getChild(_)) + } + + final override Block getBlockImpl() { toGenerated(result) = g.getBlock() } +} + +class ElementReferenceImpl extends MethodCallImpl, TElementReference { + private Ruby::ElementReference g; + + ElementReferenceImpl() { this = TElementReference(g) } + + final override Expr getReceiverImpl() { toGenerated(result) = g.getObject() } + + final override Expr getArgumentImpl(int n) { toGenerated(result) = g.getChild(n) } + + final override int getNumberOfArgumentsImpl() { result = count(g.getChild(_)) } + + final override string getMethodNameImpl() { result = "[]" } + + final override Block getBlockImpl() { none() } +} + +abstract class SuperCallImpl extends MethodCallImpl, TSuperCall { } + +class TokenSuperCall extends SuperCallImpl, TTokenSuperCall { + private Ruby::Super g; + + TokenSuperCall() { this = TTokenSuperCall(g) } + + final override string 
getMethodNameImpl() { result = g.getValue() } + + final override Expr getReceiverImpl() { none() } + + final override Expr getArgumentImpl(int n) { none() } + + final override int getNumberOfArgumentsImpl() { result = 0 } + + final override Block getBlockImpl() { none() } +} + +class RegularSuperCall extends SuperCallImpl, TRegularSuperCall { + private Ruby::Call g; + + RegularSuperCall() { this = TRegularSuperCall(g) } + + final override string getMethodNameImpl() { result = g.getMethod().(Ruby::Super).getValue() } + + final override Expr getReceiverImpl() { none() } + + final override Expr getArgumentImpl(int n) { toGenerated(result) = g.getArguments().getChild(n) } + + final override int getNumberOfArgumentsImpl() { result = count(g.getArguments().getChild(_)) } + + final override Block getBlockImpl() { toGenerated(result) = g.getBlock() } +} + +class YieldCallImpl extends CallImpl, TYieldCall { + Ruby::Yield g; + + YieldCallImpl() { this = TYieldCall(g) } + + final override Expr getArgumentImpl(int n) { toGenerated(result) = g.getChild().getChild(n) } + + final override int getNumberOfArgumentsImpl() { result = count(g.getChild().getChild(_)) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll new file mode 100644 index 00000000000..7a69bf5b783 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll @@ -0,0 +1,43 @@ +import codeql.Locations +private import TreeSitter +private import codeql.ruby.ast.Erb + +cached +private module Cached { + cached + newtype TAstNode = + TCommentDirective(Erb::CommentDirective g) or + TDirective(Erb::Directive g) or + TGraphqlDirective(Erb::GraphqlDirective g) or + TOutputDirective(Erb::OutputDirective g) or + TTemplate(Erb::Template g) or + TToken(Erb::Token g) or + TComment(Erb::Comment g) or + TCode(Erb::Code g) + + /** + * Gets the underlying TreeSitter entity for a given erb AST node. 
+ */ + cached + Erb::AstNode toGenerated(ErbAstNode n) { + n = TCommentDirective(result) or + n = TDirective(result) or + n = TGraphqlDirective(result) or + n = TOutputDirective(result) or + n = TTemplate(result) or + n = TToken(result) or + n = TComment(result) or + n = TCode(result) + } + + cached + Location getLocation(ErbAstNode n) { result = toGenerated(n).getLocation() } +} + +import Cached + +TAstNode fromGenerated(Erb::AstNode n) { n = toGenerated(result) } + +class TDirectiveNode = TCommentDirective or TDirective or TGraphqlDirective or TOutputDirective; + +class TTokenNode = TToken or TComment or TCode; diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll new file mode 100644 index 00000000000..247573b59e5 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll @@ -0,0 +1,409 @@ +private import codeql.Locations +private import codeql.ruby.AST +private import codeql.ruby.ast.Call +private import codeql.ruby.ast.Constant +private import codeql.ruby.ast.Expr +private import codeql.ruby.ast.Module +private import codeql.ruby.ast.Operation +private import codeql.ruby.ast.Scope + +// Names of built-in modules and classes +private string builtin() { + result = + [ + "Object", "Kernel", "BasicObject", "Class", "Module", "NilClass", "FalseClass", "TrueClass", + "Numeric", "Integer", "Float", "Rational", "Complex", "Array", "Hash", "Symbol", "Proc" + ] +} + +cached +private module Cached { + cached + newtype TModule = + TResolved(string qName) { + qName = builtin() + or + qName = namespaceDeclaration(_) + } or + TUnresolved(Namespace n) { not exists(namespaceDeclaration(n)) } + + cached + string namespaceDeclaration(Namespace n) { + isToplevel(n) and result = n.getName() + or + not isToplevel(n) and + not exists(n.getScopeExpr()) and + result = scopeAppend(namespaceDeclaration(n.getEnclosingModule()), n.getName()) + or + exists(string 
container | + TResolved(container) = resolveScopeExpr(n.getScopeExpr()) and + result = scopeAppend(container, n.getName()) + ) + } + + cached + Module getSuperClass(Module cls) { + cls = TResolved("Object") and result = TResolved("BasicObject") + or + cls = TResolved(["Module", "Numeric", "Array", "Hash", "FalseClass", "TrueClass", "NilClass"]) and + result = TResolved("Object") + or + cls = TResolved(["Integer", "Float", "Rational", "Complex"]) and + result = TResolved("Numeric") + or + cls = TResolved("Class") and + result = TResolved("Module") + or + not cls = TResolved(builtin()) and + ( + exists(ClassDeclaration d | + d = cls.getADeclaration() and + result = resolveScopeExpr(d.getSuperclassExpr()) + ) + or + result = TResolved("Object") and + forex(ClassDeclaration d | d = cls.getADeclaration() | + not exists(resolveScopeExpr(d.getSuperclassExpr())) + ) + ) + } + + cached + Module getAnIncludedModule(Module m) { + m = TResolved("Object") and result = TResolved("Kernel") + or + exists(IncludeOrPrependCall c | + c.getMethodName() = "include" and + ( + m = resolveScopeExpr(c.getReceiver()) + or + m = enclosingModule(c).getModule() and + c.getReceiver() instanceof Self + ) and + result = resolveScopeExpr(c.getAnArgument()) + ) + } + + cached + Module getAPrependedModule(Module m) { + exists(IncludeOrPrependCall c | + c.getMethodName() = "prepend" and + ( + m = resolveScopeExpr(c.getReceiver()) + or + m = enclosingModule(c).getModule() and + c.getReceiver() instanceof Self + ) and + result = resolveScopeExpr(c.getAnArgument()) + ) + } + + /** + * Resolve class or module read access to a qualified module name. + */ + cached + TResolved resolveScopeExpr(ConstantReadAccess r) { + exists(string qname | qname = resolveConstant(r) and result = TResolved(qname)) + } + + /** + * Resolve constant access (class, module or otherwise) to a qualified module name. 
+ * `resolveConstant/1` picks the best (lowest priority number) result of + * `resolveScopeExpr/2` that resolves to a constant definition. If the constant + * definition is a Namespace then it is returned; if it's a constant assignment then + * the right-hand side of the assignment is resolved. + */ + cached + string resolveConstant(ConstantReadAccess r) { + exists(string qname | + qname = + min(string qn, int p | + isDefinedConstant(qn) and + qn = resolveScopeExpr(r, p) and + // prevent classes/modules that contain/extend themselves + not exists(ConstantWriteAccess w | qn = constantDefinition0(w) | + r = w.getScopeExpr() + or + r = w.(ClassDeclaration).getSuperclassExpr() + ) + | + qn order by p + ) + | + result = qname + or + exists(ConstantAssignment a | + qname = constantDefinition0(a) and + result = resolveConstant(a.getParent().(Assignment).getRightOperand()) + ) + ) + } + + cached + Method lookupMethod(Module m, string name) { TMethod(result) = lookupMethodOrConst(m, name) } + + cached + Expr lookupConst(Module m, string name) { + TExpr(result) = lookupMethodOrConst(m, name) + or + exists(AssignExpr ae, ConstantWriteAccess w | + w = ae.getLeftOperand() and + w.getName() = name and + m = resolveScopeExpr(w.getScopeExpr()) and + result = ae.getRightOperand() + ) + } +} + +import Cached + +private predicate isToplevel(ConstantAccess n) { + not exists(n.getScopeExpr()) and + ( + n.hasGlobalScope() + or + n.getEnclosingModule() instanceof Toplevel + ) +} + +private predicate isDefinedConstant(string qualifiedModuleName) { + qualifiedModuleName = [builtin(), constantDefinition0(_)] +} + +private int maxDepth() { result = 1 + max(int level | exists(enclosing(_, level))) } + +private ModuleBase enclosing(ModuleBase m, int level) { + result = m and level = 0 + or + result = enclosing(m.getEnclosingModule(), level - 1) +} + +pragma[noinline] +private Namespace enclosingNameSpaceConstantReadAccess( + ConstantReadAccess c, int priority, string name +) { + result =
enclosing(c.getEnclosingModule(), priority) and + name = c.getName() +} + +/** + * Resolve constant read access (typically a scope expression) to a qualified name. The + * `priority` value indicates the precedence of the solution with respect to the lookup order. + * A constant name without scope specifier is resolved against its enclosing modules (inner-most first); + * if the constant is not found in any of the enclosing modules, then the constant will be resolved + * with respect to the ancestors (prepends, includes, super classes, and their ancestors) of the + * directly enclosing module. + */ +private string resolveScopeExpr(ConstantReadAccess c, int priority) { + c.hasGlobalScope() and result = c.getName() and priority = 0 + or + exists(string name | + result = qualifiedModuleName(resolveScopeExprConstantReadAccess(c, priority, name), name) + ) + or + not exists(c.getScopeExpr()) and + not c.hasGlobalScope() and + ( + exists(string name | + exists(Namespace n | + n = enclosingNameSpaceConstantReadAccess(c, priority, name) and + result = qualifiedModuleName(constantDefinition0(n), name) + ) + or + result = + qualifiedModuleName(ancestors(qualifiedModuleNameConstantReadAccess(c, name), + priority - maxDepth()), name) + ) + or + priority = maxDepth() + 4 and + qualifiedModuleNameConstantReadAccess(c, result) != "BasicObject" + ) +} + +pragma[nomagic] +private string resolveScopeExprConstantReadAccess(ConstantReadAccess c, int priority, string name) { + result = resolveScopeExpr(c.getScopeExpr(), priority) and + name = c.getName() +} + +bindingset[qualifier, name] +private string scopeAppend(string qualifier, string name) { + if qualifier = "Object" then result = name else result = qualifier + "::" + name +} + +private string qualifiedModuleName(ModuleBase m) { + result = "Object" and m instanceof Toplevel + or + result = constantDefinition0(m) +} + +pragma[noinline] +private string qualifiedModuleNameConstantWriteAccess(ConstantWriteAccess c, string name) { + 
result = qualifiedModuleName(c.getEnclosingModule()) and + name = c.getName() +} + +pragma[noinline] +private string qualifiedModuleNameConstantReadAccess(ConstantReadAccess c, string name) { + result = qualifiedModuleName(c.getEnclosingModule()) and + name = c.getName() +} + +/** + * Get a qualified name for a constant definition. May return multiple qualified + * names because we over-approximate when resolving scope resolutions and ignore + * lookup order precedence. Taking lookup order into account here would lead to + * non-monotonic recursion. + */ +private string constantDefinition0(ConstantWriteAccess c) { + c.hasGlobalScope() and result = c.getName() + or + result = scopeAppend(resolveScopeExpr(c.getScopeExpr(), _), c.getName()) + or + not exists(c.getScopeExpr()) and + not c.hasGlobalScope() and + exists(string name | result = scopeAppend(qualifiedModuleNameConstantWriteAccess(c, name), name)) +} + +/** + * The qualified names of the ancestors of a class/module. The ancestors should be an ordered list + * of the ancestors of `prepend`ed modules, the module itself, the ancestors of `include`d modules + * and the ancestors of the super class. The priority value only distinguishes the kind of ancestor, + * it does not order the ancestors within a group of the same kind. This is an over-approximation, however, + * computing the precise order is tricky because it depends on the evaluation/file loading order. + */ +// TODO: the order of super classes can be determined more precisely even without knowing the evaluation +// order, so we should be able to make this more precise.
+private string ancestors(string qname, int priority) { + result = ancestors(prepends(qname), _) and priority = 0 + or + result = qname and priority = 1 and isDefinedConstant(qname) + or + result = ancestors(includes(qname), _) and priority = 2 + or + result = ancestors(superclass(qname), _) and priority = 3 +} + +private class IncludeOrPrependCall extends MethodCall { + IncludeOrPrependCall() { this.getMethodName() = ["include", "prepend"] } + + string getAModule() { result = resolveScopeExpr(this.getAnArgument(), _) } + + string getTarget() { + result = resolveScopeExpr(this.getReceiver(), _) + or + result = qualifiedModuleName(enclosingModule(this)) and + ( + this.getReceiver() instanceof Self + or + not exists(this.getReceiver()) + ) + } +} + +/** + * A variant of AstNode::getEnclosingModule that excludes + * results that are enclosed in a block. This is a bit wrong because + * it could lead to false negatives. However, `include` statements in + * blocks are very rare in normal code. The majority of cases are in calls + * to methods like `module_eval` and `RSpec.describe` / `RSpec.context`. These + * methods evaluate the block in the context of some other module/class instead of + * the enclosing one.
+ */ +private ModuleBase enclosingModule(AstNode node) { result = parent*(node).getParent() } + +private AstNode parent(AstNode n) { + result = n.getParent() and + not result instanceof ModuleBase and + not result instanceof Block +} + +private string prepends(string qname) { + exists(IncludeOrPrependCall m | + m.getMethodName() = "prepend" and + qname = m.getTarget() and + result = m.getAModule() + ) +} + +private string includes(string qname) { + qname = "Object" and + result = "Kernel" + or + exists(IncludeOrPrependCall m | + m.getMethodName() = "include" and + qname = m.getTarget() and + result = m.getAModule() + ) +} + +private Expr superexpr(string qname) { + exists(ClassDeclaration c | qname = constantDefinition0(c) and result = c.getSuperclassExpr()) +} + +private string superclass(string qname) { + qname = "Object" and result = "BasicObject" + or + result = resolveScopeExpr(superexpr(qname), _) +} + +private string qualifiedModuleName(string container, string name) { + isDefinedConstant(result) and + ( + container = result.regexpCapture("(.+)::([^:]+)", 1) and + name = result.regexpCapture("(.+)::([^:]+)", 2) + or + container = "Object" and name = result + ) +} + +private Module getAncestors(Module m) { + result = m or + result = getAncestors(m.getAnIncludedModule()) or + result = getAncestors(m.getAPrependedModule()) +} + +private newtype TMethodOrExpr = + TMethod(Method m) or + TExpr(Expr e) + +private TMethodOrExpr getMethodOrConst(TModule owner, string name) { + exists(ModuleBase m | m.getModule() = owner | + result = TMethod(m.getMethod(name)) + or + result = TExpr(m.getConstant(name)) + ) +} + +module ExposedForTestingOnly { + Method getMethod(TModule owner, string name) { TMethod(result) = getMethodOrConst(owner, name) } + + Expr getConst(TModule owner, string name) { TExpr(result) = getMethodOrConst(owner, name) } +} + +private TMethodOrExpr lookupMethodOrConst0(Module m, string name) { + result = lookupMethodOrConst0(m.getAPrependedModule(), name) 
+ or + not exists(getMethodOrConst(getAncestors(m.getAPrependedModule()), name)) and + ( + result = getMethodOrConst(m, name) + or + not exists(getMethodOrConst(m, name)) and + result = lookupMethodOrConst0(m.getAnIncludedModule(), name) + ) +} + +private AstNode getNode(TMethodOrExpr e) { e = TMethod(result) or e = TExpr(result) } + +private TMethodOrExpr lookupMethodOrConst(Module m, string name) { + result = lookupMethodOrConst0(m, name) + or + not exists(lookupMethodOrConst0(m, name)) and + result = lookupMethodOrConst(m.getSuperClass(), name) and + // For now, we restrict the scope of top-level declarations to their file. + // This may remove some plausible targets, but also removes a lot of + // implausible targets + if getNode(result).getEnclosingModule() instanceof Toplevel + then getNode(result).getFile() = m.getADeclaration().getFile() + else any() +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll new file mode 100644 index 00000000000..3571c97e9dc --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll @@ -0,0 +1,198 @@ +private import codeql.ruby.AST +private import AST +private import TreeSitter +private import Call + +abstract class OperationImpl extends Expr, TOperation { + abstract string getOperatorImpl(); + + abstract Expr getAnOperandImpl(); +} + +abstract class UnaryOperationImpl extends OperationImpl, MethodCallImpl, TUnaryOperation { + abstract Expr getOperandImpl(); + + final override Expr getAnOperandImpl() { result = this.getOperandImpl() } + + final override string getMethodNameImpl() { result = this.getOperatorImpl() } + + final override AstNode getReceiverImpl() { result = this.getOperandImpl() } + + final override Expr getArgumentImpl(int n) { none() } + + final override int getNumberOfArgumentsImpl() { result = 0 } + + final override Block getBlockImpl() { none() } +} + +class 
UnaryOperationGenerated extends UnaryOperationImpl { + private Ruby::Unary g; + + UnaryOperationGenerated() { g = toGenerated(this) } + + final override Expr getOperandImpl() { toGenerated(result) = g.getOperand() } + + final override string getOperatorImpl() { result = g.getOperator() } +} + +class SplatExprReal extends UnaryOperationImpl, TSplatExprReal { + private Ruby::SplatArgument g; + + SplatExprReal() { this = TSplatExprReal(g) } + + final override string getOperatorImpl() { result = "*" } + + final override Expr getOperandImpl() { toGenerated(result) = g.getChild() } +} + +class SplatExprSynth extends UnaryOperationImpl, TSplatExprSynth { + final override string getOperatorImpl() { result = "*" } + + final override Expr getOperandImpl() { synthChild(this, 0, result) } +} + +class HashSplatExprImpl extends UnaryOperationImpl, THashSplatExpr { + private Ruby::HashSplatArgument g; + + HashSplatExprImpl() { this = THashSplatExpr(g) } + + final override Expr getOperandImpl() { toGenerated(result) = g.getChild() } + + final override string getOperatorImpl() { result = "**" } +} + +abstract class BinaryOperationImpl extends OperationImpl, MethodCallImpl, TBinaryOperation { + abstract Stmt getLeftOperandImpl(); + + abstract Stmt getRightOperandImpl(); + + final override Expr getAnOperandImpl() { + result = this.getLeftOperandImpl() + or + result = this.getRightOperandImpl() + } + + final override string getMethodNameImpl() { result = this.getOperatorImpl() } + + final override AstNode getReceiverImpl() { result = this.getLeftOperandImpl() } + + final override Expr getArgumentImpl(int n) { n = 0 and result = this.getRightOperandImpl() } + + final override int getNumberOfArgumentsImpl() { result = 1 } + + final override Block getBlockImpl() { none() } +} + +class BinaryOperationReal extends BinaryOperationImpl { + private Ruby::Binary g; + + BinaryOperationReal() { g = toGenerated(this) } + + final override string getOperatorImpl() { result = g.getOperator() } + + 
final override Stmt getLeftOperandImpl() { toGenerated(result) = g.getLeft() } + + final override Stmt getRightOperandImpl() { toGenerated(result) = g.getRight() } +} + +abstract class BinaryOperationSynth extends BinaryOperationImpl { + final override Stmt getLeftOperandImpl() { synthChild(this, 0, result) } + + final override Stmt getRightOperandImpl() { synthChild(this, 1, result) } +} + +class AddExprSynth extends BinaryOperationSynth, TAddExprSynth { + final override string getOperatorImpl() { result = "+" } +} + +class SubExprSynth extends BinaryOperationSynth, TSubExprSynth { + final override string getOperatorImpl() { result = "-" } +} + +class MulExprSynth extends BinaryOperationSynth, TMulExprSynth { + final override string getOperatorImpl() { result = "*" } +} + +class DivExprSynth extends BinaryOperationSynth, TDivExprSynth { + final override string getOperatorImpl() { result = "/" } +} + +class ModuloExprSynth extends BinaryOperationSynth, TModuloExprSynth { + final override string getOperatorImpl() { result = "%" } +} + +class ExponentExprSynth extends BinaryOperationSynth, TExponentExprSynth { + final override string getOperatorImpl() { result = "**" } +} + +class LogicalAndExprSynth extends BinaryOperationSynth, TLogicalAndExprSynth { + final override string getOperatorImpl() { result = "&&" } +} + +class LogicalOrExprSynth extends BinaryOperationSynth, TLogicalOrExprSynth { + final override string getOperatorImpl() { result = "||" } +} + +class LShiftExprSynth extends BinaryOperationSynth, TLShiftExprSynth { + final override string getOperatorImpl() { result = "<<" } +} + +class RShiftExprSynth extends BinaryOperationSynth, TRShiftExprSynth { + final override string getOperatorImpl() { result = ">>" } +} + +class BitwiseAndSynthExpr extends BinaryOperationSynth, TBitwiseAndExprSynth { + final override string getOperatorImpl() { result = "&" } +} + +class BitwiseOrSynthExpr extends BinaryOperationSynth, TBitwiseOrExprSynth { + final override string 
getOperatorImpl() { result = "|" } +} + +class BitwiseXorSynthExpr extends BinaryOperationSynth, TBitwiseXorExprSynth { + final override string getOperatorImpl() { result = "^" } +} + +abstract class AssignmentImpl extends OperationImpl, TAssignment { + abstract Pattern getLeftOperandImpl(); + + abstract Expr getRightOperandImpl(); + + final override Expr getAnOperandImpl() { + result = this.getLeftOperandImpl() + or + result = this.getRightOperandImpl() + } +} + +class AssignExprReal extends AssignmentImpl, TAssignExprReal { + private Ruby::Assignment g; + + AssignExprReal() { this = TAssignExprReal(g) } + + final override string getOperatorImpl() { result = "=" } + + final override Pattern getLeftOperandImpl() { toGenerated(result) = g.getLeft() } + + final override Expr getRightOperandImpl() { toGenerated(result) = g.getRight() } +} + +class AssignExprSynth extends AssignmentImpl, TAssignExprSynth { + final override string getOperatorImpl() { result = "=" } + + final override Pattern getLeftOperandImpl() { synthChild(this, 0, result) } + + final override Expr getRightOperandImpl() { synthChild(this, 1, result) } +} + +class AssignOperationImpl extends AssignmentImpl, TAssignOperation { + Ruby::OperatorAssignment g; + + AssignOperationImpl() { g = toGenerated(this) } + + final override string getOperatorImpl() { result = g.getOperator() } + + final override Pattern getLeftOperandImpl() { toGenerated(result) = g.getLeft() } + + final override Expr getRightOperandImpl() { toGenerated(result) = g.getRight() } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll new file mode 100644 index 00000000000..f888d89c1ac --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll @@ -0,0 +1,19 @@ +private import codeql.ruby.AST +private import AST +private import TreeSitter + +module Parameter { + class Range extends Ruby::AstNode { + private 
int pos; + + Range() { + this = any(Ruby::BlockParameters bp).getChild(pos) + or + this = any(Ruby::MethodParameters mp).getChild(pos) + or + this = any(Ruby::LambdaParameters lp).getChild(pos) + } + + int getPosition() { result = pos } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll new file mode 100644 index 00000000000..ce18e77f222 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll @@ -0,0 +1,32 @@ +private import codeql.ruby.AST +private import AST +private import TreeSitter + +abstract class TuplePatternImpl extends Ruby::AstNode { + abstract Ruby::AstNode getChildNode(int i); + + final int getRestIndex() { + result = unique(int i | this.getChildNode(i) instanceof Ruby::RestAssignment) + } +} + +class TuplePatternParameterImpl extends TuplePatternImpl, Ruby::DestructuredParameter { + override Ruby::AstNode getChildNode(int i) { result = this.getChild(i) } +} + +class DestructuredLeftAssignmentImpl extends TuplePatternImpl, Ruby::DestructuredLeftAssignment { + override Ruby::AstNode getChildNode(int i) { result = this.getChild(i) } +} + +class LeftAssignmentListImpl extends TuplePatternImpl, Ruby::LeftAssignmentList { + override Ruby::AstNode getChildNode(int i) { + this = + any(Ruby::LeftAssignmentList lal | + if + strictcount(int j | exists(lal.getChild(j))) = 1 and + lal.getChild(0) instanceof Ruby::DestructuredLeftAssignment + then result = lal.getChild(0).(Ruby::DestructuredLeftAssignment).getChild(i) + else result = lal.getChild(i) + ) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll new file mode 100644 index 00000000000..1cc64fac885 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll @@ -0,0 +1,109 @@ +private import TreeSitter +private import codeql.ruby.ast.Scope +private 
import codeql.ruby.ast.internal.AST +private import codeql.ruby.ast.internal.Parameter + +class TScopeType = TMethodBase or TModuleLike or TBlockLike; + +private class TBlockLike = TDoBlock or TLambda or TBlock or TEndBlock; + +private class TModuleLike = TToplevel or TModuleDeclaration or TClassDeclaration or TSingletonClass; + +module Scope { + class TypeRange = Callable::TypeRange or ModuleBase::TypeRange or @ruby_end_block; + + class Range extends Ruby::AstNode, TypeRange { + Range() { not this = any(Ruby::Lambda l).getBody() } + + ModuleBase::Range getEnclosingModule() { + result = this + or + not this instanceof ModuleBase::Range and result = this.getOuterScope().getEnclosingModule() + } + + MethodBase::Range getEnclosingMethod() { + result = this + or + not this instanceof MethodBase::Range and + not this instanceof ModuleBase::Range and + result = this.getOuterScope().getEnclosingMethod() + } + + Range getOuterScope() { result = scopeOf(this) } + } +} + +module MethodBase { + class TypeRange = @ruby_method or @ruby_singleton_method; + + class Range extends Scope::Range, TypeRange { } +} + +module Callable { + class TypeRange = MethodBase::TypeRange or @ruby_do_block or @ruby_lambda or @ruby_block; + + class Range extends Scope::Range, TypeRange { + Parameter::Range getParameter(int i) { + result = this.(Ruby::Method).getParameters().getChild(i) or + result = this.(Ruby::SingletonMethod).getParameters().getChild(i) or + result = this.(Ruby::DoBlock).getParameters().getChild(i) or + result = this.(Ruby::Lambda).getParameters().getChild(i) or + result = this.(Ruby::Block).getParameters().getChild(i) + } + } +} + +module ModuleBase { + class TypeRange = @ruby_program or @ruby_module or @ruby_class or @ruby_singleton_class; + + class Range extends Scope::Range, TypeRange { } +} + +pragma[noinline] +private predicate rankHeredocBody(File f, Ruby::HeredocBody b, int i) { + b = + rank[i](Ruby::HeredocBody b0 | + f = b0.getLocation().getFile() + | + b0 order by 
b0.getLocation().getStartLine(), b0.getLocation().getStartColumn() + ) +} + +Ruby::HeredocBody getHereDocBody(Ruby::HeredocBeginning g) { + exists(int i, File f | + g = + rank[i](Ruby::HeredocBeginning b | + f = b.getLocation().getFile() + | + b order by b.getLocation().getStartLine(), b.getLocation().getStartColumn() + ) and + rankHeredocBody(f, result, i) + ) +} + +private Ruby::AstNode parentOf(Ruby::AstNode n) { + n = getHereDocBody(result) + or + exists(Ruby::AstNode parent | parent = n.getParent() | + if + n = + [ + parent.(Ruby::Module).getName(), parent.(Ruby::Class).getName(), + parent.(Ruby::Class).getSuperclass(), parent.(Ruby::SingletonClass).getValue(), + parent.(Ruby::Method).getName(), parent.(Ruby::SingletonMethod).getName(), + parent.(Ruby::SingletonMethod).getObject() + ] + then result = parent.getParent() + else result = parent + ) +} + +/** Gets the enclosing scope of a node */ +cached +Scope::Range scopeOf(Ruby::AstNode n) { + exists(Ruby::AstNode p | p = parentOf(n) | + p = result + or + not p instanceof Scope::Range and result = scopeOf(p) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll new file mode 100644 index 00000000000..a8673050148 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll @@ -0,0 +1,797 @@ +/** Provides predicates for synthesizing AST nodes. */ + +private import AST +private import TreeSitter +private import codeql.ruby.ast.internal.Call +private import codeql.ruby.ast.internal.Variable +private import codeql.ruby.ast.internal.Pattern +private import codeql.ruby.AST + +/** A synthesized AST node kind. 
*/ +newtype SynthKind = + AddExprKind() or + AssignExprKind() or + BitwiseAndExprKind() or + BitwiseOrExprKind() or + BitwiseXorExprKind() or + ClassVariableAccessKind(ClassVariable v) or + DivExprKind() or + ExponentExprKind() or + GlobalVariableAccessKind(GlobalVariable v) or + InstanceVariableAccessKind(InstanceVariable v) or + IntegerLiteralKind(int i) { i in [-1000 .. 1000] } or + LShiftExprKind() or + LocalVariableAccessRealKind(LocalVariableReal v) or + LocalVariableAccessSynthKind(TLocalVariableSynth v) or + LogicalAndExprKind() or + LogicalOrExprKind() or + MethodCallKind(string name, boolean setter, int arity) { + any(Synthesis s).methodCall(name, setter, arity) + } or + ModuloExprKind() or + MulExprKind() or + RangeLiteralKind(boolean inclusive) { inclusive in [false, true] } or + RShiftExprKind() or + SplatExprKind() or + StmtSequenceKind() or + SelfKind() or + SubExprKind() or + ConstantReadAccessKind(string value) { any(Synthesis s).constantReadAccess(value) } + +/** + * An AST child. + * + * Either a new synthesized node or a reference to an existing node. + */ +newtype Child = + SynthChild(SynthKind k) or + RealChild(AstNode n) + +private newtype TSynthesis = MkSynthesis() + +/** A class used for synthesizing AST nodes. */ +class Synthesis extends TSynthesis { + /** + * Holds if a node should be synthesized as the `i`th child of `parent`, or if + * a non-synthesized node should be the `i`th child of synthesized node `parent`. + * + * `i = -1` is used to represent that the synthesized node is a desugared version + * of its parent. + */ + predicate child(AstNode parent, int i, Child child) { none() } + + /** + * Holds if synthesized node `n` should have location `l`. Synthesized nodes for + * which this predicate does not hold, inherit their location (recursively) from + * their parent node. + */ + predicate location(AstNode n, Location l) { none() } + + /** + * Holds if a local variable, identified by `i`, should be synthesized for AST + * node `n`. 
+ */ + predicate localVariable(AstNode n, int i) { none() } + + /** + * Holds if a method call to `name` with arity `arity` is needed. + */ + predicate methodCall(string name, boolean setter, int arity) { none() } + + /** + * Holds if a constant read access of `name` is needed. + */ + predicate constantReadAccess(string name) { none() } + + /** + * Holds if `n` should be excluded from `ControlFlowTree` in the CFG construction. + */ + predicate excludeFromControlFlowTree(AstNode n) { none() } + + final string toString() { none() } +} + +private class Desugared extends AstNode { + Desugared() { this = any(AstNode sugar).getDesugared() } + + AstNode getADescendant() { result = this.getAChild*() } +} + +/** + * Gets the desugaring level of `n`. That is, the number of desugaring + * transformations required before the context in which `n` occurs is + * fully desugared. + */ +int desugarLevel(AstNode n) { result = count(Desugared desugared | n = desugared.getADescendant()) } + +/** + * Use this predicate in `Synthesis::child` to generate an assignment of `value` to + * synthesized variable `v`, where the assignment is a child of `assignParent` at + * index `assignIndex`. + */ +bindingset[v, assignParent, assignIndex, value] +private predicate assign( + AstNode parent, int i, Child child, TLocalVariableSynth v, AstNode assignParent, int assignIndex, + AstNode value +) { + parent = assignParent and + i = assignIndex and + child = SynthChild(AssignExprKind()) + or + parent = TAssignExprSynth(assignParent, assignIndex) and + ( + i = 0 and + child = SynthChild(LocalVariableAccessSynthKind(v)) + or + i = 1 and + child = RealChild(value) + ) +} + +/** Holds if synthesized node `n` should have location `l`. 
*/ +predicate synthLocation(AstNode n, Location l) { + n.isSynthesized() and any(Synthesis s).location(n, l) +} + +private predicate hasLocation(AstNode n, Location l) { + l = toGenerated(n).getLocation() + or + synthLocation(n, l) +} + +private module ImplicitSelfSynthesis { + pragma[nomagic] + private predicate identifierMethodCallSelfSynthesis(AstNode mc, int i, Child child) { + child = SynthChild(SelfKind()) and + mc = TIdentifierMethodCall(_) and + i = 0 + } + + private class IdentifierMethodCallSelfSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + identifierMethodCallSelfSynthesis(parent, i, child) + } + } + + pragma[nomagic] + private predicate regularMethodCallSelfSynthesis(TRegularMethodCall mc, int i, Child child) { + exists(Ruby::AstNode g | + mc = TRegularMethodCall(g) and + // If there's no explicit receiver (or scope resolution that acts like a + // receiver), then the receiver is implicitly `self`. N.B. `::Foo()` is + // not valid Ruby. + not exists(g.(Ruby::Call).getReceiver()) and + not exists(g.(Ruby::Call).getMethod().(Ruby::ScopeResolution).getScope()) + ) and + child = SynthChild(SelfKind()) and + i = 0 + } + + private class RegularMethodCallSelfSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + regularMethodCallSelfSynthesis(parent, i, child) + } + } +} + +private module SetterDesugar { + /** An assignment where the left-hand side is a method call. 
*/ + private class SetterAssignExpr extends AssignExpr { + private MethodCall mc; + + pragma[nomagic] + SetterAssignExpr() { mc = this.getLeftOperand() } + + MethodCall getMethodCall() { result = mc } + + pragma[nomagic] + MethodCallKind getCallKind(boolean setter, int arity) { + result = MethodCallKind(mc.getMethodName(), setter, arity) + } + + pragma[nomagic] + Expr getReceiver() { result = mc.getReceiver() } + + pragma[nomagic] + Expr getArgument(int i) { result = mc.getArgument(i) } + + pragma[nomagic] + int getNumberOfArguments() { result = mc.getNumberOfArguments() } + + pragma[nomagic] + Location getMethodCallLocation() { hasLocation(mc, result) } + } + + pragma[nomagic] + private predicate setterMethodCallSynthesis(AstNode parent, int i, Child child) { + exists(SetterAssignExpr sae | + parent = sae and + i = -1 and + child = SynthChild(StmtSequenceKind()) + or + exists(AstNode seq | seq = TStmtSequenceSynth(sae, -1) | + parent = seq and + i = 0 and + child = SynthChild(sae.getCallKind(true, sae.getNumberOfArguments() + 1)) + or + exists(AstNode call | call = TMethodCallSynth(seq, 0, _, _, _) | + parent = call and + i = 0 and + child = RealChild(sae.getReceiver()) + or + parent = call and + child = RealChild(sae.getArgument(i - 1)) + or + exists(int valueIndex | valueIndex = sae.getNumberOfArguments() + 1 | + parent = call and + i = valueIndex and + child = SynthChild(AssignExprKind()) + or + parent = TAssignExprSynth(call, valueIndex) and + ( + i = 0 and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sae, 0))) + or + i = 1 and + child = RealChild(sae.getRightOperand()) + ) + ) + ) + or + parent = seq and + i = 1 and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sae, 0))) + ) + ) + } + + /** + * ```rb + * x.foo = y + * ``` + * + * desugars to + * + * ```rb + * x.foo=(__synth__0 = y); + * __synth__0; + * ``` + */ + private class SetterMethodCallSynthesis extends Synthesis { + final override predicate child(AstNode
parent, int i, Child child) { + setterMethodCallSynthesis(parent, i, child) + } + + final override predicate location(AstNode n, Location l) { + exists(SetterAssignExpr sae, StmtSequence seq | + seq = sae.getDesugared() and + l = sae.getMethodCallLocation() and + n = seq.getAStmt() + ) + } + + final override predicate excludeFromControlFlowTree(AstNode n) { + n = any(SetterAssignExpr sae).getMethodCall() + } + + final override predicate localVariable(AstNode n, int i) { + n instanceof SetterAssignExpr and + i = 0 + } + + final override predicate methodCall(string name, boolean setter, int arity) { + exists(SetterAssignExpr sae | + name = sae.getMethodCall().getMethodName() and + setter = true and + arity = sae.getNumberOfArguments() + 1 + ) + } + } +} + +private module AssignOperationDesugar { + /** + * Gets the operator kind to synthesize for operator assignment `ao`. + */ + private SynthKind getKind(AssignOperation ao) { + ao instanceof AssignAddExpr and result = AddExprKind() + or + ao instanceof AssignSubExpr and result = SubExprKind() + or + ao instanceof AssignMulExpr and result = MulExprKind() + or + ao instanceof AssignDivExpr and result = DivExprKind() + or + ao instanceof AssignModuloExpr and result = ModuloExprKind() + or + ao instanceof AssignExponentExpr and result = ExponentExprKind() + or + ao instanceof AssignLogicalAndExpr and result = LogicalAndExprKind() + or + ao instanceof AssignLogicalOrExpr and result = LogicalOrExprKind() + or + ao instanceof AssignLShiftExpr and result = LShiftExprKind() + or + ao instanceof AssignRShiftExpr and result = RShiftExprKind() + or + ao instanceof AssignBitwiseAndExpr and result = BitwiseAndExprKind() + or + ao instanceof AssignBitwiseOrExpr and result = BitwiseOrExprKind() + or + ao instanceof AssignBitwiseXorExpr and result = BitwiseXorExprKind() + } + + private Location getAssignOperationLocation(AssignOperation ao) { + exists(Ruby::OperatorAssignment g, Ruby::Token op | + g = toGenerated(ao) and + 
op.getParent() = g and + op.getParentIndex() = 1 and + result = op.getLocation() + ) + } + + /** An assignment operation where the left-hand side is a variable. */ + private class VariableAssignOperation extends AssignOperation { + private Variable v; + + pragma[nomagic] + VariableAssignOperation() { v = this.getLeftOperand().(VariableAccess).getVariable() } + + pragma[nomagic] + SynthKind getVariableAccessKind() { + result in [ + LocalVariableAccessRealKind(v).(SynthKind), InstanceVariableAccessKind(v), + ClassVariableAccessKind(v), GlobalVariableAccessKind(v) + ] + } + } + + pragma[nomagic] + private predicate variableAssignOperationSynthesis(AstNode parent, int i, Child child) { + exists(VariableAssignOperation vao | + parent = vao and + i = -1 and + child = SynthChild(AssignExprKind()) + or + exists(AstNode assign | assign = TAssignExprSynth(vao, -1) | + parent = assign and + i = 0 and + child = RealChild(vao.getLeftOperand()) + or + parent = assign and + i = 1 and + child = SynthChild(getKind(vao)) + or + parent = getSynthChild(assign, 1) and + ( + i = 0 and + child = SynthChild(vao.getVariableAccessKind()) + or + i = 1 and + child = RealChild(vao.getRightOperand()) + ) + ) + ) + } + + /** + * ```rb + * x += y + * ``` + * + * desugars to + * + * ```rb + * x = x + y + * ``` + * + * when `x` is a variable. + */ + private class VariableAssignOperationSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + variableAssignOperationSynthesis(parent, i, child) + } + + final override predicate location(AstNode n, Location l) { + exists(VariableAssignOperation vao, BinaryOperation bo | + bo = vao.getDesugared().(AssignExpr).getRightOperand() + | + n = bo and + l = getAssignOperationLocation(vao) + or + n = bo.getLeftOperand() and + hasLocation(vao.getLeftOperand(), l) + ) + } + } + + /** An assignment operation where the left-hand side is a method call. 
*/ + private class SetterAssignOperation extends AssignOperation { + private MethodCall mc; + + pragma[nomagic] + SetterAssignOperation() { mc = this.getLeftOperand() } + + MethodCall getMethodCall() { result = mc } + + pragma[nomagic] + MethodCallKind getCallKind(boolean setter, int arity) { + result = MethodCallKind(mc.getMethodName(), setter, arity) + } + + pragma[nomagic] + Expr getReceiver() { result = mc.getReceiver() } + + pragma[nomagic] + Expr getArgument(int i) { result = mc.getArgument(i) } + + pragma[nomagic] + int getNumberOfArguments() { result = mc.getNumberOfArguments() } + + pragma[nomagic] + Location getMethodCallLocation() { hasLocation(mc, result) } + } + + pragma[nomagic] + private predicate methodCallAssignOperationSynthesis(AstNode parent, int i, Child child) { + exists(SetterAssignOperation sao | + parent = sao and + i = -1 and + child = SynthChild(StmtSequenceKind()) + or + exists(AstNode seq | seq = TStmtSequenceSynth(sao, -1) | + // `__synth__0 = foo` + assign(parent, i, child, TLocalVariableSynth(sao, 0), seq, 0, sao.getReceiver()) + or + // `__synth__1 = bar` + exists(Expr arg, int j | arg = sao.getArgument(j - 1) | + assign(parent, i, child, TLocalVariableSynth(sao, j), seq, j, arg) + ) + or + // `__synth__2 = __synth__0.[](__synth__1) + y` + exists(int opAssignIndex | opAssignIndex = sao.getNumberOfArguments() + 1 | + parent = seq and + i = opAssignIndex and + child = SynthChild(AssignExprKind()) + or + exists(AstNode assign | assign = TAssignExprSynth(seq, opAssignIndex) | + parent = assign and + i = 0 and + child = + SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex))) + or + parent = assign and + i = 1 and + child = SynthChild(getKind(sao)) + or + // `__synth__0.[](__synth__1) + y` + exists(AstNode op | op = getSynthChild(assign, 1) | + parent = op and + i = 0 and + child = SynthChild(sao.getCallKind(false, sao.getNumberOfArguments())) + or + parent = TMethodCallSynth(op, 0, _, _, _) and + child = 
SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and + i in [0 .. sao.getNumberOfArguments()] + or + parent = op and + i = 1 and + child = RealChild(sao.getRightOperand()) + ) + ) + or + // `__synth__0.[]=(__synth__1, __synth__2);` + parent = seq and + i = opAssignIndex + 1 and + child = SynthChild(sao.getCallKind(true, opAssignIndex)) + or + exists(AstNode setter | setter = TMethodCallSynth(seq, opAssignIndex + 1, _, _, _) | + parent = setter and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and + i in [0 .. sao.getNumberOfArguments()] + or + parent = setter and + i = opAssignIndex + 1 and + child = + SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex))) + ) + or + parent = seq and + i = opAssignIndex + 2 and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex))) + ) + ) + ) + } + + /** + * ```rb + * foo[bar] += y + * ``` + * + * desugars to + * + * ```rb + * __synth__0 = foo; + * __synth__1 = bar; + * __synth__2 = __synth__0.[](__synth__1) + y; + * __synth__0.[]=(__synth__1, __synth__2); + * __synth__2; + * ``` + */ + private class MethodCallAssignOperationSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + methodCallAssignOperationSynthesis(parent, i, child) + } + + final override predicate location(AstNode n, Location l) { + exists(SetterAssignOperation sao, StmtSequence seq | seq = sao.getDesugared() | + n = seq.getStmt(0) and + hasLocation(sao.getReceiver(), l) + or + exists(int i | + n = seq.getStmt(i + 1) and + hasLocation(sao.getArgument(i), l) + ) + or + exists(AssignExpr ae, int opAssignIndex | + opAssignIndex = sao.getNumberOfArguments() + 1 and + ae = seq.getStmt(opAssignIndex) + | + l = getAssignOperationLocation(sao) and + n = ae + or + exists(BinaryOperation bo | bo = ae.getRightOperand() | + n = bo.getLeftOperand() and + l = sao.getMethodCallLocation() + or + exists(MethodCall mc 
| mc = bo.getLeftOperand() | + n = mc.getReceiver() and + hasLocation(sao.getReceiver(), l) + or + exists(int i | + n = mc.getArgument(i) and + hasLocation(sao.getArgument(i), l) + ) + ) + ) + or + exists(MethodCall mc | mc = seq.getStmt(opAssignIndex + 1) | + n = mc and + l = sao.getMethodCallLocation() + or + n = mc.getReceiver() and + hasLocation(sao.getReceiver(), l) + or + exists(int i | n = mc.getArgument(i) | + hasLocation(sao.getArgument(i), l) + or + i = opAssignIndex and + l = getAssignOperationLocation(sao) + ) + ) + or + n = seq.getStmt(opAssignIndex + 2) and + l = getAssignOperationLocation(sao) + ) + ) + } + + final override predicate localVariable(AstNode n, int i) { + n = any(SetterAssignOperation sao | i in [0 .. sao.getNumberOfArguments() + 1]) + } + + final override predicate methodCall(string name, boolean setter, int arity) { + exists(SetterAssignOperation sao | name = sao.getMethodCall().getMethodName() | + setter = false and + arity = sao.getNumberOfArguments() + or + setter = true and + arity = sao.getNumberOfArguments() + 1 + ) + } + + final override predicate excludeFromControlFlowTree(AstNode n) { + n = any(SetterAssignOperation sao).getMethodCall() + } + } +} + +private module CompoundAssignDesugar { + /** An assignment where the left-hand side is a tuple pattern. 
*/ + private class TupleAssignExpr extends AssignExpr { + private TuplePattern tp; + + pragma[nomagic] + TupleAssignExpr() { tp = this.getLeftOperand() } + + TuplePattern getTuplePattern() { result = tp } + + pragma[nomagic] + Pattern getElement(int i) { result = tp.getElement(i) } + + pragma[nomagic] + int getNumberOfElements() { + toGenerated(tp) = any(TuplePatternImpl impl | result = count(impl.getChildNode(_))) + } + + pragma[nomagic] + int getRestIndexOrNumberOfElements() { + result = tp.getRestIndex() + or + toGenerated(tp) = any(TuplePatternImpl impl | not exists(impl.getRestIndex())) and + result = this.getNumberOfElements() + } + } + + pragma[nomagic] + private predicate compoundAssignSynthesis(AstNode parent, int i, Child child) { + exists(TupleAssignExpr tae | + parent = tae and + i = -1 and + child = SynthChild(StmtSequenceKind()) + or + exists(AstNode seq | seq = TStmtSequenceSynth(tae, -1) | + parent = seq and + i = 0 and + child = SynthChild(AssignExprKind()) + or + exists(AstNode assign | assign = TAssignExprSynth(seq, 0) | + parent = assign and + i = 0 and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0))) + or + parent = assign and + i = 1 and + child = SynthChild(SplatExprKind()) + or + parent = TSplatExprSynth(assign, 1) and + i = 0 and + child = RealChild(tae.getRightOperand()) + ) + or + exists(Pattern p, int j, int restIndex | + p = tae.getElement(j) and + restIndex = tae.getRestIndexOrNumberOfElements() + | + parent = seq and + i = j + 1 and + child = SynthChild(AssignExprKind()) + or + exists(AstNode assign | assign = TAssignExprSynth(seq, j + 1) | + parent = assign and + i = 0 and + child = RealChild(p) + or + parent = assign and + i = 1 and + child = SynthChild(MethodCallKind("[]", false, 1)) + or + parent = TMethodCallSynth(assign, 1, _, _, _) and + i = 0 and + child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0))) + or + j < restIndex and + parent = TMethodCallSynth(assign, 1, _, _, _) 
and + i = 1 and + child = SynthChild(IntegerLiteralKind(j)) + or + j = restIndex and + ( + parent = TMethodCallSynth(assign, 1, _, _, _) and + i = 1 and + child = SynthChild(RangeLiteralKind(true)) + or + exists(AstNode call | + call = TMethodCallSynth(assign, 1, _, _, _) and + parent = TRangeLiteralSynth(call, 1, _) + | + i = 0 and + child = SynthChild(IntegerLiteralKind(j)) + or + i = 1 and + child = SynthChild(IntegerLiteralKind(restIndex - tae.getNumberOfElements())) + ) + ) + or + j > restIndex and + parent = TMethodCallSynth(assign, 1, _, _, _) and + i = 1 and + child = SynthChild(IntegerLiteralKind(j - tae.getNumberOfElements())) + ) + ) + ) + ) + } + + /** + * ```rb + * x, *y, z = w + * ``` + * desugars to + * + * ```rb + * __synth__0 = *w; + * x = __synth__0[0]; + * y = __synth__0[1..-2]; + * z = __synth__0[-1]; + * ``` + */ + private class CompoundAssignSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + compoundAssignSynthesis(parent, i, child) + } + + final override predicate location(AstNode n, Location l) { + exists(TupleAssignExpr tae, StmtSequence seq | seq = tae.getDesugared() | + n = seq.getStmt(0) and + hasLocation(tae.getRightOperand(), l) + or + exists(Pattern p, int j | + p = tae.getElement(j) and + n = seq.getStmt(j + 1) and + hasLocation(p, l) + ) + ) + } + + final override predicate localVariable(AstNode n, int i) { + n instanceof TupleAssignExpr and + i = 0 + } + + final override predicate methodCall(string name, boolean setter, int arity) { + name = "[]" and + setter = false and + arity = 1 + } + + final override predicate excludeFromControlFlowTree(AstNode n) { + n = any(TupleAssignExpr tae).getTuplePattern() + } + } +} + +private module ArrayLiteralDesugar { + pragma[nomagic] + private predicate arrayLiteralSynthesis(AstNode parent, int i, Child child) { + exists(ArrayLiteral al | + parent = al and + i = -1 and + child = SynthChild(MethodCallKind("[]", false, al.getNumberOfElements() + 
1)) + or + exists(AstNode mc | mc = TMethodCallSynth(al, -1, _, _, _) | + parent = mc and + i = 0 and + child = SynthChild(ConstantReadAccessKind("::Array")) + or + parent = mc and + child = RealChild(al.getElement(i - 1)) + ) + ) + } + + /** + * ```rb + * [1, 2, 3] + * ``` + * desugars to + * + * ```rb + * ::Array.[](1, 2, 3) + * ``` + */ + private class ArrayLiteralSynthesis extends Synthesis { + final override predicate child(AstNode parent, int i, Child child) { + arrayLiteralSynthesis(parent, i, child) + } + + final override predicate methodCall(string name, boolean setter, int arity) { + name = "[]" and + setter = false and + arity = any(ArrayLiteral al).getNumberOfElements() + 1 + } + + final override predicate constantReadAccess(string name) { name = "::Array" } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll new file mode 100644 index 00000000000..d054d15b675 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll @@ -0,0 +1,2000 @@ +/* + * CodeQL library for Ruby + * Automatically generated from the tree-sitter grammar; do not edit + */ + +private import codeql.files.FileSystem +private import codeql.Locations + +module Ruby { + /** The base class for all AST nodes */ + class AstNode extends @ruby_ast_node { + /** Gets a string representation of this element. */ + string toString() { result = this.getAPrimaryQlClass() } + + /** Gets the location of this element. */ + Location getLocation() { none() } + + /** Gets the parent of this element. */ + AstNode getParent() { ruby_ast_node_parent(this, result, _) } + + /** Gets the index of this node among the children of its parent. */ + int getParentIndex() { ruby_ast_node_parent(this, _, result) } + + /** Gets a field or child node of this node. */ + AstNode getAFieldOrChild() { none() } + + /** Gets the name of the primary QL class for this element.
*/ + string getAPrimaryQlClass() { result = "???" } + + /** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */ + string getPrimaryQlClasses() { result = concat(getAPrimaryQlClass(), ",") } + } + + /** A token. */ + class Token extends @ruby_token, AstNode { + /** Gets the value of this token. */ + string getValue() { ruby_tokeninfo(this, _, result, _) } + + /** Gets the location of this token. */ + override Location getLocation() { ruby_tokeninfo(this, _, _, result) } + + /** Gets a string representation of this element. */ + override string toString() { result = getValue() } + + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Token" } + } + + /** A reserved word. */ + class ReservedWord extends @ruby_reserved_word, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ReservedWord" } + } + + class UnderscoreArg extends @ruby_underscore_arg, AstNode { } + + class UnderscoreLhs extends @ruby_underscore_lhs, AstNode { } + + class UnderscoreMethodName extends @ruby_underscore_method_name, AstNode { } + + class UnderscorePrimary extends @ruby_underscore_primary, AstNode { } + + class UnderscoreStatement extends @ruby_underscore_statement, AstNode { } + + class UnderscoreVariable extends @ruby_underscore_variable, AstNode { } + + /** A class representing `alias` nodes. */ + class Alias extends @ruby_alias, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Alias" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_alias_def(this, _, _, result) } + + /** Gets the node corresponding to the field `alias`. */ + UnderscoreMethodName getAlias() { ruby_alias_def(this, result, _, _) } + + /** Gets the node corresponding to the field `name`. 
*/ + UnderscoreMethodName getName() { ruby_alias_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_alias_def(this, result, _, _) or ruby_alias_def(this, _, result, _) + } + } + + /** A class representing `argument_list` nodes. */ + class ArgumentList extends @ruby_argument_list, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ArgumentList" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_argument_list_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_argument_list_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_argument_list_child(this, _, result) } + } + + /** A class representing `array` nodes. */ + class Array extends @ruby_array, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Array" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_array_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_array_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_array_child(this, _, result) } + } + + /** A class representing `assignment` nodes. */ + class Assignment extends @ruby_assignment, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Assignment" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_assignment_def(this, _, _, result) } + + /** Gets the node corresponding to the field `left`. */ + AstNode getLeft() { ruby_assignment_def(this, result, _, _) } + + /** Gets the node corresponding to the field `right`. 
*/ + AstNode getRight() { ruby_assignment_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_assignment_def(this, result, _, _) or ruby_assignment_def(this, _, result, _) + } + } + + /** A class representing `bare_string` nodes. */ + class BareString extends @ruby_bare_string, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BareString" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_bare_string_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_bare_string_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_bare_string_child(this, _, result) } + } + + /** A class representing `bare_symbol` nodes. */ + class BareSymbol extends @ruby_bare_symbol, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BareSymbol" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_bare_symbol_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_bare_symbol_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_bare_symbol_child(this, _, result) } + } + + /** A class representing `begin` nodes. */ + class Begin extends @ruby_begin, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Begin" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_begin_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_begin_child(this, i, result) } + + /** Gets a field or child node of this node. 
*/ + override AstNode getAFieldOrChild() { ruby_begin_child(this, _, result) } + } + + /** A class representing `begin_block` nodes. */ + class BeginBlock extends @ruby_begin_block, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BeginBlock" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_begin_block_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_begin_block_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_begin_block_child(this, _, result) } + } + + /** A class representing `binary` nodes. */ + class Binary extends @ruby_binary, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Binary" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_binary_def(this, _, _, _, result) } + + /** Gets the node corresponding to the field `left`. */ + AstNode getLeft() { ruby_binary_def(this, result, _, _, _) } + + /** Gets the node corresponding to the field `operator`. 
*/ + string getOperator() { + exists(int value | ruby_binary_def(this, _, value, _, _) | + result = "!=" and value = 0 + or + result = "!~" and value = 1 + or + result = "%" and value = 2 + or + result = "&" and value = 3 + or + result = "&&" and value = 4 + or + result = "*" and value = 5 + or + result = "**" and value = 6 + or + result = "+" and value = 7 + or + result = "-" and value = 8 + or + result = "/" and value = 9 + or + result = "<" and value = 10 + or + result = "<<" and value = 11 + or + result = "<=" and value = 12 + or + result = "<=>" and value = 13 + or + result = "==" and value = 14 + or + result = "===" and value = 15 + or + result = "=~" and value = 16 + or + result = ">" and value = 17 + or + result = ">=" and value = 18 + or + result = ">>" and value = 19 + or + result = "^" and value = 20 + or + result = "and" and value = 21 + or + result = "or" and value = 22 + or + result = "|" and value = 23 + or + result = "||" and value = 24 + ) + } + + /** Gets the node corresponding to the field `right`. */ + AstNode getRight() { ruby_binary_def(this, _, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_binary_def(this, result, _, _, _) or ruby_binary_def(this, _, _, result, _) + } + } + + /** A class representing `block` nodes. */ + class Block extends @ruby_block, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Block" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_block_def(this, result) } + + /** Gets the node corresponding to the field `parameters`. */ + BlockParameters getParameters() { ruby_block_parameters(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_block_child(this, i, result) } + + /** Gets a field or child node of this node. 
*/ + override AstNode getAFieldOrChild() { + ruby_block_parameters(this, result) or ruby_block_child(this, _, result) + } + } + + /** A class representing `block_argument` nodes. */ + class BlockArgument extends @ruby_block_argument, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BlockArgument" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_block_argument_def(this, _, result) } + + /** Gets the child of this node. */ + UnderscoreArg getChild() { ruby_block_argument_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_block_argument_def(this, result, _) } + } + + /** A class representing `block_parameter` nodes. */ + class BlockParameter extends @ruby_block_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BlockParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_block_parameter_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_block_parameter_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_block_parameter_def(this, result, _) } + } + + /** A class representing `block_parameters` nodes. */ + class BlockParameters extends @ruby_block_parameters, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "BlockParameters" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_block_parameters_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_block_parameters_child(this, i, result) } + + /** Gets a field or child node of this node. 
*/ + override AstNode getAFieldOrChild() { ruby_block_parameters_child(this, _, result) } + } + + /** A class representing `break` nodes. */ + class Break extends @ruby_break, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Break" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_break_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_break_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_break_child(this, result) } + } + + /** A class representing `call` nodes. */ + class Call extends @ruby_call, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Call" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_call_def(this, _, result) } + + /** Gets the node corresponding to the field `arguments`. */ + ArgumentList getArguments() { ruby_call_arguments(this, result) } + + /** Gets the node corresponding to the field `block`. */ + AstNode getBlock() { ruby_call_block(this, result) } + + /** Gets the node corresponding to the field `method`. */ + AstNode getMethod() { ruby_call_def(this, result, _) } + + /** Gets the node corresponding to the field `receiver`. */ + AstNode getReceiver() { ruby_call_receiver(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_call_arguments(this, result) or + ruby_call_block(this, result) or + ruby_call_def(this, result, _) or + ruby_call_receiver(this, result) + } + } + + /** A class representing `case` nodes. */ + class Case extends @ruby_case__, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Case" } + + /** Gets the location of this element. 
*/ + override Location getLocation() { ruby_case_def(this, result) } + + /** Gets the node corresponding to the field `value`. */ + UnderscoreStatement getValue() { ruby_case_value(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_case_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_case_value(this, result) or ruby_case_child(this, _, result) + } + } + + /** A class representing `chained_string` nodes. */ + class ChainedString extends @ruby_chained_string, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ChainedString" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_chained_string_def(this, result) } + + /** Gets the `i`th child of this node. */ + String getChild(int i) { ruby_chained_string_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_chained_string_child(this, _, result) } + } + + /** A class representing `character` tokens. */ + class Character extends @ruby_token_character, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Character" } + } + + /** A class representing `class` nodes. */ + class Class extends @ruby_class, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Class" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_class_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + AstNode getName() { ruby_class_def(this, result, _) } + + /** Gets the node corresponding to the field `superclass`. */ + Superclass getSuperclass() { ruby_class_superclass(this, result) } + + /** Gets the `i`th child of this node. 
*/ + AstNode getChild(int i) { ruby_class_child(this, i, result) } + + /** Gets a field or child node of this node: the `name` or `superclass` field, or any indexed child. */ + override AstNode getAFieldOrChild() { + ruby_class_def(this, result, _) or + ruby_class_superclass(this, result) or + ruby_class_child(this, _, result) + } + } + + /** A class representing `class_variable` tokens. */ + class ClassVariable extends @ruby_token_class_variable, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ClassVariable" } + } + + /** A class representing `comment` tokens. */ + class Comment extends @ruby_token_comment, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Comment" } + } + + /** A class representing `complex` tokens. */ + class Complex extends @ruby_token_complex, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Complex" } + } + + /** A class representing `conditional` nodes. */ + class Conditional extends @ruby_conditional, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Conditional" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_conditional_def(this, _, _, _, result) } + + /** Gets the node corresponding to the field `alternative`. */ + UnderscoreArg getAlternative() { ruby_conditional_def(this, result, _, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreArg getCondition() { ruby_conditional_def(this, _, result, _, _) } + + /** Gets the node corresponding to the field `consequence`. */ + UnderscoreArg getConsequence() { ruby_conditional_def(this, _, _, result, _) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { + ruby_conditional_def(this, result, _, _, _) or + ruby_conditional_def(this, _, result, _, _) or + ruby_conditional_def(this, _, _, result, _) + } + } + + /** A class representing `constant` tokens. */ + class Constant extends @ruby_token_constant, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Constant" } + } + + /** A class representing `delimited_symbol` nodes. */ + class DelimitedSymbol extends @ruby_delimited_symbol, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "DelimitedSymbol" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_delimited_symbol_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_delimited_symbol_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_delimited_symbol_child(this, _, result) } + } + + /** A class representing `destructured_left_assignment` nodes. */ + class DestructuredLeftAssignment extends @ruby_destructured_left_assignment, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "DestructuredLeftAssignment" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_destructured_left_assignment_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_destructured_left_assignment_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_destructured_left_assignment_child(this, _, result) } + } + + /** A class representing `destructured_parameter` nodes.
*/ + class DestructuredParameter extends @ruby_destructured_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "DestructuredParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_destructured_parameter_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_destructured_parameter_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_destructured_parameter_child(this, _, result) } + } + + /** A class representing `do` nodes. */ + class Do extends @ruby_do, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Do" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_do_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_do_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_do_child(this, _, result) } + } + + /** A class representing `do_block` nodes. */ + class DoBlock extends @ruby_do_block, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "DoBlock" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_do_block_def(this, result) } + + /** Gets the node corresponding to the field `parameters`. */ + BlockParameters getParameters() { ruby_do_block_parameters(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_do_block_child(this, i, result) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { + ruby_do_block_parameters(this, result) or ruby_do_block_child(this, _, result) + } + } + + /** A class representing `element_reference` nodes. */ + class ElementReference extends @ruby_element_reference, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ElementReference" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_element_reference_def(this, _, result) } + + /** Gets the node corresponding to the field `object`. */ + UnderscorePrimary getObject() { ruby_element_reference_def(this, result, _) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_element_reference_child(this, i, result) } + + /** Gets a field or child node of this node: the `object` field or any indexed child. */ + override AstNode getAFieldOrChild() { + ruby_element_reference_def(this, result, _) or ruby_element_reference_child(this, _, result) + } + } + + /** A class representing `else` nodes. */ + class Else extends @ruby_else, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Else" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_else_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_else_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_else_child(this, _, result) } + } + + /** A class representing `elsif` nodes. */ + class Elsif extends @ruby_elsif, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Elsif" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_elsif_def(this, _, result) } + + /** Gets the node corresponding to the field `alternative`.
*/ + AstNode getAlternative() { ruby_elsif_alternative(this, result) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreStatement getCondition() { ruby_elsif_def(this, result, _) } + + /** Gets the node corresponding to the field `consequence`. */ + Then getConsequence() { ruby_elsif_consequence(this, result) } + + /** Gets a field or child node of this node: the `alternative`, `condition` or `consequence` field. */ + override AstNode getAFieldOrChild() { + ruby_elsif_alternative(this, result) or + ruby_elsif_def(this, result, _) or + ruby_elsif_consequence(this, result) + } + } + + /** A class representing `empty_statement` tokens. */ + class EmptyStatement extends @ruby_token_empty_statement, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "EmptyStatement" } + } + + /** A class representing `end_block` nodes. */ + class EndBlock extends @ruby_end_block, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "EndBlock" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_end_block_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_end_block_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_end_block_child(this, _, result) } + } + + /** A class representing `ensure` nodes. */ + class Ensure extends @ruby_ensure, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Ensure" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_ensure_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_ensure_child(this, i, result) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { ruby_ensure_child(this, _, result) } + } + + /** A class representing `escape_sequence` tokens. */ + class EscapeSequence extends @ruby_token_escape_sequence, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "EscapeSequence" } + } + + /** A class representing `exception_variable` nodes. */ + class ExceptionVariable extends @ruby_exception_variable, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ExceptionVariable" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_exception_variable_def(this, _, result) } + + /** Gets the child of this node. */ + UnderscoreLhs getChild() { ruby_exception_variable_def(this, result, _) } + + /** Gets a field or child node of this node: the child returned by `getChild()`. */ + override AstNode getAFieldOrChild() { ruby_exception_variable_def(this, result, _) } + } + + /** A class representing `exceptions` nodes. */ + class Exceptions extends @ruby_exceptions, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Exceptions" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_exceptions_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_exceptions_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_exceptions_child(this, _, result) } + } + + /** A class representing `false` tokens. */ + class False extends @ruby_token_false, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "False" } + } + + /** A class representing `float` tokens. */ + class Float extends @ruby_token_float, Token { + /** Gets the name of the primary QL class for this element.
*/ + override string getAPrimaryQlClass() { result = "Float" } + } + + /** A class representing `for` nodes. */ + class For extends @ruby_for, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "For" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_for_def(this, _, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + Do getBody() { ruby_for_def(this, result, _, _, _) } + + /** Gets the node corresponding to the field `pattern`. */ + AstNode getPattern() { ruby_for_def(this, _, result, _, _) } + + /** Gets the node corresponding to the field `value`. */ + In getValue() { ruby_for_def(this, _, _, result, _) } + + /** Gets a field or child node of this node: the `body`, `pattern` or `value` field. */ + override AstNode getAFieldOrChild() { + ruby_for_def(this, result, _, _, _) or + ruby_for_def(this, _, result, _, _) or + ruby_for_def(this, _, _, result, _) + } + } + + /** A class representing `forward_argument` tokens. */ + class ForwardArgument extends @ruby_token_forward_argument, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ForwardArgument" } + } + + /** A class representing `forward_parameter` tokens. */ + class ForwardParameter extends @ruby_token_forward_parameter, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ForwardParameter" } + } + + /** A class representing `global_variable` tokens. */ + class GlobalVariable extends @ruby_token_global_variable, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "GlobalVariable" } + } + + /** A class representing `hash` nodes. */ + class Hash extends @ruby_hash, AstNode { + /** Gets the name of the primary QL class for this element.
*/ + override string getAPrimaryQlClass() { result = "Hash" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_hash_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_hash_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_hash_child(this, _, result) } + } + + /** A class representing `hash_key_symbol` tokens. */ + class HashKeySymbol extends @ruby_token_hash_key_symbol, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HashKeySymbol" } + } + + /** A class representing `hash_splat_argument` nodes. */ + class HashSplatArgument extends @ruby_hash_splat_argument, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HashSplatArgument" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_hash_splat_argument_def(this, _, result) } + + /** Gets the child of this node. */ + UnderscoreArg getChild() { ruby_hash_splat_argument_def(this, result, _) } + + /** Gets a field or child node of this node: the child returned by `getChild()`. */ + override AstNode getAFieldOrChild() { ruby_hash_splat_argument_def(this, result, _) } + } + + /** A class representing `hash_splat_parameter` nodes. */ + class HashSplatParameter extends @ruby_hash_splat_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HashSplatParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_hash_splat_parameter_def(this, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_hash_splat_parameter_name(this, result) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { ruby_hash_splat_parameter_name(this, result) } + } + + /** A class representing `heredoc_beginning` tokens. */ + class HeredocBeginning extends @ruby_token_heredoc_beginning, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HeredocBeginning" } + } + + /** A class representing `heredoc_body` nodes. */ + class HeredocBody extends @ruby_heredoc_body, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HeredocBody" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_heredoc_body_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_heredoc_body_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_heredoc_body_child(this, _, result) } + } + + /** A class representing `heredoc_content` tokens. */ + class HeredocContent extends @ruby_token_heredoc_content, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HeredocContent" } + } + + /** A class representing `heredoc_end` tokens. */ + class HeredocEnd extends @ruby_token_heredoc_end, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "HeredocEnd" } + } + + /** A class representing `identifier` tokens. */ + class Identifier extends @ruby_token_identifier, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Identifier" } + } + + /** A class representing `if` nodes. */ + class If extends @ruby_if, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "If" } + + /** Gets the location of this element.
*/ + override Location getLocation() { ruby_if_def(this, _, result) } + + /** Gets the node corresponding to the field `alternative`. */ + AstNode getAlternative() { ruby_if_alternative(this, result) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreStatement getCondition() { ruby_if_def(this, result, _) } + + /** Gets the node corresponding to the field `consequence`. */ + Then getConsequence() { ruby_if_consequence(this, result) } + + /** Gets a field or child node of this node: the `alternative`, `condition` or `consequence` field. */ + override AstNode getAFieldOrChild() { + ruby_if_alternative(this, result) or + ruby_if_def(this, result, _) or + ruby_if_consequence(this, result) + } + } + + /** A class representing `if_modifier` nodes. */ + class IfModifier extends @ruby_if_modifier, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "IfModifier" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_if_modifier_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + UnderscoreStatement getBody() { ruby_if_modifier_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + AstNode getCondition() { ruby_if_modifier_def(this, _, result, _) } + + /** Gets a field or child node of this node: the `body` or `condition` field. */ + override AstNode getAFieldOrChild() { + ruby_if_modifier_def(this, result, _, _) or ruby_if_modifier_def(this, _, result, _) + } + } + + /** A class representing `in` nodes. */ + class In extends @ruby_in, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "In" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_in_def(this, _, result) } + + /** Gets the child of this node. */ + UnderscoreArg getChild() { ruby_in_def(this, result, _) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { ruby_in_def(this, result, _) } + } + + /** A class representing `instance_variable` tokens. */ + class InstanceVariable extends @ruby_token_instance_variable, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "InstanceVariable" } + } + + /** A class representing `integer` tokens. */ + class Integer extends @ruby_token_integer, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Integer" } + } + + /** A class representing `interpolation` nodes. */ + class Interpolation extends @ruby_interpolation, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Interpolation" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_interpolation_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_interpolation_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_interpolation_child(this, _, result) } + } + + /** A class representing `keyword_parameter` nodes. */ + class KeywordParameter extends @ruby_keyword_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "KeywordParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_keyword_parameter_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_keyword_parameter_def(this, result, _) } + + /** Gets the node corresponding to the field `value`. */ + UnderscoreArg getValue() { ruby_keyword_parameter_value(this, result) } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { + ruby_keyword_parameter_def(this, result, _) or ruby_keyword_parameter_value(this, result) + } + } + + /** A class representing `lambda` nodes. */ + class Lambda extends @ruby_lambda, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Lambda" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_lambda_def(this, _, result) } + + /** Gets the node corresponding to the field `body`. */ + AstNode getBody() { ruby_lambda_def(this, result, _) } + + /** Gets the node corresponding to the field `parameters`. */ + LambdaParameters getParameters() { ruby_lambda_parameters(this, result) } + + /** Gets a field or child node of this node: the `body` or `parameters` field. */ + override AstNode getAFieldOrChild() { + ruby_lambda_def(this, result, _) or ruby_lambda_parameters(this, result) + } + } + + /** A class representing `lambda_parameters` nodes. */ + class LambdaParameters extends @ruby_lambda_parameters, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "LambdaParameters" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_lambda_parameters_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_lambda_parameters_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_lambda_parameters_child(this, _, result) } + } + + /** A class representing `left_assignment_list` nodes. */ + class LeftAssignmentList extends @ruby_left_assignment_list, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "LeftAssignmentList" } + + /** Gets the location of this element.
*/ + override Location getLocation() { ruby_left_assignment_list_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_left_assignment_list_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_left_assignment_list_child(this, _, result) } + } + + /** A class representing `method` nodes. */ + class Method extends @ruby_method, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Method" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_method_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + UnderscoreMethodName getName() { ruby_method_def(this, result, _) } + + /** Gets the node corresponding to the field `parameters`. */ + MethodParameters getParameters() { ruby_method_parameters(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_method_child(this, i, result) } + + /** Gets a field or child node of this node: the `name` or `parameters` field, or any indexed child. */ + override AstNode getAFieldOrChild() { + ruby_method_def(this, result, _) or + ruby_method_parameters(this, result) or + ruby_method_child(this, _, result) + } + } + + /** A class representing `method_parameters` nodes. */ + class MethodParameters extends @ruby_method_parameters, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "MethodParameters" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_method_parameters_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_method_parameters_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_method_parameters_child(this, _, result) } + } + + /** A class representing `module` nodes.
*/ + class Module extends @ruby_module, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Module" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_module_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + AstNode getName() { ruby_module_def(this, result, _) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_module_child(this, i, result) } + + /** Gets a field or child node of this node: the `name` field or any indexed child. */ + override AstNode getAFieldOrChild() { + ruby_module_def(this, result, _) or ruby_module_child(this, _, result) + } + } + + /** A class representing `next` nodes. */ + class Next extends @ruby_next, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Next" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_next_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_next_child(this, result) } + + /** Gets a field or child node of this node: the child returned by `getChild()`. */ + override AstNode getAFieldOrChild() { ruby_next_child(this, result) } + } + + /** A class representing `nil` tokens. */ + class Nil extends @ruby_token_nil, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Nil" } + } + + /** A class representing `operator` tokens. */ + class Operator extends @ruby_token_operator, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Operator" } + } + + /** A class representing `operator_assignment` nodes. */ + class OperatorAssignment extends @ruby_operator_assignment, AstNode { + /** Gets the name of the primary QL class for this element.
*/ + override string getAPrimaryQlClass() { result = "OperatorAssignment" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_operator_assignment_def(this, _, _, _, result) } + + /** Gets the node corresponding to the field `left`. */ + UnderscoreLhs getLeft() { ruby_operator_assignment_def(this, result, _, _, _) } + + /** Gets the text of the field `operator`, decoded from its integer code (for example `+=` or `||=`). */ + string getOperator() { + exists(int value | ruby_operator_assignment_def(this, _, value, _, _) | + result = "%=" and value = 0 + or + result = "&&=" and value = 1 + or + result = "&=" and value = 2 + or + result = "**=" and value = 3 + or + result = "*=" and value = 4 + or + result = "+=" and value = 5 + or + result = "-=" and value = 6 + or + result = "/=" and value = 7 + or + result = "<<=" and value = 8 + or + result = ">>=" and value = 9 + or + result = "^=" and value = 10 + or + result = "|=" and value = 11 + or + result = "||=" and value = 12 + ) + } + + /** Gets the node corresponding to the field `right`. */ + AstNode getRight() { ruby_operator_assignment_def(this, _, _, result, _) } + + /** Gets a field or child node of this node: the `left` or `right` field (the `operator` is a string, not a node). */ + override AstNode getAFieldOrChild() { + ruby_operator_assignment_def(this, result, _, _, _) or + ruby_operator_assignment_def(this, _, _, result, _) + } + } + + /** A class representing `optional_parameter` nodes. */ + class OptionalParameter extends @ruby_optional_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "OptionalParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_optional_parameter_def(this, _, _, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_optional_parameter_def(this, result, _, _) } + + /** Gets the node corresponding to the field `value`.
*/ + UnderscoreArg getValue() { ruby_optional_parameter_def(this, _, result, _) } + + /** Gets a field or child node of this node: the `name` or `value` field. */ + override AstNode getAFieldOrChild() { + ruby_optional_parameter_def(this, result, _, _) or + ruby_optional_parameter_def(this, _, result, _) + } + } + + /** A class representing `pair` nodes. */ + class Pair extends @ruby_pair, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Pair" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_pair_def(this, _, _, result) } + + /** Gets the node corresponding to the field `key`. */ + AstNode getKey() { ruby_pair_def(this, result, _, _) } + + /** Gets the node corresponding to the field `value`. */ + UnderscoreArg getValue() { ruby_pair_def(this, _, result, _) } + + /** Gets a field or child node of this node: the `key` or `value` field. */ + override AstNode getAFieldOrChild() { + ruby_pair_def(this, result, _, _) or ruby_pair_def(this, _, result, _) + } + } + + /** A class representing `parenthesized_statements` nodes. */ + class ParenthesizedStatements extends @ruby_parenthesized_statements, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ParenthesizedStatements" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_parenthesized_statements_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_parenthesized_statements_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_parenthesized_statements_child(this, _, result) } + } + + /** A class representing `pattern` nodes. */ + class Pattern extends @ruby_pattern, AstNode { + /** Gets the name of the primary QL class for this element.
*/ + override string getAPrimaryQlClass() { result = "Pattern" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_pattern_def(this, _, result) } + + /** Gets the child of this node. */ + AstNode getChild() { ruby_pattern_def(this, result, _) } + + /** Gets a field or child node of this node: the child returned by `getChild()`. */ + override AstNode getAFieldOrChild() { ruby_pattern_def(this, result, _) } + } + + /** A class representing `program` nodes. */ + class Program extends @ruby_program, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Program" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_program_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_program_child(this, i, result) } + + /** Gets a field or child node of this node: any indexed child. */ + override AstNode getAFieldOrChild() { ruby_program_child(this, _, result) } + } + + /** A class representing `range` nodes. */ + class Range extends @ruby_range, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Range" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_range_def(this, _, result) } + + /** Gets the node corresponding to the field `begin`. */ + UnderscoreArg getBegin() { ruby_range_begin(this, result) } + + /** Gets the node corresponding to the field `end`. */ + UnderscoreArg getEnd() { ruby_range_end(this, result) } + + /** Gets the text of the field `operator`, decoded from its integer code: either `..` or `...`. */ + string getOperator() { + exists(int value | ruby_range_def(this, value, _) | + result = ".." and value = 0 + or + result = "..." and value = 1 + ) + } + + /** Gets a field or child node of this node.
*/ + override AstNode getAFieldOrChild() { + ruby_range_begin(this, result) or ruby_range_end(this, result) + } + } + + /** A class representing `rational` nodes. */ + class Rational extends @ruby_rational, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Rational" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_rational_def(this, _, result) } + + /** Gets the child of this node. */ + AstNode getChild() { ruby_rational_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_rational_def(this, result, _) } + } + + /** A class representing `redo` nodes. */ + class Redo extends @ruby_redo, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Redo" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_redo_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_redo_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_redo_child(this, result) } + } + + /** A class representing `regex` nodes. */ + class Regex extends @ruby_regex, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Regex" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_regex_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_regex_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_regex_child(this, _, result) } + } + + /** A class representing `rescue` nodes. */ + class Rescue extends @ruby_rescue, AstNode { + /** Gets the name of the primary QL class for this element. 
*/ + override string getAPrimaryQlClass() { result = "Rescue" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_rescue_def(this, result) } + + /** Gets the node corresponding to the field `body`. */ + Then getBody() { ruby_rescue_body(this, result) } + + /** Gets the node corresponding to the field `exceptions`. */ + Exceptions getExceptions() { ruby_rescue_exceptions(this, result) } + + /** Gets the node corresponding to the field `variable`. */ + ExceptionVariable getVariable() { ruby_rescue_variable(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_rescue_body(this, result) or + ruby_rescue_exceptions(this, result) or + ruby_rescue_variable(this, result) + } + } + + /** A class representing `rescue_modifier` nodes. */ + class RescueModifier extends @ruby_rescue_modifier, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "RescueModifier" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_rescue_modifier_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + UnderscoreStatement getBody() { ruby_rescue_modifier_def(this, result, _, _) } + + /** Gets the node corresponding to the field `handler`. */ + AstNode getHandler() { ruby_rescue_modifier_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_rescue_modifier_def(this, result, _, _) or ruby_rescue_modifier_def(this, _, result, _) + } + } + + /** A class representing `rest_assignment` nodes. */ + class RestAssignment extends @ruby_rest_assignment, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "RestAssignment" } + + /** Gets the location of this element. 
*/ + override Location getLocation() { ruby_rest_assignment_def(this, result) } + + /** Gets the child of this node. */ + UnderscoreLhs getChild() { ruby_rest_assignment_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_rest_assignment_child(this, result) } + } + + /** A class representing `retry` nodes. */ + class Retry extends @ruby_retry, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Retry" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_retry_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_retry_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_retry_child(this, result) } + } + + /** A class representing `return` nodes. */ + class Return extends @ruby_return, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Return" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_return_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_return_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_return_child(this, result) } + } + + /** A class representing `right_assignment_list` nodes. */ + class RightAssignmentList extends @ruby_right_assignment_list, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "RightAssignmentList" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_right_assignment_list_def(this, result) } + + /** Gets the `i`th child of this node. 
*/ + AstNode getChild(int i) { ruby_right_assignment_list_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_right_assignment_list_child(this, _, result) } + } + + /** A class representing `scope_resolution` nodes. */ + class ScopeResolution extends @ruby_scope_resolution, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ScopeResolution" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_scope_resolution_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + AstNode getName() { ruby_scope_resolution_def(this, result, _) } + + /** Gets the node corresponding to the field `scope`. */ + UnderscorePrimary getScope() { ruby_scope_resolution_scope(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_scope_resolution_def(this, result, _) or ruby_scope_resolution_scope(this, result) + } + } + + /** A class representing `self` tokens. */ + class Self extends @ruby_token_self, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Self" } + } + + /** A class representing `setter` nodes. */ + class Setter extends @ruby_setter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Setter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_setter_def(this, _, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_setter_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_setter_def(this, result, _) } + } + + /** A class representing `simple_symbol` tokens. 
*/ + class SimpleSymbol extends @ruby_token_simple_symbol, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SimpleSymbol" } + } + + /** A class representing `singleton_class` nodes. */ + class SingletonClass extends @ruby_singleton_class, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SingletonClass" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_singleton_class_def(this, _, result) } + + /** Gets the node corresponding to the field `value`. */ + UnderscoreArg getValue() { ruby_singleton_class_def(this, result, _) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_singleton_class_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_singleton_class_def(this, result, _) or ruby_singleton_class_child(this, _, result) + } + } + + /** A class representing `singleton_method` nodes. */ + class SingletonMethod extends @ruby_singleton_method, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SingletonMethod" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_singleton_method_def(this, _, _, result) } + + /** Gets the node corresponding to the field `name`. */ + UnderscoreMethodName getName() { ruby_singleton_method_def(this, result, _, _) } + + /** Gets the node corresponding to the field `object`. */ + AstNode getObject() { ruby_singleton_method_def(this, _, result, _) } + + /** Gets the node corresponding to the field `parameters`. */ + MethodParameters getParameters() { ruby_singleton_method_parameters(this, result) } + + /** Gets the `i`th child of this node. 
*/ + AstNode getChild(int i) { ruby_singleton_method_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_singleton_method_def(this, result, _, _) or + ruby_singleton_method_def(this, _, result, _) or + ruby_singleton_method_parameters(this, result) or + ruby_singleton_method_child(this, _, result) + } + } + + /** A class representing `splat_argument` nodes. */ + class SplatArgument extends @ruby_splat_argument, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SplatArgument" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_splat_argument_def(this, _, result) } + + /** Gets the child of this node. */ + UnderscoreArg getChild() { ruby_splat_argument_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_splat_argument_def(this, result, _) } + } + + /** A class representing `splat_parameter` nodes. */ + class SplatParameter extends @ruby_splat_parameter, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SplatParameter" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_splat_parameter_def(this, result) } + + /** Gets the node corresponding to the field `name`. */ + Identifier getName() { ruby_splat_parameter_name(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_splat_parameter_name(this, result) } + } + + /** A class representing `string` nodes. */ + class String extends @ruby_string__, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "String" } + + /** Gets the location of this element. 
*/ + override Location getLocation() { ruby_string_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_string_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_string_child(this, _, result) } + } + + /** A class representing `string_array` nodes. */ + class StringArray extends @ruby_string_array, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "StringArray" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_string_array_def(this, result) } + + /** Gets the `i`th child of this node. */ + BareString getChild(int i) { ruby_string_array_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_string_array_child(this, _, result) } + } + + /** A class representing `string_content` tokens. */ + class StringContent extends @ruby_token_string_content, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "StringContent" } + } + + /** A class representing `subshell` nodes. */ + class Subshell extends @ruby_subshell, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Subshell" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_subshell_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_subshell_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_subshell_child(this, _, result) } + } + + /** A class representing `super` tokens. */ + class Super extends @ruby_token_super, Token { + /** Gets the name of the primary QL class for this element. 
*/ + override string getAPrimaryQlClass() { result = "Super" } + } + + /** A class representing `superclass` nodes. */ + class Superclass extends @ruby_superclass, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Superclass" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_superclass_def(this, _, result) } + + /** Gets the child of this node. */ + AstNode getChild() { ruby_superclass_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_superclass_def(this, result, _) } + } + + /** A class representing `symbol_array` nodes. */ + class SymbolArray extends @ruby_symbol_array, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "SymbolArray" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_symbol_array_def(this, result) } + + /** Gets the `i`th child of this node. */ + BareSymbol getChild(int i) { ruby_symbol_array_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_symbol_array_child(this, _, result) } + } + + /** A class representing `then` nodes. */ + class Then extends @ruby_then, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Then" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_then_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { ruby_then_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_then_child(this, _, result) } + } + + /** A class representing `true` tokens. 
*/ + class True extends @ruby_token_true, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "True" } + } + + /** A class representing `unary` nodes. */ + class Unary extends @ruby_unary, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Unary" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_unary_def(this, _, _, result) } + + /** Gets the node corresponding to the field `operand`. */ + AstNode getOperand() { ruby_unary_def(this, result, _, _) } + + /** Gets the node corresponding to the field `operator`. */ + string getOperator() { + exists(int value | ruby_unary_def(this, _, value, _) | + result = "!" and value = 0 + or + result = "+" and value = 1 + or + result = "-" and value = 2 + or + result = "defined?" and value = 3 + or + result = "not" and value = 4 + or + result = "~" and value = 5 + ) + } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_unary_def(this, result, _, _) } + } + + /** A class representing `undef` nodes. */ + class Undef extends @ruby_undef, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Undef" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_undef_def(this, result) } + + /** Gets the `i`th child of this node. */ + UnderscoreMethodName getChild(int i) { ruby_undef_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_undef_child(this, _, result) } + } + + /** A class representing `uninterpreted` tokens. */ + class Uninterpreted extends @ruby_token_uninterpreted, Token { + /** Gets the name of the primary QL class for this element. 
*/ + override string getAPrimaryQlClass() { result = "Uninterpreted" } + } + + /** A class representing `unless` nodes. */ + class Unless extends @ruby_unless, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Unless" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_unless_def(this, _, result) } + + /** Gets the node corresponding to the field `alternative`. */ + AstNode getAlternative() { ruby_unless_alternative(this, result) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreStatement getCondition() { ruby_unless_def(this, result, _) } + + /** Gets the node corresponding to the field `consequence`. */ + Then getConsequence() { ruby_unless_consequence(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_unless_alternative(this, result) or + ruby_unless_def(this, result, _) or + ruby_unless_consequence(this, result) + } + } + + /** A class representing `unless_modifier` nodes. */ + class UnlessModifier extends @ruby_unless_modifier, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "UnlessModifier" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_unless_modifier_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + UnderscoreStatement getBody() { ruby_unless_modifier_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + AstNode getCondition() { ruby_unless_modifier_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_unless_modifier_def(this, result, _, _) or ruby_unless_modifier_def(this, _, result, _) + } + } + + /** A class representing `until` nodes. 
*/ + class Until extends @ruby_until, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Until" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_until_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + Do getBody() { ruby_until_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreStatement getCondition() { ruby_until_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_until_def(this, result, _, _) or ruby_until_def(this, _, result, _) + } + } + + /** A class representing `until_modifier` nodes. */ + class UntilModifier extends @ruby_until_modifier, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "UntilModifier" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_until_modifier_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + UnderscoreStatement getBody() { ruby_until_modifier_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + AstNode getCondition() { ruby_until_modifier_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_until_modifier_def(this, result, _, _) or ruby_until_modifier_def(this, _, result, _) + } + } + + /** A class representing `when` nodes. */ + class When extends @ruby_when, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "When" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_when_def(this, result) } + + /** Gets the node corresponding to the field `body`. 
*/ + Then getBody() { ruby_when_body(this, result) } + + /** Gets the node corresponding to the field `pattern`. */ + Pattern getPattern(int i) { ruby_when_pattern(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_when_body(this, result) or ruby_when_pattern(this, _, result) + } + } + + /** A class representing `while` nodes. */ + class While extends @ruby_while, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "While" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_while_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + Do getBody() { ruby_while_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + UnderscoreStatement getCondition() { ruby_while_def(this, _, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { + ruby_while_def(this, result, _, _) or ruby_while_def(this, _, result, _) + } + } + + /** A class representing `while_modifier` nodes. */ + class WhileModifier extends @ruby_while_modifier, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "WhileModifier" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_while_modifier_def(this, _, _, result) } + + /** Gets the node corresponding to the field `body`. */ + UnderscoreStatement getBody() { ruby_while_modifier_def(this, result, _, _) } + + /** Gets the node corresponding to the field `condition`. */ + AstNode getCondition() { ruby_while_modifier_def(this, _, result, _) } + + /** Gets a field or child node of this node. 
*/ + override AstNode getAFieldOrChild() { + ruby_while_modifier_def(this, result, _, _) or ruby_while_modifier_def(this, _, result, _) + } + } + + /** A class representing `yield` nodes. */ + class Yield extends @ruby_yield, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Yield" } + + /** Gets the location of this element. */ + override Location getLocation() { ruby_yield_def(this, result) } + + /** Gets the child of this node. */ + ArgumentList getChild() { ruby_yield_child(this, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { ruby_yield_child(this, result) } + } +} + +module Erb { + /** The base class for all AST nodes */ + class AstNode extends @erb_ast_node { + /** Gets a string representation of this element. */ + string toString() { result = this.getAPrimaryQlClass() } + + /** Gets the location of this element. */ + Location getLocation() { none() } + + /** Gets the parent of this element. */ + AstNode getParent() { erb_ast_node_parent(this, result, _) } + + /** Gets the index of this node among the children of its parent. */ + int getParentIndex() { erb_ast_node_parent(this, _, result) } + + /** Gets a field or child node of this node. */ + AstNode getAFieldOrChild() { none() } + + /** Gets the name of the primary QL class for this element. */ + string getAPrimaryQlClass() { result = "???" } + + /** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */ + string getPrimaryQlClasses() { result = concat(getAPrimaryQlClass(), ",") } + } + + /** A token. */ + class Token extends @erb_token, AstNode { + /** Gets the value of this token. */ + string getValue() { erb_tokeninfo(this, _, result, _) } + + /** Gets the location of this token. */ + override Location getLocation() { erb_tokeninfo(this, _, _, result) } + + /** Gets a string representation of this element. 
*/ + override string toString() { result = getValue() } + + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Token" } + } + + /** A reserved word. */ + class ReservedWord extends @erb_reserved_word, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "ReservedWord" } + } + + /** A class representing `code` tokens. */ + class Code extends @erb_token_code, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Code" } + } + + /** A class representing `comment` tokens. */ + class Comment extends @erb_token_comment, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Comment" } + } + + /** A class representing `comment_directive` nodes. */ + class CommentDirective extends @erb_comment_directive, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "CommentDirective" } + + /** Gets the location of this element. */ + override Location getLocation() { erb_comment_directive_def(this, _, result) } + + /** Gets the child of this node. */ + Comment getChild() { erb_comment_directive_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { erb_comment_directive_def(this, result, _) } + } + + /** A class representing `content` tokens. */ + class Content extends @erb_token_content, Token { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Content" } + } + + /** A class representing `directive` nodes. */ + class Directive extends @erb_directive, AstNode { + /** Gets the name of the primary QL class for this element. 
*/ + override string getAPrimaryQlClass() { result = "Directive" } + + /** Gets the location of this element. */ + override Location getLocation() { erb_directive_def(this, _, result) } + + /** Gets the child of this node. */ + Code getChild() { erb_directive_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { erb_directive_def(this, result, _) } + } + + /** A class representing `graphql_directive` nodes. */ + class GraphqlDirective extends @erb_graphql_directive, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "GraphqlDirective" } + + /** Gets the location of this element. */ + override Location getLocation() { erb_graphql_directive_def(this, _, result) } + + /** Gets the child of this node. */ + Code getChild() { erb_graphql_directive_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { erb_graphql_directive_def(this, result, _) } + } + + /** A class representing `output_directive` nodes. */ + class OutputDirective extends @erb_output_directive, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "OutputDirective" } + + /** Gets the location of this element. */ + override Location getLocation() { erb_output_directive_def(this, _, result) } + + /** Gets the child of this node. */ + Code getChild() { erb_output_directive_def(this, result, _) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { erb_output_directive_def(this, result, _) } + } + + /** A class representing `template` nodes. */ + class Template extends @erb_template, AstNode { + /** Gets the name of the primary QL class for this element. */ + override string getAPrimaryQlClass() { result = "Template" } + + /** Gets the location of this element. 
*/ + override Location getLocation() { erb_template_def(this, result) } + + /** Gets the `i`th child of this node. */ + AstNode getChild(int i) { erb_template_child(this, i, result) } + + /** Gets a field or child node of this node. */ + override AstNode getAFieldOrChild() { erb_template_child(this, _, result) } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll new file mode 100644 index 00000000000..3394ef0665a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll @@ -0,0 +1,604 @@ +private import TreeSitter +private import codeql.Locations +private import codeql.ruby.AST +private import codeql.ruby.ast.internal.AST +private import codeql.ruby.ast.internal.Parameter +private import codeql.ruby.ast.internal.Scope +private import codeql.ruby.ast.internal.Synthesis + +/** + * Holds if `n` is in the left-hand-side of an explicit assignment `assignment`. + */ +predicate explicitAssignmentNode(Ruby::AstNode n, Ruby::AstNode assignment) { + n = assignment.(Ruby::Assignment).getLeft() + or + n = assignment.(Ruby::OperatorAssignment).getLeft() + or + exists(Ruby::AstNode parent | + parent = n.getParent() and + explicitAssignmentNode(parent, assignment) + | + parent instanceof Ruby::DestructuredLeftAssignment + or + parent instanceof Ruby::LeftAssignmentList + or + parent instanceof Ruby::RestAssignment + ) +} + +/** Holds if `n` is inside an implicit assignment. */ +predicate implicitAssignmentNode(Ruby::AstNode n) { + n = any(Ruby::ExceptionVariable ev).getChild() + or + n = any(Ruby::For for).getPattern() + or + implicitAssignmentNode(n.getParent()) +} + +/** Holds if `n` is inside a parameter. 
*/ +predicate implicitParameterAssignmentNode(Ruby::AstNode n, Callable::Range c) { + n = c.getParameter(_) + or + implicitParameterAssignmentNode(n.getParent().(Ruby::DestructuredParameter), c) +} +/** Holds if `var` is an instance variable token whose value is `name` and whose enclosing module or class is `scope`; `instance` is `true` if `var` has an enclosing method and `false` otherwise. */ +private predicate instanceVariableAccess( + Ruby::InstanceVariable var, string name, Scope::Range scope, boolean instance +) { + name = var.getValue() and + scope = enclosingModuleOrClass(var) and + if hasEnclosingMethod(var) then instance = true else instance = false +} +/** Holds if `var` is a class variable token whose value is `name` and whose enclosing module or class is `scope`. */ +private predicate classVariableAccess(Ruby::ClassVariable var, string name, Scope::Range scope) { + name = var.getValue() and + scope = enclosingModuleOrClass(var) +} +/** Holds if the scope of `node`, as given by `scopeOf`, has an enclosing method. */ +private predicate hasEnclosingMethod(Ruby::AstNode node) { + exists(Scope::Range s | scopeOf(node) = s and exists(s.getEnclosingMethod())) +} +/** Gets the enclosing module of the scope of `node`, as given by `scopeOf`. */ +private ModuleBase::Range enclosingModuleOrClass(Ruby::AstNode node) { + exists(Scope::Range s | scopeOf(node) = s and result = s.getEnclosingModule()) +} +/** Holds if the identifier `i`, whose value is `name`, is inside a parameter of `scope` (see `implicitParameterAssignmentNode`). */ +private predicate parameterAssignment(Callable::Range scope, string name, Ruby::Identifier i) { + implicitParameterAssignmentNode(i, scope) and + name = i.getValue() +} + +/** Holds if `scope` defines `name` in its parameter declaration at `i`. */ +private predicate scopeDefinesParameterVariable( + Callable::Range scope, string name, Ruby::Identifier i +) { + // In case of overlapping parameter names (e.g. `_`), only the first + // parameter will give rise to a variable + i = + min(Ruby::Identifier other | + parameterAssignment(scope, name, other) + | + other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + ) + or + exists(Parameter::Range p | + p = scope.getParameter(_) and + name = i.getValue() + | + i = p.(Ruby::BlockParameter).getName() or + i = p.(Ruby::HashSplatParameter).getName() or + i = p.(Ruby::KeywordParameter).getName() or + i = p.(Ruby::OptionalParameter).getName() or + i = p.(Ruby::SplatParameter).getName() + ) +} + +/** Holds if `name` is assigned in `scope` at `i`.
*/ +private predicate scopeAssigns(Scope::Range scope, string name, Ruby::Identifier i) { + (explicitAssignmentNode(i, _) or implicitAssignmentNode(i)) and + name = i.getValue() and + scope = scopeOf(i) +} + +cached +private module Cached { + cached + newtype TVariable = + TGlobalVariable(string name) { name = any(Ruby::GlobalVariable var).getValue() } or + TClassVariable(Scope::Range scope, string name, Ruby::AstNode decl) { + decl = + min(Ruby::ClassVariable other | + classVariableAccess(other, name, scope) + | + other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + ) + } or + TInstanceVariable(Scope::Range scope, string name, boolean instance, Ruby::AstNode decl) { + decl = + min(Ruby::InstanceVariable other | + instanceVariableAccess(other, name, scope, instance) + | + other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + ) + } or + TLocalVariableReal(Scope::Range scope, string name, Ruby::Identifier i) { + scopeDefinesParameterVariable(scope, name, i) + or + i = + min(Ruby::Identifier other | + scopeAssigns(scope, name, other) + | + other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + ) and + not scopeDefinesParameterVariable(scope, name, _) and + not inherits(scope, name, _) + } or + TLocalVariableSynth(AstNode n, int i) { any(Synthesis s).localVariable(n, i) } + + // Db types that can be vcalls + private class VcallToken = + @ruby_scope_resolution or @ruby_token_constant or @ruby_token_identifier or @ruby_token_super; + + /** + * Holds if `i` is an `identifier` node occurring in the context where it + * should be considered a VCALL. VCALL is the term that MRI/Ripper uses + * internally when there's an identifier without arguments or parentheses, + * i.e. it *might* be a method call, but it might also be a variable access, + * depending on the bindings in the current scope. 
+ * ```rb + * foo # in MRI this is a VCALL, and the predicate should hold for this + * bar() # in MRI this would be an FCALL. Tree-sitter gives us a `call` node, + * # and the `method` field will be an `identifier`, but this predicate + * # will not hold for that identifier. + * ``` + */ + cached + predicate vcall(VcallToken i) { + i = any(Ruby::ArgumentList x).getChild(_) + or + i = any(Ruby::Array x).getChild(_) + or + i = any(Ruby::Assignment x).getRight() + or + i = any(Ruby::Begin x).getChild(_) + or + i = any(Ruby::BeginBlock x).getChild(_) + or + i = any(Ruby::Binary x).getLeft() + or + i = any(Ruby::Binary x).getRight() + or + i = any(Ruby::Block x).getChild(_) + or + i = any(Ruby::BlockArgument x).getChild() + or + i = any(Ruby::Call x).getReceiver() + or + i = any(Ruby::Case x).getValue() + or + i = any(Ruby::Class x).getChild(_) + or + i = any(Ruby::Conditional x).getCondition() + or + i = any(Ruby::Conditional x).getConsequence() + or + i = any(Ruby::Conditional x).getAlternative() + or + i = any(Ruby::Do x).getChild(_) + or + i = any(Ruby::DoBlock x).getChild(_) + or + i = any(Ruby::ElementReference x).getChild(_) + or + i = any(Ruby::ElementReference x).getObject() + or + i = any(Ruby::Else x).getChild(_) + or + i = any(Ruby::Elsif x).getCondition() + or + i = any(Ruby::EndBlock x).getChild(_) + or + i = any(Ruby::Ensure x).getChild(_) + or + i = any(Ruby::Exceptions x).getChild(_) + or + i = any(Ruby::HashSplatArgument x).getChild() + or + i = any(Ruby::If x).getCondition() + or + i = any(Ruby::IfModifier x).getCondition() + or + i = any(Ruby::IfModifier x).getBody() + or + i = any(Ruby::In x).getChild() + or + i = any(Ruby::Interpolation x).getChild(_) + or + i = any(Ruby::KeywordParameter x).getValue() + or + i = any(Ruby::Method x).getChild(_) + or + i = any(Ruby::Module x).getChild(_) + or + i = any(Ruby::OperatorAssignment x).getRight() + or + i = any(Ruby::OptionalParameter x).getValue() + or + i = any(Ruby::Pair x).getKey() + or + i = 
any(Ruby::Pair x).getValue() + or + i = any(Ruby::ParenthesizedStatements x).getChild(_) + or + i = any(Ruby::Pattern x).getChild() + or + i = any(Ruby::Program x).getChild(_) + or + i = any(Ruby::Range x).getBegin() + or + i = any(Ruby::Range x).getEnd() + or + i = any(Ruby::RescueModifier x).getBody() + or + i = any(Ruby::RescueModifier x).getHandler() + or + i = any(Ruby::RightAssignmentList x).getChild(_) + or + i = any(Ruby::ScopeResolution x).getScope() + or + i = any(Ruby::SingletonClass x).getValue() + or + i = any(Ruby::SingletonClass x).getChild(_) + or + i = any(Ruby::SingletonMethod x).getChild(_) + or + i = any(Ruby::SingletonMethod x).getObject() + or + i = any(Ruby::SplatArgument x).getChild() + or + i = any(Ruby::Superclass x).getChild() + or + i = any(Ruby::Then x).getChild(_) + or + i = any(Ruby::Unary x).getOperand() + or + i = any(Ruby::Unless x).getCondition() + or + i = any(Ruby::UnlessModifier x).getCondition() + or + i = any(Ruby::UnlessModifier x).getBody() + or + i = any(Ruby::Until x).getCondition() + or + i = any(Ruby::UntilModifier x).getCondition() + or + i = any(Ruby::UntilModifier x).getBody() + or + i = any(Ruby::While x).getCondition() + or + i = any(Ruby::WhileModifier x).getCondition() + or + i = any(Ruby::WhileModifier x).getBody() + } + + cached + predicate access(Ruby::Identifier access, VariableReal variable) { + exists(string name | + variable.getNameImpl() = name and + name = access.getValue() + | + variable.getDeclaringScopeImpl() = scopeOf(access) and + not access.getLocation().strictlyBefore(variable.getLocationImpl()) and + // In case of overlapping parameter names, later parameters should not + // be considered accesses to the first parameter + if parameterAssignment(_, _, access) + then scopeDefinesParameterVariable(_, _, access) + else any() + or + exists(Scope::Range declScope | + variable.getDeclaringScopeImpl() = declScope and + inherits(scopeOf(access), name, declScope) + ) + ) + } + + private class Access 
extends Ruby::Token { + Access() { + access(this, _) or + this instanceof Ruby::GlobalVariable or + this instanceof Ruby::InstanceVariable or + this instanceof Ruby::ClassVariable + } + } + + cached + predicate explicitWriteAccess(Access access, Ruby::AstNode assignment) { + explicitAssignmentNode(access, assignment) + } + + cached + predicate implicitWriteAccess(Access access) { + implicitAssignmentNode(access) + or + scopeDefinesParameterVariable(_, _, access) + } + + cached + predicate isCapturedAccess(LocalVariableAccess access) { + toGenerated(access.getVariable().getDeclaringScope()) != scopeOf(toGenerated(access)) + } + + cached + predicate instanceVariableAccess(Ruby::InstanceVariable var, InstanceVariable v) { + exists(string name, Scope::Range scope, boolean instance | + v = TInstanceVariable(scope, name, instance, _) and + instanceVariableAccess(var, name, scope, instance) + ) + } + + cached + predicate classVariableAccess(Ruby::ClassVariable var, ClassVariable variable) { + exists(Scope::Range scope, string name | + variable = TClassVariable(scope, name, _) and + classVariableAccess(var, name, scope) + ) + } +} + +import Cached + +/** Holds if this scope inherits `name` from an outer scope `outer`. 
*/ +private predicate inherits(Scope::Range scope, string name, Scope::Range outer) { + (scope instanceof Ruby::Block or scope instanceof Ruby::DoBlock) and + not scopeDefinesParameterVariable(scope, name, _) and + ( + outer = scope.getOuterScope() and + ( + scopeDefinesParameterVariable(outer, name, _) + or + exists(Ruby::Identifier i | + scopeAssigns(outer, name, i) and + i.getLocation().strictlyBefore(scope.getLocation()) + ) + ) + or + inherits(scope.getOuterScope(), name, outer) + ) +} + +abstract class VariableImpl extends TVariable { + abstract string getNameImpl(); + + final string toString() { result = this.getNameImpl() } + + abstract Location getLocationImpl(); +} + +class TVariableReal = TGlobalVariable or TClassVariable or TInstanceVariable or TLocalVariableReal; + +class TLocalVariable = TLocalVariableReal or TLocalVariableSynth; + +/** + * This class only exists to avoid negative recursion warnings. Ideally, + * we would use `VariableImpl` directly, but that results in incorrect + * negative recursion warnings. Adding new root-defs for the predicates + * below works around this. 
+ */ +abstract class VariableReal extends TVariableReal { + abstract string getNameImpl(); + + abstract Location getLocationImpl(); + + abstract Scope::Range getDeclaringScopeImpl(); + + final string toString() { result = this.getNameImpl() } +} + +// Convert extensions of `VariableReal` into extensions of `VariableImpl` +private class VariableRealAdapter extends VariableImpl, TVariableReal instanceof VariableReal { + final override string getNameImpl() { result = VariableReal.super.getNameImpl() } + + final override Location getLocationImpl() { result = VariableReal.super.getLocationImpl() } +} + +class LocalVariableReal extends VariableReal, TLocalVariableReal { + private Scope::Range scope; + private string name; + private Ruby::Identifier i; + + LocalVariableReal() { this = TLocalVariableReal(scope, name, i) } + + final override string getNameImpl() { result = name } + + final override Location getLocationImpl() { result = i.getLocation() } + + final override Scope::Range getDeclaringScopeImpl() { result = scope } + + final VariableAccess getDefiningAccessImpl() { toGenerated(result) = i } +} + +class LocalVariableSynth extends VariableImpl, TLocalVariableSynth { + private AstNode n; + private int i; + + LocalVariableSynth() { this = TLocalVariableSynth(n, i) } + + final override string getNameImpl() { + exists(int level | level = desugarLevel(n) | + if level > 0 then result = "__synth__" + i + "__" + level else result = "__synth__" + i + ) + } + + final override Location getLocationImpl() { result = n.getLocation() } +} + +class GlobalVariableImpl extends VariableReal, TGlobalVariable { + private string name; + + GlobalVariableImpl() { this = TGlobalVariable(name) } + + final override string getNameImpl() { result = name } + + final override Location getLocationImpl() { none() } + + final override Scope::Range getDeclaringScopeImpl() { none() } +} + +class InstanceVariableImpl extends VariableReal, TInstanceVariable { + private ModuleBase::Range scope; + 
private boolean instance; + private string name; + private Ruby::AstNode decl; + + InstanceVariableImpl() { this = TInstanceVariable(scope, name, instance, decl) } + + final override string getNameImpl() { result = name } + + final predicate isClassInstanceVariable() { instance = false } + + final override Location getLocationImpl() { result = decl.getLocation() } + + final override Scope::Range getDeclaringScopeImpl() { result = scope } +} + +class ClassVariableImpl extends VariableReal, TClassVariable { + private ModuleBase::Range scope; + private string name; + private Ruby::AstNode decl; + + ClassVariableImpl() { this = TClassVariable(scope, name, decl) } + + final override string getNameImpl() { result = name } + + final override Location getLocationImpl() { result = decl.getLocation() } + + final override Scope::Range getDeclaringScopeImpl() { result = scope } +} + +abstract class VariableAccessImpl extends Expr, TVariableAccess { + abstract VariableImpl getVariableImpl(); +} + +module LocalVariableAccess { + predicate range(Ruby::Identifier id, LocalVariable v) { + access(id, v) and + ( + explicitWriteAccess(id, _) + or + implicitWriteAccess(id) + or + vcall(id) + ) + } +} + +class TVariableAccessReal = + TLocalVariableAccessReal or TGlobalVariableAccess or TInstanceVariableAccess or + TClassVariableAccess; + +abstract class LocalVariableAccessImpl extends VariableAccessImpl, TLocalVariableAccess { } + +private class LocalVariableAccessReal extends LocalVariableAccessImpl, TLocalVariableAccessReal { + private Ruby::Identifier g; + private LocalVariable v; + + LocalVariableAccessReal() { this = TLocalVariableAccessReal(g, v) } + + final override LocalVariable getVariableImpl() { result = v } + + final override string toString() { result = g.getValue() } +} + +private class LocalVariableAccessSynth extends LocalVariableAccessImpl, TLocalVariableAccessSynth { + private LocalVariable v; + + LocalVariableAccessSynth() { this = TLocalVariableAccessSynth(_, _, v) } 
+ + final override LocalVariable getVariableImpl() { result = v } + + final override string toString() { result = v.getName() } +} + +module GlobalVariableAccess { + predicate range(Ruby::GlobalVariable n, GlobalVariableImpl v) { n.getValue() = v.getNameImpl() } +} + +abstract class GlobalVariableAccessImpl extends VariableAccessImpl, TGlobalVariableAccess { } + +private class GlobalVariableAccessReal extends GlobalVariableAccessImpl, TGlobalVariableAccessReal { + private Ruby::GlobalVariable g; + private GlobalVariable v; + + GlobalVariableAccessReal() { this = TGlobalVariableAccessReal(g, v) } + + final override GlobalVariable getVariableImpl() { result = v } + + final override string toString() { result = g.getValue() } +} + +private class GlobalVariableAccessSynth extends GlobalVariableAccessImpl, TGlobalVariableAccessSynth { + private GlobalVariable v; + + GlobalVariableAccessSynth() { this = TGlobalVariableAccessSynth(_, _, v) } + + final override GlobalVariable getVariableImpl() { result = v } + + final override string toString() { result = v.getName() } +} + +module InstanceVariableAccess { + predicate range(Ruby::InstanceVariable n, InstanceVariable v) { instanceVariableAccess(n, v) } +} + +abstract class InstanceVariableAccessImpl extends VariableAccessImpl, TInstanceVariableAccess { } + +private class InstanceVariableAccessReal extends InstanceVariableAccessImpl, + TInstanceVariableAccessReal { + private Ruby::InstanceVariable g; + private InstanceVariable v; + + InstanceVariableAccessReal() { this = TInstanceVariableAccessReal(g, v) } + + final override InstanceVariable getVariableImpl() { result = v } + + final override string toString() { result = g.getValue() } +} + +private class InstanceVariableAccessSynth extends InstanceVariableAccessImpl, + TInstanceVariableAccessSynth { + private InstanceVariable v; + + InstanceVariableAccessSynth() { this = TInstanceVariableAccessSynth(_, _, v) } + + final override InstanceVariable getVariableImpl() { result = 
v } + + final override string toString() { result = v.getName() } +} + +module ClassVariableAccess { + predicate range(Ruby::ClassVariable n, ClassVariable v) { classVariableAccess(n, v) } +} + +abstract class ClassVariableAccessRealImpl extends VariableAccessImpl, TClassVariableAccess { } + +private class ClassVariableAccessReal extends ClassVariableAccessRealImpl, TClassVariableAccessReal { + private Ruby::ClassVariable g; + private ClassVariable v; + + ClassVariableAccessReal() { this = TClassVariableAccessReal(g, v) } + + final override ClassVariable getVariableImpl() { result = v } + + final override string toString() { result = g.getValue() } +} + +private class ClassVariableAccessSynth extends ClassVariableAccessRealImpl, + TClassVariableAccessSynth { + private ClassVariable v; + + ClassVariableAccessSynth() { this = TClassVariableAccessSynth(_, _, v) } + + final override ClassVariable getVariableImpl() { result = v } + + final override string toString() { result = v.getName() } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll new file mode 100644 index 00000000000..42aef4b794b --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll @@ -0,0 +1,414 @@ +/** Provides classes representing basic blocks. */ + +private import codeql.Locations +private import codeql.ruby.AST +private import codeql.ruby.ast.internal.AST +private import codeql.ruby.ast.internal.TreeSitter +private import codeql.ruby.controlflow.ControlFlowGraph +private import internal.ControlFlowGraphImpl +private import CfgNodes +private import SuccessorTypes + +/** + * A basic block, that is, a maximal straight-line sequence of control flow nodes + * without branches or joins. + */ +class BasicBlock extends TBasicBlockStart { + /** Gets the scope of this basic block. 
*/ + CfgScope getScope() { result = this.getAPredecessor().getScope() } + + /** Gets an immediate successor of this basic block, if any. */ + BasicBlock getASuccessor() { result = this.getASuccessor(_) } + + /** Gets an immediate successor of this basic block of a given type, if any. */ + BasicBlock getASuccessor(SuccessorType t) { + result.getFirstNode() = this.getLastNode().getASuccessor(t) + } + + /** Gets an immediate predecessor of this basic block, if any. */ + BasicBlock getAPredecessor() { result.getASuccessor() = this } + + /** Gets an immediate predecessor of this basic block of a given type, if any. */ + BasicBlock getAPredecessor(SuccessorType t) { result.getASuccessor(t) = this } + + /** Gets the control flow node at a specific (zero-indexed) position in this basic block. */ + CfgNode getNode(int pos) { bbIndex(this.getFirstNode(), result, pos) } + + /** Gets a control flow node in this basic block. */ + CfgNode getANode() { result = this.getNode(_) } + + /** Gets the first control flow node in this basic block. */ + CfgNode getFirstNode() { this = TBasicBlockStart(result) } + + /** Gets the last control flow node in this basic block. */ + CfgNode getLastNode() { result = this.getNode(this.length() - 1) } + + /** Gets the length of this basic block. */ + int length() { result = strictcount(this.getANode()) } + + /** + * Holds if this basic block immediately dominates basic block `bb`. + * + * That is, all paths reaching basic block `bb` from some entry point + * basic block must go through this basic block (which is an immediate + * predecessor of `bb`). + * + * Example: + * + * ```rb + * def m b + * if b + * return 0 + * end + * return 1 + * end + * ``` + * + * The basic block starting on line 2 immediately dominates the + * basic block on line 5 (all paths from the entry point of `m` + * to `return 1` must go through the `if` block). 
+ */ + predicate immediatelyDominates(BasicBlock bb) { bbIDominates(this, bb) } + + /** + * Holds if this basic block strictly dominates basic block `bb`. + * + * That is, all paths reaching basic block `bb` from some entry point + * basic block must go through this basic block (which must be different + * from `bb`). + * + * Example: + * + * ```rb + * def m b + * if b + * return 0 + * end + * return 1 + * end + * ``` + * + * The basic block starting on line 2 strictly dominates the + * basic block on line 5 (all paths from the entry point of `m` + * to `return 1` must go through the `if` block). + */ + predicate strictlyDominates(BasicBlock bb) { bbIDominates+(this, bb) } + + /** + * Holds if this basic block dominates basic block `bb`. + * + * That is, all paths reaching basic block `bb` from some entry point + * basic block must go through this basic block. + * + * Example: + * + * ```rb + * def m b + * if b + * return 0 + * end + * return 1 + * end + * ``` + * + * The basic block starting on line 2 dominates the + * basic block on line 5 (all paths from the entry point of `m` + * to `return 1` must go through the `if` block). + */ + predicate dominates(BasicBlock bb) { + bb = this or + this.strictlyDominates(bb) + } + + /** + * Holds if `df` is in the dominance frontier of this basic block. + * That is, this basic block dominates a predecessor of `df`, but + * does not strictly dominate `df`. + * + * Example: + * + * ```rb + * def m x + * if x < 0 + * x = -x + * if x > 10 + * x = x - 1 + * end + * end + * puts x + * end + * ``` + * + * The basic block on line 8 is in the dominance frontier + * of the basic block starting on line 3 because that block + * dominates the basic block on line 4, which is a predecessor of + * `puts x`. Also, the basic block starting on line 3 does not + * dominate the basic block on line 8.
+ */ + predicate inDominanceFrontier(BasicBlock df) { + this.dominatesPredecessor(df) and + not strictlyDominates(df) + } + + /** + * Holds if this basic block dominates a predecessor of `df`. + */ + private predicate dominatesPredecessor(BasicBlock df) { this.dominates(df.getAPredecessor()) } + + /** + * Gets the basic block that immediately dominates this basic block, if any. + * + * That is, all paths reaching this basic block from some entry point + * basic block must go through the result, which is an immediate basic block + * predecessor of this basic block. + * + * Example: + * + * ```rb + * def m b + * if b + * return 0 + * end + * return 1 + * end + * ``` + * + * The basic block starting on line 2 is an immediate dominator of + * the basic block on line 5 (all paths from the entry point of `m` + * to `return 1` must go through the `if` block, and the `if` block + * is an immediate predecessor of `return 1`). + */ + BasicBlock getImmediateDominator() { bbIDominates(result, this) } + + /** + * Holds if this basic block strictly post-dominates basic block `bb`. + * + * That is, all paths reaching a normal exit point basic block from basic + * block `bb` must go through this basic block (which must be different + * from `bb`). + * + * Example: + * + * ```rb + * def m b + * if b + * puts "b" + * end + * puts "m" + * end + * ``` + * + * The basic block on line 5 strictly post-dominates the basic block on + * line 3 (all paths to the exit point of `m` from `puts "b"` must go + * through `puts "m"`). + */ + predicate strictlyPostDominates(BasicBlock bb) { bbIPostDominates+(this, bb) } + + /** + * Holds if this basic block post-dominates basic block `bb`. + * + * That is, all paths reaching a normal exit point basic block from basic + * block `bb` must go through this basic block. 
+ * + * Example: + * + * ```rb + * def m b + * if b + * puts "b" + * end + * puts "m" + * end + * ``` + * + * The basic block on line 5 post-dominates the basic block on line 3 + * (all paths to the exit point of `m` from `puts "b"` must go through + * `puts "m"`). + */ + predicate postDominates(BasicBlock bb) { + this.strictlyPostDominates(bb) or + this = bb + } + + /** Holds if this basic block is in a loop in the control flow graph. */ + predicate inLoop() { this.getASuccessor+() = this } + + /** Gets a textual representation of this basic block. */ + string toString() { result = this.getFirstNode().toString() } + + /** Gets the location of this basic block. */ + Location getLocation() { result = this.getFirstNode().getLocation() } +} + +cached +private module Cached { + /** Internal representation of basic blocks. */ + cached + newtype TBasicBlock = TBasicBlockStart(CfgNode cfn) { startsBB(cfn) } + + /** Holds if `cfn` starts a new basic block. */ + private predicate startsBB(CfgNode cfn) { + not exists(cfn.getAPredecessor()) and exists(cfn.getASuccessor()) + or + cfn.isJoin() + or + cfn.getAPredecessor().isBranch() + } + + /** + * Holds if `succ` is a control flow successor of `pred` within + * the same basic block. + */ + private predicate intraBBSucc(CfgNode pred, CfgNode succ) { + succ = pred.getASuccessor() and + not startsBB(succ) + } + + /** + * Holds if `cfn` is the `i`th node in the basic block that starts at `bbStart`. + * + * In other words, `i` is the shortest distance from a node `bbStart` + * that starts a basic block to `cfn` along the `intraBBSucc` relation. + */ + cached + predicate bbIndex(CfgNode bbStart, CfgNode cfn, int i) = + shortestDistances(startsBB/1, intraBBSucc/2)(bbStart, cfn, i) + + /** + * Holds if the first node of basic block `succ` is a control flow + * successor of the last node of basic block `pred`. + */ + private predicate succBB(BasicBlock pred, BasicBlock succ) { succ = pred.getASuccessor() } + + /** Holds if `dom` is an immediate dominator of `bb`.
*/ + cached + predicate bbIDominates(BasicBlock dom, BasicBlock bb) = + idominance(entryBB/1, succBB/2)(_, dom, bb) + + /** Holds if `pred` is a basic block predecessor of `succ`. */ + private predicate predBB(BasicBlock succ, BasicBlock pred) { succBB(pred, succ) } + + /** Holds if `bb` is an exit basic block that represents normal exit. */ + private predicate normalExitBB(BasicBlock bb) { bb.getANode().(AnnotatedExitNode).isNormal() } + + /** Holds if `dom` is an immediate post-dominator of `bb`. */ + cached + predicate bbIPostDominates(BasicBlock dom, BasicBlock bb) = + idominance(normalExitBB/1, predBB/2)(_, dom, bb) + + /** + * Gets the `i`th predecessor of join block `jb`, with respect to some + * arbitrary order. + */ + cached + JoinBlockPredecessor getJoinBlockPredecessor(JoinBlock jb, int i) { + result = + rank[i + 1](JoinBlockPredecessor jbp | + jbp = jb.getAPredecessor() + | + jbp order by JoinBlockPredecessors::getId(jbp), JoinBlockPredecessors::getSplitString(jbp) + ) + } +} + +private import Cached + +/** Holds if `bb` is an entry basic block. */ +private predicate entryBB(BasicBlock bb) { bb.getFirstNode() instanceof EntryNode } + +/** + * An entry basic block, that is, a basic block whose first node is + * an entry node. + */ +class EntryBasicBlock extends BasicBlock { + EntryBasicBlock() { entryBB(this) } + + override CfgScope getScope() { this.getFirstNode() = TEntryNode(result) } +} + +/** + * An annotated exit basic block, that is, a basic block that contains + * an annotated exit node. + */ +class AnnotatedExitBasicBlock extends BasicBlock { + private boolean normal; + + AnnotatedExitBasicBlock() { + exists(AnnotatedExitNode n | + n = this.getANode() and + if n.isNormal() then normal = true else normal = false + ) + } + + /** Holds if this block represents a normal exit. */ + final predicate isNormal() { normal = true } +} + +/** + * An exit basic block, that is, a basic block whose last node is + * an exit node.
+ */ +class ExitBasicBlock extends BasicBlock { + ExitBasicBlock() { this.getLastNode() instanceof ExitNode } +} + +private module JoinBlockPredecessors { + private predicate id(Ruby::AstNode x, Ruby::AstNode y) { x = y } + + private predicate idOf(Ruby::AstNode x, int y) = equivalenceRelation(id/2)(x, y) + + int getId(JoinBlockPredecessor jbp) { + idOf(toGeneratedInclSynth(jbp.getFirstNode().(AstCfgNode).getNode()), result) + or + idOf(toGeneratedInclSynth(jbp.(EntryBasicBlock).getScope()), result) + } + + string getSplitString(JoinBlockPredecessor jbp) { + result = jbp.getFirstNode().(AstCfgNode).getSplitsString() + or + not exists(jbp.getFirstNode().(AstCfgNode).getSplitsString()) and + result = "" + } +} + +/** A basic block with more than one predecessor. */ +class JoinBlock extends BasicBlock { + JoinBlock() { getFirstNode().isJoin() } + + /** + * Gets the `i`th predecessor of this join block, with respect to some + * arbitrary order. + */ + JoinBlockPredecessor getJoinBlockPredecessor(int i) { result = getJoinBlockPredecessor(this, i) } +} + +/** A basic block that is an immediate predecessor of a join block. */ +class JoinBlockPredecessor extends BasicBlock { + JoinBlockPredecessor() { this.getASuccessor() instanceof JoinBlock } +} + +/** A basic block that terminates in a condition, splitting the subsequent control flow. */ +class ConditionBlock extends BasicBlock { + ConditionBlock() { this.getLastNode().isCondition() } + + /** + * Holds if basic block `succ` is immediately controlled by this basic + * block with conditional value `s`. That is, `succ` is an immediate + * successor of this block, and `succ` can only be reached from + * the callable entry point by going via the `s` edge out of this basic block. 
+ */ + pragma[nomagic] + predicate immediatelyControls(BasicBlock succ, BooleanSuccessor s) { + succ = this.getASuccessor(s) and + forall(BasicBlock pred | pred = succ.getAPredecessor() and pred != this | succ.dominates(pred)) + } + + /** + * Holds if basic block `controlled` is controlled by this basic block with + * conditional value `s`. That is, `controlled` can only be reached from + * the callable entry point by going via the `s` edge out of this basic block. + */ + predicate controls(BasicBlock controlled, BooleanSuccessor s) { + exists(BasicBlock succ | this.immediatelyControls(succ, s) | succ.dominates(controlled)) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll new file mode 100644 index 00000000000..dcc1dd6352b --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll @@ -0,0 +1,484 @@ +/** Provides classes representing nodes in a control flow graph. */ + +private import codeql.ruby.AST +private import codeql.ruby.controlflow.BasicBlocks +private import codeql.ruby.dataflow.SSA +private import ControlFlowGraph +private import internal.ControlFlowGraphImpl +private import internal.Splitting + +/** An entry node for a given scope. */ +class EntryNode extends CfgNode, TEntryNode { + private CfgScope scope; + + EntryNode() { this = TEntryNode(scope) } + + final override EntryBasicBlock getBasicBlock() { result = CfgNode.super.getBasicBlock() } + + final override Location getLocation() { result = scope.getLocation() } + + final override string toString() { result = "enter " + scope } +} + +/** An exit node for a given scope, annotated with the type of exit. */ +class AnnotatedExitNode extends CfgNode, TAnnotatedExitNode { + private CfgScope scope; + private boolean normal; + + AnnotatedExitNode() { this = TAnnotatedExitNode(scope, normal) } + + /** Holds if this node represents a normal exit.
*/ + final predicate isNormal() { normal = true } + + final override AnnotatedExitBasicBlock getBasicBlock() { result = CfgNode.super.getBasicBlock() } + + final override Location getLocation() { result = scope.getLocation() } + + final override string toString() { + exists(string s | + normal = true and s = "normal" + or + normal = false and s = "abnormal" + | + result = "exit " + scope + " (" + s + ")" + ) + } +} + +/** An exit node for a given scope. */ +class ExitNode extends CfgNode, TExitNode { + private CfgScope scope; + + ExitNode() { this = TExitNode(scope) } + + final override Location getLocation() { result = scope.getLocation() } + + final override string toString() { result = "exit " + scope } +} + +/** + * A node for an AST node. + * + * Each AST node maps to zero or more `AstCfgNode`s: zero when the node is in unreachable + * (dead) code or not important for control flow, and multiple when there are different + * splits for the AST node. + */ +class AstCfgNode extends CfgNode, TElementNode { + private Splits splits; + private AstNode n; + + AstCfgNode() { this = TElementNode(n, splits) } + + final override AstNode getNode() { result = n } + + override Location getLocation() { result = n.getLocation() } + + final override string toString() { + exists(string s | s = n.(AstNode).toString() | + result = "[" + this.getSplitsString() + "] " + s + or + not exists(this.getSplitsString()) and result = s + ) + } + + /** Gets a comma-separated list of strings for each split in this node, if any. */ + final string getSplitsString() { + result = splits.toString() and + result != "" + } + + /** Gets a split for this control flow node, if any. */ + final Split getASplit() { result = splits.getASplit() } +} + +/** A control-flow node that wraps an AST expression. */ +class ExprCfgNode extends AstCfgNode { + Expr e; + + ExprCfgNode() { e = this.getNode() } + + /** Gets the underlying expression.
*/ + Expr getExpr() { result = e } + + private ExprCfgNode getSource() { + exists(Ssa::WriteDefinition def | + def.assigns(result) and + this = def.getARead() + ) + } + + /** Gets the textual (constant) value of this expression, if any. */ + string getValueText() { result = this.getSource().getValueText() } +} + +/** A control-flow node that wraps a return-like statement. */ +class ReturningCfgNode extends AstCfgNode { + ReturningStmt s; + + ReturningCfgNode() { s = this.getNode() } + + /** Gets the node of the returned value, if any. */ + ExprCfgNode getReturnedValueNode() { + result = this.getAPredecessor() and + result.getNode() = s.getValue() + } +} + +/** A control-flow node that wraps a `StringComponent` AST expression. */ +class StringComponentCfgNode extends AstCfgNode { + StringComponentCfgNode() { this.getNode() instanceof StringComponent } +} + +private Expr desugar(Expr n) { + result = n.getDesugared() + or + not exists(n.getDesugared()) and + result = n +} + +/** + * A class for mapping parent-child AST nodes to parent-child CFG nodes. + */ +abstract private class ExprChildMapping extends Expr { + /** + * Holds if `child` is a (possibly nested) child of this expression + * for which we would like to find a matching CFG child. + */ + abstract predicate relevantChild(Expr child); + + pragma[nomagic] + private predicate reachesBasicBlock(Expr child, CfgNode cfn, BasicBlock bb) { + this.relevantChild(child) and + cfn = this.getAControlFlowNode() and + bb.getANode() = cfn + or + exists(BasicBlock mid | + this.reachesBasicBlock(child, cfn, mid) and + bb = mid.getAPredecessor() and + not mid.getANode().getNode() = child + ) + } + + /** + * Holds if there is a control-flow path from `cfn` to `cfnChild`, where `cfn` + * is a control-flow node for this expression, and `cfnChild` is a control-flow + * node for `child`. + * + * The path never escapes the syntactic scope of this expression. 
+ */ + cached + predicate hasCfgChild(Expr child, CfgNode cfn, CfgNode cfnChild) { + this.reachesBasicBlock(child, cfn, cfnChild.getBasicBlock()) and + cfnChild = desugar(child).getAControlFlowNode() + } +} + +/** Provides classes for control-flow nodes that wrap AST expressions. */ +module ExprNodes { + private class LiteralChildMapping extends ExprChildMapping, Literal { + override predicate relevantChild(Expr e) { none() } + } + + /** A control-flow node that wraps a `Literal` AST expression. */ + class LiteralCfgNode extends ExprCfgNode { + override LiteralChildMapping e; + + override Literal getExpr() { result = super.getExpr() } + + override string getValueText() { result = e.getValueText() } + } + + private class AssignExprChildMapping extends ExprChildMapping, AssignExpr { + override predicate relevantChild(Expr e) { e = this.getAnOperand() } + } + + /** A control-flow node that wraps an `AssignExpr` AST expression. */ + class AssignExprCfgNode extends ExprCfgNode { + override AssignExprChildMapping e; + + final override AssignExpr getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the LHS of this assignment. */ + final ExprCfgNode getLhs() { e.hasCfgChild(e.getLeftOperand(), this, result) } + + /** Gets the RHS of this assignment. */ + final ExprCfgNode getRhs() { e.hasCfgChild(e.getRightOperand(), this, result) } + } + + private class OperationExprChildMapping extends ExprChildMapping, Operation { + override predicate relevantChild(Expr e) { e = this.getAnOperand() } + } + + /** A control-flow node that wraps an `Operation` AST expression. */ + class OperationCfgNode extends ExprCfgNode { + override OperationExprChildMapping e; + + override Operation getExpr() { result = super.getExpr() } + + /** Gets an operand of this operation. */ + final ExprCfgNode getAnOperand() { e.hasCfgChild(e.getAnOperand(), this, result) } + } + + /** A control-flow node that wraps a `BinaryOperation` AST expression.
*/ + class BinaryOperationCfgNode extends OperationCfgNode { + private BinaryOperation bo; + + BinaryOperationCfgNode() { e = bo } + + override BinaryOperation getExpr() { result = super.getExpr() } + + /** Gets the left operand of this binary operation. */ + final ExprCfgNode getLeftOperand() { e.hasCfgChild(bo.getLeftOperand(), this, result) } + + /** Gets the right operand of this binary operation. */ + final ExprCfgNode getRightOperand() { e.hasCfgChild(bo.getRightOperand(), this, result) } + + final override string getValueText() { + exists(string left, string right, string op | + left = this.getLeftOperand().getValueText() and + right = this.getRightOperand().getValueText() and + op = this.getExpr().getOperator() + | + op = "+" and + ( + result = (left.toInt() + right.toInt()).toString() + or + not (exists(left.toInt()) and exists(right.toInt())) and + result = (left.toFloat() + right.toFloat()).toString() + or + not (exists(left.toFloat()) and exists(right.toFloat())) and + result = left + right + ) + or + op = "-" and + ( + result = (left.toInt() - right.toInt()).toString() + or + not (exists(left.toInt()) and exists(right.toInt())) and + result = (left.toFloat() - right.toFloat()).toString() + ) + or + op = "*" and + ( + result = (left.toInt() * right.toInt()).toString() + or + not (exists(left.toInt()) and exists(right.toInt())) and + result = (left.toFloat() * right.toFloat()).toString() + ) + or + op = "/" and + ( + result = (left.toInt() / right.toInt()).toString() + or + not (exists(left.toInt()) and exists(right.toInt())) and + result = (left.toFloat() / right.toFloat()).toString() + ) + ) + } + } + + private class BlockArgumentChildMapping extends ExprChildMapping, BlockArgument { + override predicate relevantChild(Expr e) { e = this.getValue() } + } + + /** A control-flow node that wraps a `BlockArgument` AST expression. 
*/ + class BlockArgumentCfgNode extends ExprCfgNode { + override BlockArgumentChildMapping e; + + final override BlockArgument getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the value of this block argument. */ + final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) } + } + + private class CallExprChildMapping extends ExprChildMapping, Call { + override predicate relevantChild(Expr e) { + e = [this.getAnArgument(), this.(MethodCall).getReceiver(), this.(MethodCall).getBlock()] + } + } + + /** A control-flow node that wraps a `Call` AST expression. */ + class CallCfgNode extends ExprCfgNode { + override CallExprChildMapping e; + + override Call getExpr() { result = super.getExpr() } + + /** Gets the `n`th argument of this call. */ + final ExprCfgNode getArgument(int n) { e.hasCfgChild(e.getArgument(n), this, result) } + + /** Gets the keyword argument whose key is `keyword` of this call. */ + final ExprCfgNode getKeywordArgument(string keyword) { + e.hasCfgChild(e.getKeywordArgument(keyword), this, result) + } + + /** Gets the number of arguments of this call. */ + final int getNumberOfArguments() { result = e.getNumberOfArguments() } + + /** Gets the receiver of this call. */ + final ExprCfgNode getReceiver() { e.hasCfgChild(e.(MethodCall).getReceiver(), this, result) } + + /** Gets the block of this call. */ + final ExprCfgNode getBlock() { e.hasCfgChild(e.(MethodCall).getBlock(), this, result) } + } + + private class CaseExprChildMapping extends ExprChildMapping, CaseExpr { + override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) } + } + + /** A control-flow node that wraps a `MethodCall` AST expression. */ + class MethodCallCfgNode extends CallCfgNode { + MethodCallCfgNode() { super.getExpr() instanceof MethodCall } + + override MethodCall getExpr() { result = super.getExpr() } + } + + /** A control-flow node that wraps a `CaseExpr` AST expression.
*/ + class CaseExprCfgNode extends ExprCfgNode { + override CaseExprChildMapping e; + + final override CaseExpr getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the expression being compared, if any. */ + final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) } + + /** + * Gets the `n`th branch of this case expression. + */ + final ExprCfgNode getBranch(int n) { e.hasCfgChild(e.getBranch(n), this, result) } + } + + private class ConditionalExprChildMapping extends ExprChildMapping, ConditionalExpr { + override predicate relevantChild(Expr e) { e = this.getCondition() or e = this.getBranch(_) } + } + + /** A control-flow node that wraps a `ConditionalExpr` AST expression. */ + class ConditionalExprCfgNode extends ExprCfgNode { + override ConditionalExprChildMapping e; + + final override ConditionalExpr getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the condition expression. */ + final ExprCfgNode getCondition() { e.hasCfgChild(e.getCondition(), this, result) } + + /** + * Gets the branch of this conditional expression that is taken when the condition + * evaluates to `cond`, if any. + */ + final ExprCfgNode getBranch(boolean cond) { e.hasCfgChild(e.getBranch(cond), this, result) } + } + + private class ConstantAccessChildMapping extends ExprChildMapping, ConstantAccess { + override predicate relevantChild(Expr e) { e = this.getScopeExpr() } + } + + /** A control-flow node that wraps a `ConstantAccess` AST expression. */ + class ConstantAccessCfgNode extends ExprCfgNode { + override ConstantAccessChildMapping e; + + final override ConstantAccess getExpr() { result = super.getExpr() } + + /** Gets the scope expression.
*/ + final ExprCfgNode getScopeExpr() { e.hasCfgChild(e.getScopeExpr(), this, result) } + } + + private class StmtSequenceChildMapping extends ExprChildMapping, StmtSequence { + override predicate relevantChild(Expr e) { e = this.getLastStmt() } + } + + /** A control-flow node that wraps a `StmtSequence` AST expression. */ + class StmtSequenceCfgNode extends ExprCfgNode { + override StmtSequenceChildMapping e; + + final override StmtSequence getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the last statement in this sequence, if any. */ + final ExprCfgNode getLastStmt() { e.hasCfgChild(e.getLastStmt(), this, result) } + } + + private class ForExprChildMapping extends ExprChildMapping, ForExpr { + override predicate relevantChild(Expr e) { e = this.getValue() } + } + + /** A control-flow node that wraps a `ForExpr` AST expression. */ + class ForExprCfgNode extends ExprCfgNode { + override ForExprChildMapping e; + + final override ForExpr getExpr() { result = ExprCfgNode.super.getExpr() } + + /** Gets the value being iterated over. */ + final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) } + } + + /** A control-flow node that wraps a `ParenthesizedExpr` AST expression. */ + class ParenthesizedExprCfgNode extends StmtSequenceCfgNode { + ParenthesizedExprCfgNode() { this.getExpr() instanceof ParenthesizedExpr } + } + + /** A control-flow node that wraps a `VariableReadAccess` AST expression. */ + class VariableReadAccessCfgNode extends ExprCfgNode { + override VariableReadAccess e; + + final override VariableReadAccess getExpr() { result = ExprCfgNode.super.getExpr() } + } + + /** A control-flow node that wraps an `InstanceVariableWriteAccess` AST expression.
*/ + class InstanceVariableWriteAccessCfgNode extends ExprCfgNode { + override InstanceVariableWriteAccess e; + + final override InstanceVariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() } + } + + /** A control-flow node that wraps a `StringInterpolationComponent` AST expression. */ + class StringInterpolationComponentCfgNode extends StmtSequenceCfgNode { + StringInterpolationComponentCfgNode() { this.getNode() instanceof StringInterpolationComponent } + } + + private class StringlikeLiteralChildMapping extends ExprChildMapping, StringlikeLiteral { + override predicate relevantChild(Expr e) { e = this.getComponent(_) } + } + + /** A control-flow node that wraps a `StringlikeLiteral` AST expression. */ + class StringlikeLiteralCfgNode extends ExprCfgNode { + override StringlikeLiteralChildMapping e; + + final override StringlikeLiteral getExpr() { result = super.getExpr() } + + /** Gets a component of this `StringlikeLiteral` */ + StringComponentCfgNode getAComponent() { e.hasCfgChild(e.getComponent(_), this, result) } + } + + /** A control-flow node that wraps a `StringLiteral` AST expression. */ + class StringLiteralCfgNode extends ExprCfgNode { + override StringLiteral e; + + final override StringLiteral getExpr() { result = super.getExpr() } + } + + /** A control-flow node that wraps a `RegExpLiteral` AST expression. */ + class RegExpLiteralCfgNode extends ExprCfgNode { + override RegExpLiteral e; + + final override RegExpLiteral getExpr() { result = super.getExpr() } + } + + /** A control-flow node that wraps a `ComparisonOperation` AST expression. */ + class ComparisonOperationCfgNode extends BinaryOperationCfgNode { + ComparisonOperationCfgNode() { e instanceof ComparisonOperation } + + override ComparisonOperation getExpr() { result = super.getExpr() } + } + + /** A control-flow node that wraps a `RelationalOperation` AST expression. 
*/ + class RelationalOperationCfgNode extends ComparisonOperationCfgNode { + RelationalOperationCfgNode() { e instanceof RelationalOperation } + + final override RelationalOperation getExpr() { result = super.getExpr() } + } + + /** A control-flow node that wraps an `ElementReference` AST expression. */ + class ElementReferenceCfgNode extends MethodCallCfgNode { + ElementReferenceCfgNode() { e instanceof ElementReference } + + final override ElementReference getExpr() { result = super.getExpr() } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll new file mode 100644 index 00000000000..aad9daa4827 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll @@ -0,0 +1,341 @@ +/** Provides classes representing the control flow graph. */ + +private import codeql.Locations +private import codeql.ruby.AST +private import codeql.ruby.controlflow.BasicBlocks +private import SuccessorTypes +private import internal.ControlFlowGraphImpl +private import internal.Splitting +private import internal.Completion + +/** An AST node with an associated control-flow graph. */ +class CfgScope extends Scope instanceof CfgScope::Range_ { + /** Gets the CFG scope that this scope is nested under, if any. */ + final CfgScope getOuterCfgScope() { + exists(AstNode parent | + parent = this.getParent() and + result = getCfgScope(parent) + ) + } +} + +/** + * A control flow node. + * + * A control flow node is a node in the control flow graph (CFG). There is a + * many-to-one relationship between CFG nodes and AST nodes. + * + * Only nodes that can be reached from an entry point are included in the CFG. + */ +class CfgNode extends TNode { + /** Gets a textual representation of this control flow node. */ + string toString() { none() } + + /** Gets the AST node that this node corresponds to, if any. 
*/ + AstNode getNode() { none() } + + /** Gets the location of this control flow node. */ + Location getLocation() { none() } + + /** Gets the file of this control flow node. */ + final File getFile() { result = this.getLocation().getFile() } + + /** Holds if this control flow node has conditional successors. */ + final predicate isCondition() { exists(this.getASuccessor(any(BooleanSuccessor bs))) } + + /** Gets the scope of this node. */ + final CfgScope getScope() { result = this.getBasicBlock().getScope() } + + /** Gets the basic block that this control flow node belongs to. */ + BasicBlock getBasicBlock() { result.getANode() = this } + + /** Gets a successor node of a given type, if any. */ + final CfgNode getASuccessor(SuccessorType t) { result = getASuccessor(this, t) } + + /** Gets an immediate successor, if any. */ + final CfgNode getASuccessor() { result = this.getASuccessor(_) } + + /** Gets an immediate predecessor node of a given flow type, if any. */ + final CfgNode getAPredecessor(SuccessorType t) { result.getASuccessor(t) = this } + + /** Gets an immediate predecessor, if any. */ + final CfgNode getAPredecessor() { result = this.getAPredecessor(_) } + + /** Holds if this node has more than one predecessor. */ + final predicate isJoin() { strictcount(this.getAPredecessor()) > 1 } + + /** Holds if this node has more than one successor. */ + final predicate isBranch() { strictcount(this.getASuccessor()) > 1 } +} + +/** The type of a control flow successor. */ +class SuccessorType extends TSuccessorType { + /** Gets a textual representation of successor type. */ + string toString() { none() } +} + +/** Provides different types of control flow successor types. */ +module SuccessorTypes { + /** A normal control flow successor. */ + class NormalSuccessor extends SuccessorType, TSuccessorSuccessor { + final override string toString() { result = "successor" } + } + + /** + * A conditional control flow successor. 
Either a Boolean successor (`BooleanSuccessor`), + * an emptiness successor (`EmptinessSuccessor`), or a matching successor + * (`MatchingSuccessor`) + */ + class ConditionalSuccessor extends SuccessorType { + boolean value; + + ConditionalSuccessor() { + this = TBooleanSuccessor(value) or + this = TEmptinessSuccessor(value) or + this = TMatchingSuccessor(value) + } + + /** Gets the Boolean value of this successor. */ + final boolean getValue() { result = value } + + override string toString() { result = getValue().toString() } + } + + /** + * A Boolean control flow successor. + * + * For example, in + * + * ```rb + * if x >= 0 + * puts "positive" + * else + * puts "negative" + * end + * ``` + * + * `x >= 0` has both a `true` successor and a `false` successor. + */ + class BooleanSuccessor extends ConditionalSuccessor, TBooleanSuccessor { } + + /** + * An emptiness control flow successor. + * + * For example, this program fragment: + * + * ```rb + * for arg in args do + * puts arg + * end + * puts "done"; + * ``` + * + * has a control flow graph containing emptiness successors: + * + * ``` + * args + * | + * for------<----- + * / \ \ + * / \ | + * / \ | + * / \ | + * empty non-empty | + * | \ | + * puts "done" \ | + * arg | + * | | + * puts arg | + * \___/ + * ``` + */ + class EmptinessSuccessor extends ConditionalSuccessor, TEmptinessSuccessor { + override string toString() { if value = true then result = "empty" else result = "non-empty" } + } + + /** + * A matching control flow successor. 
+ * + * For example, this program fragment: + * + * ```rb + * case x + * when 1 then puts "one" + * else puts "not one" + * end + * ``` + * + * has a control flow graph containing matching successors: + * + * ``` + * x + * | + * 1 + * / \ + * / \ + * / \ + * / \ + * match non-match + * | | + * puts "one" puts "not one" + * ``` + */ + class MatchingSuccessor extends ConditionalSuccessor, TMatchingSuccessor { + override string toString() { if value = true then result = "match" else result = "no-match" } + } + + /** + * A `return` control flow successor. + * + * Example: + * + * ```rb + * def sum(x,y) + * return x + y + * end + * ``` + * + * The exit node of `sum` is a `return` successor of the `return x + y` + * statement. + */ + class ReturnSuccessor extends SuccessorType, TReturnSuccessor { + final override string toString() { result = "return" } + } + + /** + * A `break` control flow successor. + * + * Example: + * + * ```rb + * def m + * while x >= 0 + * x -= 1 + * if num > 100 + * break + * end + * end + * puts "done" + * end + * ``` + * + * The node `puts "done"` is a `break` successor of the node `break`. + */ + class BreakSuccessor extends SuccessorType, TBreakSuccessor { + final override string toString() { result = "break" } + } + + /** + * A `next` control flow successor. + * + * Example: + * + * ```rb + * def m + * while x >= 0 + * x -= 1 + * if num > 100 + * next + * end + * end + * puts "done" + * end + * ``` + * + * The node `x >= 0` is a `next` successor of the node `next`. + */ + class NextSuccessor extends SuccessorType, TNextSuccessor { + final override string toString() { result = "next" } + } + + /** + * A `redo` control flow successor. + * + * Example: + * + * Example: + * + * ```rb + * def m + * while x >= 0 + * x -= 1 + * if num > 100 + * redo + * end + * end + * puts "done" + * end + * ``` + * + * The node `x -= 1` is a `redo` successor of the node `redo`.
+ */ + class RedoSuccessor extends SuccessorType, TRedoSuccessor { + final override string toString() { result = "redo" } + } + + /** + * A `retry` control flow successor. + * + * Example: + * + * Example: + * + * ```rb + * def m + * begin + * puts "Retry" + * raise + * rescue + * retry + * end + * end + * ``` + * + * The node `puts "Retry"` is a `retry` successor of the node `retry`. + */ + class RetrySuccessor extends SuccessorType, TRetrySuccessor { + final override string toString() { result = "retry" } + } + + /** + * An exceptional control flow successor. + * + * Example: + * + * ```rb + * def m x + * if x > 2 + * raise "x > 2" + * end + * puts "x <= 2" + * end + * ``` + * + * The exit node of `m` is an exceptional successor of the node + * `raise "x > 2"`. + */ + class RaiseSuccessor extends SuccessorType, TRaiseSuccessor { + final override string toString() { result = "raise" } + } + + /** + * An exit control flow successor. + * + * Example: + * + * ```rb + * def m x + * if x > 2 + * exit 1 + * end + * puts "x <= 2" + * end + * ``` + * + * The exit node of `m` is an exit successor of the node + * `exit 1`. + */ + class ExitSuccessor extends SuccessorType, TExitSuccessor { + final override string toString() { result = "exit" } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll new file mode 100644 index 00000000000..e7f64d1318e --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll @@ -0,0 +1,507 @@ +/** + * Provides classes representing control flow completions. + * + * A completion represents how a statement or expression terminates.
+ */ + +private import codeql.ruby.AST +private import codeql.ruby.ast.internal.AST +private import codeql.ruby.controlflow.ControlFlowGraph +private import ControlFlowGraphImpl +private import NonReturning +private import SuccessorTypes + +private newtype TCompletion = + TSimpleCompletion() or + TBooleanCompletion(boolean b) { b in [false, true] } or + TEmptinessCompletion(boolean isEmpty) { isEmpty in [false, true] } or + TMatchingCompletion(boolean isMatch) { isMatch in [false, true] } or + TReturnCompletion() or + TBreakCompletion() or + TNextCompletion() or + TRedoCompletion() or + TRetryCompletion() or + TRaiseCompletion() or // TODO: Add exception type? + TExitCompletion() or + TNestedCompletion(Completion inner, Completion outer, int nestLevel) { + inner = TBreakCompletion() and + outer instanceof NonNestedNormalCompletion and + nestLevel = 0 + or + inner instanceof NormalCompletion and + nestedEnsureCompletion(outer, nestLevel) + } + +pragma[noinline] +private predicate nestedEnsureCompletion(Completion outer, int nestLevel) { + ( + outer = TReturnCompletion() + or + outer = TBreakCompletion() + or + outer = TNextCompletion() + or + outer = TRedoCompletion() + or + outer = TRetryCompletion() + or + outer = TRaiseCompletion() + or + outer = TExitCompletion() + ) and + nestLevel = any(Trees::BodyStmtTree t).getNestLevel() +} + +pragma[noinline] +private predicate completionIsValidForStmt(AstNode n, Completion c) { + n = TForIn(_) and + c instanceof EmptinessCompletion + or + n instanceof BreakStmt and + c = TBreakCompletion() + or + n instanceof NextStmt and + c = TNextCompletion() + or + n instanceof RedoStmt and + c = TRedoCompletion() + or + n instanceof ReturnStmt and + c = TReturnCompletion() +} + +/** + * Holds if `c` happens in an exception-aware context, that is, it may be + * `rescue`d or `ensure`d. In such cases, we assume that the target of `c` + * may raise an exception (in addition to evaluating normally). 
+ */ +private predicate mayRaise(Call c) { + exists(Trees::BodyStmtTree bst | c = bst.getBodyChild(_, true).getAChild*() | + exists(bst.getARescue()) + or + exists(bst.getEnsure()) + ) +} + +/** A completion of a statement or an expression. */ +abstract class Completion extends TCompletion { + /** Holds if this completion is valid for node `n`. */ + predicate isValidFor(AstNode n) { + this = n.(NonReturningCall).getACompletion() + or + completionIsValidForStmt(n, this) + or + mustHaveBooleanCompletion(n) and + ( + exists(boolean value | isBooleanConstant(n, value) | this = TBooleanCompletion(value)) + or + not isBooleanConstant(n, _) and + this = TBooleanCompletion(_) + ) + or + mustHaveMatchingCompletion(n) and + this = TMatchingCompletion(_) + or + n = any(RescueModifierExpr parent).getBody() and this = TRaiseCompletion() + or + mayRaise(n) and + this = TRaiseCompletion() + or + not n instanceof NonReturningCall and + not completionIsValidForStmt(n, _) and + not mustHaveBooleanCompletion(n) and + not mustHaveMatchingCompletion(n) and + this = TSimpleCompletion() + } + + /** + * Holds if this completion will continue a loop when it is the completion + * of a loop body. + */ + predicate continuesLoop() { + this instanceof NormalCompletion or + this instanceof NextCompletion + } + + /** + * Gets the inner completion. This is either the inner completion, + * when the completion is nested, or the completion itself. + */ + Completion getInnerCompletion() { result = this } + + /** + * Gets the outer completion. This is either the outer completion, + * when the completion is nested, or the completion itself. + */ + Completion getOuterCompletion() { result = this } + + /** Gets a successor type that matches this completion. */ + abstract SuccessorType getAMatchingSuccessorType(); + + /** Gets a textual representation of this completion. */ + abstract string toString(); +} + +/** Holds if node `n` has the Boolean constant value `value`. 
*/ +private predicate isBooleanConstant(AstNode n, boolean value) { + mustHaveBooleanCompletion(n) and + ( + n.(BooleanLiteral).isTrue() and + value = true + or + n.(BooleanLiteral).isFalse() and + value = false + ) +} + +/** + * Holds if a normal completion of `n` must be a Boolean completion. + */ +private predicate mustHaveBooleanCompletion(AstNode n) { + inBooleanContext(n) and + not n instanceof NonReturningCall +} + +/** + * Holds if `n` is used in a Boolean context. That is, the value + * that `n` evaluates to determines a true/false branch successor. + */ +private predicate inBooleanContext(AstNode n) { + exists(ConditionalExpr i | + n = i.getCondition() + or + inBooleanContext(i) and + n = i.getBranch(_) + ) + or + n = any(ConditionalLoop parent).getCondition() + or + exists(LogicalAndExpr parent | + n = parent.getLeftOperand() + or + inBooleanContext(parent) and + n = parent.getRightOperand() + ) + or + exists(LogicalOrExpr parent | + n = parent.getLeftOperand() + or + inBooleanContext(parent) and + n = parent.getRightOperand() + ) + or + n = any(NotExpr parent | inBooleanContext(parent)).getOperand() + or + n = any(StmtSequence parent | inBooleanContext(parent)).getLastStmt() + or + exists(CaseExpr c, WhenExpr w | + not exists(c.getValue()) and + c.getAWhenBranch() = w and + w.getPattern(_) = n + ) +} + +/** + * Holds if a normal completion of `n` must be a matching completion. + */ +private predicate mustHaveMatchingCompletion(AstNode n) { + inMatchingContext(n) and + not n instanceof NonReturningCall +} + +/** + * Holds if `n` is used in a matching context. That is, whether or + * not the value of `n` matches, determines the successor. 
+ */ +private predicate inMatchingContext(AstNode n) { + n = any(RescueClause r).getException(_) + or + exists(CaseExpr c, WhenExpr w | + exists(c.getValue()) and + c.getAWhenBranch() = w and + w.getPattern(_) = n + ) + or + n.(Trees::DefaultValueParameterTree).hasDefaultValue() +} + +/** + * A completion that represents normal evaluation of a statement or an + * expression. + */ +abstract class NormalCompletion extends Completion { } + +abstract private class NonNestedNormalCompletion extends NormalCompletion { } + +/** A simple (normal) completion. */ +class SimpleCompletion extends NonNestedNormalCompletion, TSimpleCompletion { + override NormalSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { result = "simple" } +} + +/** + * A completion that represents evaluation of an expression, whose value determines + * the successor. Either a Boolean completion (`BooleanCompletion`), an emptiness + * completion (`EmptinessCompletion`), or a matching completion (`MatchingCompletion`). + */ +abstract class ConditionalCompletion extends NonNestedNormalCompletion { + boolean value; + + bindingset[value] + ConditionalCompletion() { any() } + + /** Gets the Boolean value of this conditional completion. */ + final boolean getValue() { result = value } +} + +/** + * A completion that represents evaluation of an expression + * with a Boolean value. + */ +class BooleanCompletion extends ConditionalCompletion, TBooleanCompletion { + BooleanCompletion() { this = TBooleanCompletion(value) } + + /** Gets the dual Boolean completion. */ + BooleanCompletion getDual() { result = TBooleanCompletion(value.booleanNot()) } + + override BooleanSuccessor getAMatchingSuccessorType() { result.getValue() = value } + + override string toString() { result = value.toString() } +} + +/** A Boolean `true` completion. */ +class TrueCompletion extends BooleanCompletion { + TrueCompletion() { this.getValue() = true } +} + +/** A Boolean `false` completion. 
*/ +class FalseCompletion extends BooleanCompletion { + FalseCompletion() { this.getValue() = false } +} + +/** + * A completion that represents evaluation of an emptiness test, for example + * a test in a `for in` statement. + */ +class EmptinessCompletion extends ConditionalCompletion, TEmptinessCompletion { + EmptinessCompletion() { this = TEmptinessCompletion(value) } + + override EmptinessSuccessor getAMatchingSuccessorType() { result.getValue() = value } + + override string toString() { if value = true then result = "empty" else result = "non-empty" } +} + +/** + * A completion that represents evaluation of a matching test, for example + * a test in a `rescue` statement. + */ +class MatchingCompletion extends ConditionalCompletion, TMatchingCompletion { + MatchingCompletion() { this = TMatchingCompletion(value) } + + override MatchingSuccessor getAMatchingSuccessorType() { result.getValue() = value } + + override string toString() { if value = true then result = "match" else result = "no-match" } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a return. + */ +class ReturnCompletion extends Completion { + ReturnCompletion() { + this = TReturnCompletion() or + this = TNestedCompletion(_, TReturnCompletion(), _) + } + + override ReturnSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TReturnCompletion() and result = "return" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a break from a loop. 
+ */ +class BreakCompletion extends Completion { + BreakCompletion() { + this = TBreakCompletion() or + this = TNestedCompletion(_, TBreakCompletion(), _) + } + + override BreakSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TBreakCompletion() and result = "break" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a continuation of a loop. + */ +class NextCompletion extends Completion { + NextCompletion() { + this = TNextCompletion() or + this = TNestedCompletion(_, TNextCompletion(), _) + } + + override NextSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TNextCompletion() and result = "next" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a redo of a loop iteration. + */ +class RedoCompletion extends Completion { + RedoCompletion() { + this = TRedoCompletion() or + this = TNestedCompletion(_, TRedoCompletion(), _) + } + + override RedoSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TRedoCompletion() and result = "redo" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a retry. + */ +class RetryCompletion extends Completion { + RetryCompletion() { + this = TRetryCompletion() or + this = TNestedCompletion(_, TRetryCompletion(), _) + } + + override RetrySuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TRetryCompletion() and result = "retry" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in a thrown exception. 
+ */ +class RaiseCompletion extends Completion { + RaiseCompletion() { + this = TRaiseCompletion() or + this = TNestedCompletion(_, TRaiseCompletion(), _) + } + + override RaiseSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TRaiseCompletion() and result = "raise" + } +} + +/** + * A completion that represents evaluation of a statement or an + * expression resulting in an abort/exit. + */ +class ExitCompletion extends Completion { + ExitCompletion() { + this = TExitCompletion() or + this = TNestedCompletion(_, TExitCompletion(), _) + } + + override ExitSuccessor getAMatchingSuccessorType() { any() } + + override string toString() { + // `NestedCompletion` defines `toString()` for the other case + this = TExitCompletion() and result = "exit" + } +} + +/** + * A nested completion. For example, in + * + * ```rb + * def m + * while x >= 0 + * x -= 1 + * if num > 100 + * break + * end + * end + * puts "done" + * end + * ``` + * + * the `while` loop can have a nested completion where the inner completion + * is a `break` and the outer completion is a simple successor. + */ +abstract class NestedCompletion extends Completion, TNestedCompletion { + Completion inner; + Completion outer; + int nestLevel; + + NestedCompletion() { this = TNestedCompletion(inner, outer, nestLevel) } + + /** Gets a completion that is compatible with the inner completion. */ + abstract Completion getAnInnerCompatibleCompletion(); + + /** Gets the level of this nested completion. 
*/ + final int getNestLevel() { result = nestLevel } + + override string toString() { result = outer + " [" + inner + "] (" + nestLevel + ")" } +} + +class NestedBreakCompletion extends NormalCompletion, NestedCompletion { + NestedBreakCompletion() { + inner = TBreakCompletion() and + outer instanceof NonNestedNormalCompletion + } + + override BreakCompletion getInnerCompletion() { result = inner } + + override NonNestedNormalCompletion getOuterCompletion() { result = outer } + + override Completion getAnInnerCompatibleCompletion() { + result = inner and + outer = TSimpleCompletion() + or + result = TNestedCompletion(outer, inner, _) + } + + override SuccessorType getAMatchingSuccessorType() { + outer instanceof SimpleCompletion and + result instanceof BreakSuccessor + or + result = outer.(ConditionalCompletion).getAMatchingSuccessorType() + } +} + +class NestedEnsureCompletion extends NestedCompletion { + NestedEnsureCompletion() { + inner instanceof NormalCompletion and + nestedEnsureCompletion(outer, nestLevel) + } + + override NormalCompletion getInnerCompletion() { result = inner } + + override Completion getOuterCompletion() { result = outer } + + override Completion getAnInnerCompatibleCompletion() { + result.getOuterCompletion() = this.getInnerCompletion() + } + + override SuccessorType getAMatchingSuccessorType() { none() } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll new file mode 100644 index 00000000000..5bfea3aca7b --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll @@ -0,0 +1,1164 @@ +/** + * Provides auxiliary classes and predicates used to construct the basic successor + * relation on control flow elements. 
+ * + * The implementation is centered around the concept of a _completion_, which + * models how the execution of a statement or expression terminates. + * Completions are represented as an algebraic data type `Completion` defined in + * `Completion.qll`. + * + * The CFG is built by structural recursion over the AST. To achieve this the + * CFG edges related to a given AST node, `n`, are divided into three categories: + * + * 1. The in-going edge that points to the first CFG node to execute when + * `n` is going to be executed. + * 2. The out-going edges for control flow leaving `n` that are going to some + * other node in the surrounding context of `n`. + * 3. The edges that have both of their end-points entirely within the AST + * node and its children. + * + * The edges in (1) and (2) are inherently non-local and are therefore + * initially calculated as half-edges, that is, the single node, `k`, of the + * edge contained within `n`, by the predicates `k = first(n)` and `k = last(n, _)`, + * respectively. The edges in (3) can then be enumerated directly by the predicate + * `succ` by calling `first` and `last` recursively on the children of `n` and + * connecting the end-points. This yields the entire CFG, since all edges are in + * (3) for _some_ AST node. + * + * The second parameter of `last` is the completion, which is necessary to distinguish + * the out-going edges from `n`. Note that the completion changes as the calculation of + * `last` proceeds outward through the AST; for example, a `BreakCompletion` is + * caught up by its surrounding loop and turned into a `NormalCompletion`. 
+ */ + +private import codeql.ruby.AST +private import codeql.ruby.ast.internal.AST as ASTInternal +private import codeql.ruby.ast.internal.Scope +private import codeql.ruby.ast.Scope +private import codeql.ruby.ast.internal.TreeSitter +private import codeql.ruby.ast.internal.Variable +private import codeql.ruby.controlflow.ControlFlowGraph +private import Completion +import ControlFlowGraphImplShared + +module CfgScope { + abstract class Range_ extends AstNode { + abstract predicate entry(AstNode first); + + abstract predicate exit(AstNode last, Completion c); + } + + private class ToplevelScope extends Range_, Toplevel { + final override predicate entry(AstNode first) { first(this, first) } + + final override predicate exit(AstNode last, Completion c) { last(this, last, c) } + } + + private class EndBlockScope extends Range_, EndBlock { + final override predicate entry(AstNode first) { + first(this.(Trees::EndBlockTree).getBodyChild(0, _), first) + } + + final override predicate exit(AstNode last, Completion c) { + last(this.(Trees::EndBlockTree).getLastBodyChild(), last, c) + } + } + + private class BodyStmtCallableScope extends Range_, ASTInternal::TBodyStmt, Callable { + final override predicate entry(AstNode first) { this.(Trees::BodyStmtTree).firstInner(first) } + + final override predicate exit(AstNode last, Completion c) { + this.(Trees::BodyStmtTree).lastInner(last, c) + } + } + + private class BraceBlockScope extends Range_, BraceBlock { + final override predicate entry(AstNode first) { + first(this.(Trees::BraceBlockTree).getBodyChild(0, _), first) + } + + final override predicate exit(AstNode last, Completion c) { + last(this.(Trees::BraceBlockTree).getLastBodyChild(), last, c) + } + } +} + +/** Holds if `first` is first executed when entering `scope`. */ +pragma[nomagic] +predicate succEntry(CfgScope::Range_ scope, AstNode first) { scope.entry(first) } + +/** Holds if `last` with completion `c` can exit `scope`. 
*/ +pragma[nomagic] +predicate succExit(CfgScope::Range_ scope, AstNode last, Completion c) { scope.exit(last, c) } + +// TODO: remove this class; it should be replaced with an implicit non AST node +private class ForIn extends AstNode, ASTInternal::TForIn { + final override string toString() { result = "In" } +} + +// TODO: remove this class; it should be replaced with an implicit non AST node +private class ForRange extends ForExpr { + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "" and + result = this.getIn() + } + + ForIn getIn() { + result = ASTInternal::TForIn(ASTInternal::toGenerated(this).(Ruby::For).getValue()) + } +} + +/** Defines the CFG by dispatch on the various AST types. */ +module Trees { + private class AliasStmtTree extends StandardPreOrderTree, AliasStmt { + final override ControlFlowTree getChildElement(int i) { + result = this.getNewName() and i = 0 + or + result = this.getOldName() and i = 1 + } + } + + private class ArgumentListTree extends StandardTree, ArgumentList { + final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) } + + final override predicate first(AstNode first) { first(this.getFirstChildElement(), first) } + + final override predicate last(AstNode last, Completion c) { + last(this.getLastChildElement(), last, c) + } + } + + private class AssignExprTree extends StandardPostOrderTree, AssignExpr { + AssignExprTree() { + exists(Expr left | left = this.getLeftOperand() | + left instanceof VariableAccess or + left instanceof ConstantAccess + ) + } + + final override ControlFlowTree getChildElement(int i) { + result = this.getLeftOperand() and i = 0 + or + result = this.getRightOperand() and i = 1 + } + } + + private class BeginTree extends BodyStmtTree, BeginExpr { + final override predicate first(AstNode first) { this.firstInner(first) } + + final override predicate last(AstNode last, Completion c) { this.lastInner(last, c) } + + final override predicate 
propagatesAbnormal(AstNode child) { none() } + } + + private class BlockArgumentTree extends StandardPostOrderTree, BlockArgument { + final override ControlFlowTree getChildElement(int i) { result = this.getValue() and i = 0 } + } + + abstract private class NonDefaultValueParameterTree extends ControlFlowTree, NamedParameter { + final override predicate first(AstNode first) { + this.getDefiningAccess().(ControlFlowTree).first(first) + } + + final override predicate last(AstNode last, Completion c) { + this.getDefiningAccess().(ControlFlowTree).last(last, c) + } + + override predicate propagatesAbnormal(AstNode child) { + this.getDefiningAccess().(ControlFlowTree).propagatesAbnormal(child) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { none() } + } + + private class BlockParameterTree extends NonDefaultValueParameterTree, BlockParameter { } + + abstract class BodyStmtTree extends StmtSequenceTree, BodyStmt { + override predicate first(AstNode first) { first = this } + + predicate firstInner(AstNode first) { + first(this.getBodyChild(0, _), first) + or + not exists(this.getBodyChild(_, _)) and + ( + first(this.getRescue(_), first) + or + not exists(this.getRescue(_)) and + first(this.getEnsure(), first) + ) + } + + predicate lastInner(AstNode last, Completion c) { + exists(boolean ensurable | last = this.getAnEnsurePredecessor(c, ensurable) | + not this.hasEnsure() + or + ensurable = false + ) + or + // If the body completes normally, take the completion from the `ensure` block + this.lastEnsure(last, c, any(NormalCompletion nc), _) + or + // If the `ensure` block completes normally, it inherits any non-normal + // completion from the body + c = + any(NestedEnsureCompletion nec | + this.lastEnsure(last, nec.getAnInnerCompatibleCompletion(), nec.getOuterCompletion(), + nec.getNestLevel()) + ) + or + not exists(this.getBodyChild(_, _)) and + not exists(this.getRescue(_)) and + this.lastEnsure0(last, c) + or + last([this.getEnsure(), 
this.getBodyChild(_, false)], last, c) and + not c instanceof NormalCompletion + } + + override predicate succ(AstNode pred, AstNode succ, Completion c) { + // Normal left-to-right evaluation in the body + exists(int i | + last(this.getBodyChild(i, _), pred, c) and + first(this.getBodyChild(i + 1, _), succ) and + c instanceof NormalCompletion + ) + or + // Exceptional flow from body to first `rescue` + this.lastBody(pred, c, true) and + first(this.getRescue(0), succ) and + c instanceof RaiseCompletion + or + // Flow from one `rescue` clause to the next when there is no match + exists(RescueTree rescue, int i | rescue = this.getRescue(i) | + rescue.lastNoMatch(pred, c) and + first(this.getRescue(i + 1), succ) + ) + or + // Flow from body to `else` block when no exception + this.lastBody(pred, c, _) and + first(this.getElse(), succ) and + c instanceof NormalCompletion + or + // Flow into `ensure` block from every predecessor with `ensurable = true` + pred = this.getAnEnsurePredecessor(c, true) and + first(this.getEnsure(), succ) + } + + /** + * Gets a last element from this block that may finish with completion `c`, such + * that control may be transferred to the `ensure` block (if it exists), but only + * if `ensurable = true`.
+ */ + pragma[nomagic] + private AstNode getAnEnsurePredecessor(Completion c, boolean ensurable) { + this.lastBody(result, c, ensurable) and + ( + // Any non-throw completion will always continue directly to the `ensure` block, + // unless there is an `else` block + not c instanceof RaiseCompletion and + not exists(this.getElse()) + or + // Any completion will continue to the `ensure` block when there are no `rescue` + // blocks + not exists(this.getRescue(_)) + ) + or + // Last element from any matching `rescue` block continues to the `ensure` block + this.getRescue(_).(RescueTree).lastMatch(result, c) and + ensurable = true + or + // If the last `rescue` block does not match, continue to the `ensure` block + exists(int lst, MatchingCompletion mc | + this.getRescue(lst).(RescueTree).lastNoMatch(result, mc) and + mc.getValue() = false and + not exists(this.getRescue(lst + 1)) and + c = + any(NestedEnsureCompletion nec | + nec.getOuterCompletion() instanceof RaiseCompletion and + nec.getInnerCompletion() = mc and + nec.getNestLevel() = 0 + ) and + ensurable = true + ) + or + // Last element of `else` block continues to the `ensure` block + last(this.getElse(), result, c) and + ensurable = true + } + + pragma[nomagic] + private predicate lastEnsure0(AstNode last, Completion c) { last(this.getEnsure(), last, c) } + + /** + * Gets a descendant that belongs to the `ensure` block of this block, if any. + * Nested `ensure` blocks are not included. + */ + pragma[nomagic] + AstNode getAnEnsureDescendant() { + result = this.getEnsure() + or + exists(AstNode mid | + mid = this.getAnEnsureDescendant() and + result = mid.getAChild() and + getCfgScope(result) = getCfgScope(mid) and + not exists(BodyStmt nestedBlock | + result = nestedBlock.getEnsure() and + nestedBlock != this + ) + ) + } + + /** + * Holds if `innerBlock` has an `ensure` block and is immediately nested inside the + * `ensure` block of this block. 
+ */ + private predicate nestedEnsure(BodyStmtTree innerBlock) { + exists(StmtSequence innerEnsure | + innerEnsure = this.getAnEnsureDescendant().getAChild() and + getCfgScope(innerEnsure) = getCfgScope(this) and + innerEnsure = innerBlock.(BodyStmt).getEnsure() + ) + } + + /** + * Gets the `ensure`-nesting level of this block. That is, the number of `ensure` + * blocks that this block is nested under. + */ + int getNestLevel() { result = count(BodyStmtTree outer | outer.nestedEnsure+(this)) } + + pragma[nomagic] + private predicate lastEnsure( + AstNode last, NormalCompletion ensure, Completion outer, int nestLevel + ) { + this.lastEnsure0(last, ensure) and + exists( + this.getAnEnsurePredecessor(any(Completion c0 | outer = c0.getOuterCompletion()), true) + ) and + nestLevel = this.getNestLevel() + } + + /** + * Holds if `last` is a last element in the body of this block. `ensurable` + * indicates whether `last` may be a predecessor of an `ensure` block. + */ + pragma[nomagic] + private predicate lastBody(AstNode last, Completion c, boolean ensurable) { + exists(boolean rescuable | + if c instanceof RaiseCompletion then ensurable = rescuable else ensurable = true + | + last(this.getBodyChild(_, rescuable), last, c) and + not c instanceof NormalCompletion + or + exists(int lst | + last(this.getBodyChild(lst, rescuable), last, c) and + not exists(this.getBodyChild(lst + 1, _)) + ) + ) + } + } + + private class BooleanLiteralTree extends LeafTree, BooleanLiteral { } + + class BraceBlockTree extends StmtSequenceTree, BraceBlock { + final override predicate propagatesAbnormal(AstNode child) { none() } + + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getParameter(i) and rescuable = false + or + result = StmtSequenceTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable) + } + + override predicate first(AstNode first) { first = this } + + override predicate succ(AstNode pred, AstNode succ, Completion c) { + // Normal 
left-to-right evaluation in the body + exists(int i | + last(this.getBodyChild(i, _), pred, c) and + first(this.getBodyChild(i + 1, _), succ) and + c instanceof NormalCompletion + ) + } + } + + private class CallTree extends StandardPostOrderTree, Call { + CallTree() { + // Logical operations are handled separately + not this instanceof UnaryLogicalOperation and + not this instanceof BinaryLogicalOperation + } + + override ControlFlowTree getChildElement(int i) { result = this.getArgument(i) } + } + + private class CaseTree extends PreOrderTree, CaseExpr { + final override predicate propagatesAbnormal(AstNode child) { + child = this.getValue() or child = this.getABranch() + } + + final override predicate last(AstNode last, Completion c) { + last(this.getValue(), last, c) and not exists(this.getABranch()) + or + last(this.getAWhenBranch().getBody(), last, c) + or + exists(int i, ControlFlowTree lastBranch | + lastBranch = this.getBranch(i) and + not exists(this.getBranch(i + 1)) and + last(lastBranch, last, c) + ) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + exists(AstNode next | + pred = this and + first(next, succ) and + c instanceof SimpleCompletion + | + next = this.getValue() + or + not exists(this.getValue()) and + next = this.getBranch(0) + ) + or + last(this.getValue(), pred, c) and + first(this.getBranch(0), succ) and + c instanceof SimpleCompletion + or + exists(int i, WhenTree branch | branch = this.getBranch(i) | + last(branch.getLastPattern(), pred, c) and + first(this.getBranch(i + 1), succ) and + c.(ConditionalCompletion).getValue() = false + ) + } + } + + private class CharacterTree extends LeafTree, CharacterLiteral { } + + private class ClassDeclarationTree extends NamespaceTree, ClassDeclaration { + /** Gets the `i`th child in the body of this block. 
*/ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getScopeExpr() and i = 0 and rescuable = false + or + result = this.getSuperclassExpr() and + i = count(this.getScopeExpr()) and + rescuable = true + or + result = + super + .getBodyChild(i - count(this.getScopeExpr()) - count(this.getSuperclassExpr()), + rescuable) + } + } + + private class ClassVariableTree extends LeafTree, ClassVariableAccess { } + + private class ConditionalExprTree extends PostOrderTree, ConditionalExpr { + final override predicate propagatesAbnormal(AstNode child) { + child = this.getCondition() or child = this.getBranch(_) + } + + final override predicate first(AstNode first) { first(this.getCondition(), first) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + exists(boolean b | + last(this.getCondition(), pred, c) and + b = c.(BooleanCompletion).getValue() + | + first(this.getBranch(b), succ) + or + not exists(this.getBranch(b)) and + succ = this + ) + or + last(this.getBranch(_), pred, c) and + succ = this and + c instanceof NormalCompletion + } + } + + private class ConditionalLoopTree extends PostOrderTree, ConditionalLoop { + final override predicate propagatesAbnormal(AstNode child) { child = this.getCondition() } + + final override predicate first(AstNode first) { first(this.getCondition(), first) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + last(this.getCondition(), pred, c) and + this.entersLoopWhenConditionIs(c.(BooleanCompletion).getValue()) and + first(this.getBody(), succ) + or + last(this.getBody(), pred, c) and + first(this.getCondition(), succ) and + c.continuesLoop() + or + last(this.getBody(), pred, c) and + first(this.getBody(), succ) and + c instanceof RedoCompletion + or + succ = this and + ( + last(this.getCondition(), pred, c) and + this.entersLoopWhenConditionIs(c.(BooleanCompletion).getValue().booleanNot()) + or + last(this.getBody(), pred, c) and + not 
c.continuesLoop() and + not c instanceof BreakCompletion and + not c instanceof RedoCompletion + or + last(this.getBody(), pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion()) + ) + } + } + + private class ConstantAccessTree extends PostOrderTree, ConstantAccess { + ConstantAccessTree() { + not this instanceof ClassDeclaration and + not this instanceof ModuleDeclaration + } + + final override predicate propagatesAbnormal(AstNode child) { child = this.getScopeExpr() } + + final override predicate first(AstNode first) { + first(this.getScopeExpr(), first) + or + not exists(this.getScopeExpr()) and + first = this + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + last(this.getScopeExpr(), pred, c) and + succ = this and + c instanceof NormalCompletion + } + } + + /** A parameter that may have a default value. */ + abstract class DefaultValueParameterTree extends ControlFlowTree { + abstract Expr getDefaultValueExpr(); + + abstract AstNode getAccessNode(); + + predicate hasDefaultValue() { exists(this.getDefaultValueExpr()) } + + final override predicate propagatesAbnormal(AstNode child) { + child = this.getDefaultValueExpr() or child = this.getAccessNode() + } + + final override predicate first(AstNode first) { first = this.getAccessNode() } + + final override predicate last(AstNode last, Completion c) { + last(this.getDefaultValueExpr(), last, c) and + c instanceof NormalCompletion + or + last = this.getAccessNode() and + ( + not this.hasDefaultValue() and + c instanceof SimpleCompletion + or + this.hasDefaultValue() and + c.(MatchingCompletion).getValue() = true + ) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + pred = this.getAccessNode() and + first(this.getDefaultValueExpr(), succ) and + c.(MatchingCompletion).getValue() = false + } + } + + private class DesugaredTree extends ControlFlowTree { + ControlFlowTree desugared; + + DesugaredTree() { desugared = this.getDesugared() } + + 
final override predicate propagatesAbnormal(AstNode child) { + desugared.propagatesAbnormal(child) + } + + final override predicate first(AstNode first) { desugared.first(first) } + + final override predicate last(AstNode last, Completion c) { desugared.last(last, c) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { none() } + } + + private class DoBlockTree extends BodyStmtTree, DoBlock { + /** Gets the `i`th child in the body of this block. */ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getParameter(i) and rescuable = false + or + result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable) + } + + override predicate propagatesAbnormal(AstNode child) { none() } + } + + private class EmptyStatementTree extends LeafTree, EmptyStmt { } + + class EndBlockTree extends StmtSequenceTree, EndBlock { + override predicate first(AstNode first) { first = this } + + override predicate succ(AstNode pred, AstNode succ, Completion c) { + // Normal left-to-right evaluation in the body + exists(int i | + last(this.getBodyChild(i, _), pred, c) and + first(this.getBodyChild(i + 1, _), succ) and + c instanceof NormalCompletion + ) + } + } + + private class ForInTree extends LeafTree, ForIn { } + + /** + * Control flow of a for-in loop + * + * For example, this program fragment: + * + * ```rb + * for arg in args do + * puts arg + * end + * puts "done"; + * ``` + * + * has the following control flow graph: + * + * ``` + * args + * | + * in------<----- + * / \ \ + * / \ | + * / \ | + * / \ | + * empty non-empty | + * | \ | + * for \ | + * | arg | + * | | | + * puts "done" puts arg | + * \___/ + * ``` + */ + private class ForTree extends PostOrderTree, ForRange { + final override predicate propagatesAbnormal(AstNode child) { + child = this.getPattern() or child = this.getValue() + } + + final override predicate first(AstNode first) { first(this.getValue(), first) } + + /** + * for pattern in 
array do body end + * ``` + * array +-> in +--[non empty]--> pattern -> body -> in + * |--[empty]--> for + * ``` + */ + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + last(this.getValue(), pred, c) and + first(this.getIn(), succ) and + c instanceof SimpleCompletion + or + last(this.getIn(), pred, c) and + first(this.getPattern(), succ) and + c.(EmptinessCompletion).getValue() = false + or + last(this.getPattern(), pred, c) and + first(this.getBody(), succ) and + c instanceof NormalCompletion + or + last(this.getBody(), pred, c) and + first(this.getIn(), succ) and + c.continuesLoop() + or + last(this.getBody(), pred, c) and + first(this.getBody(), succ) and + c instanceof RedoCompletion + or + succ = this and + ( + last(this.getIn(), pred, c) and + c.(EmptinessCompletion).getValue() = true + or + last(this.getBody(), pred, c) and + not c.continuesLoop() and + not c instanceof BreakCompletion and + not c instanceof RedoCompletion + or + last(this.getBody(), pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion()) + ) + } + } + + private class GlobalVariableTree extends LeafTree, GlobalVariableAccess { } + + private class HashLiteralTree extends StandardPostOrderTree, HashLiteral { + final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) } + } + + private class HashSplatParameterTree extends NonDefaultValueParameterTree, HashSplatParameter { } + + private class HereDocTree extends StandardPreOrderTree, HereDoc { + final override ControlFlowTree getChildElement(int i) { result = this.getComponent(i) } + } + + private class InstanceVariableTree extends LeafTree, InstanceVariableAccess { } + + private class KeywordParameterTree extends DefaultValueParameterTree, KeywordParameter { + final override Expr getDefaultValueExpr() { result = this.getDefaultValue() } + + final override AstNode getAccessNode() { result = this.getDefiningAccess() } + } + + private class LambdaTree extends BodyStmtTree, Lambda { + 
final override predicate propagatesAbnormal(AstNode child) { none() } + + /** Gets the `i`th child in the body of this block. */ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getParameter(i) and rescuable = false + or + result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable) + } + } + + private class LocalVariableAccessTree extends LeafTree, LocalVariableAccess { } + + private class LogicalAndTree extends PostOrderTree, LogicalAndExpr { + final override predicate propagatesAbnormal(AstNode child) { child = this.getAnOperand() } + + final override predicate first(AstNode first) { first(this.getLeftOperand(), first) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + last(this.getLeftOperand(), pred, c) and + c instanceof TrueCompletion and + first(this.getRightOperand(), succ) + or + last(this.getLeftOperand(), pred, c) and + c instanceof FalseCompletion and + succ = this + or + last(this.getRightOperand(), pred, c) and + c instanceof NormalCompletion and + succ = this + } + } + + private class LogicalNotTree extends PostOrderTree, NotExpr { + final override predicate propagatesAbnormal(AstNode child) { child = this.getOperand() } + + final override predicate first(AstNode first) { first(this.getOperand(), first) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + succ = this and + last(this.getOperand(), pred, c) and + c instanceof NormalCompletion + } + } + + private class LogicalOrTree extends PostOrderTree, LogicalOrExpr { + final override predicate propagatesAbnormal(AstNode child) { child = this.getAnOperand() } + + final override predicate first(AstNode first) { first(this.getLeftOperand(), first) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + last(this.getLeftOperand(), pred, c) and + c instanceof FalseCompletion and + first(this.getRightOperand(), succ) + or + last(this.getLeftOperand(), pred, c) 
and + c instanceof TrueCompletion and + succ = this + or + last(this.getRightOperand(), pred, c) and + c instanceof NormalCompletion and + succ = this + } + } + + private class MethodCallTree extends CallTree, MethodCall { + final override ControlFlowTree getChildElement(int i) { + result = this.getReceiver() and i = 0 + or + result = this.getArgument(i - 1) + or + result = this.getBlock() and i = 1 + this.getNumberOfArguments() + } + } + + private class MethodNameTree extends LeafTree, MethodName, ASTInternal::TTokenMethodName { } + + private class MethodTree extends BodyStmtTree, Method { + final override predicate propagatesAbnormal(AstNode child) { none() } + + /** Gets the `i`th child in the body of this block. */ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getParameter(i) and rescuable = false + or + result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable) + } + } + + private class ModuleDeclarationTree extends NamespaceTree, ModuleDeclaration { + /** Gets the `i`th child in the body of this block. 
*/ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getScopeExpr() and i = 0 and rescuable = false + or + result = NamespaceTree.super.getBodyChild(i - count(this.getScopeExpr()), rescuable) + } + } + + private class NamespaceTree extends BodyStmtTree, Namespace { + final override predicate first(AstNode first) { + this.firstInner(first) + or + not exists(this.getAChild(_)) and + first = this + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + BodyStmtTree.super.succ(pred, succ, c) + or + succ = this and + this.lastInner(pred, c) + } + } + + private class NilTree extends LeafTree, NilLiteral { } + + private class NumericLiteralTree extends LeafTree, NumericLiteral { } + + private class OptionalParameterTree extends DefaultValueParameterTree, OptionalParameter { + final override Expr getDefaultValueExpr() { result = this.getDefaultValue() } + + final override AstNode getAccessNode() { result = this.getDefiningAccess() } + } + + private class PairTree extends StandardPostOrderTree, Pair { + final override ControlFlowTree getChildElement(int i) { + result = this.getKey() and i = 0 + or + result = this.getValue() and i = 1 + } + } + + private class RangeLiteralTree extends StandardPostOrderTree, RangeLiteral { + final override ControlFlowTree getChildElement(int i) { + result = this.getBegin() and i = 0 + or + result = this.getEnd() and i = 1 + } + } + + private class RedoStmtTree extends LeafTree, RedoStmt { } + + private class RescueModifierTree extends PreOrderTree, RescueModifierExpr { + final override predicate propagatesAbnormal(AstNode child) { child = this.getHandler() } + + final override predicate last(AstNode last, Completion c) { + last(this.getBody(), last, c) and + not c instanceof RaiseCompletion + or + last(this.getHandler(), last, c) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + pred = this and + first(this.getBody(), succ) and + c instanceof 
SimpleCompletion + or + last(this.getBody(), pred, c) and + c instanceof RaiseCompletion and + first(this.getHandler(), succ) + } + } + + private class RescueTree extends PreOrderTree, RescueClause { + final override predicate propagatesAbnormal(AstNode child) { child = this.getAnException() } + + private Expr getLastException() { + exists(int i | result = this.getException(i) and not exists(this.getException(i + 1))) + } + + predicate lastMatch(AstNode last, Completion c) { + last(this.getBody(), last, c) + or + not exists(this.getBody()) and + ( + last(this.getVariableExpr(), last, c) + or + not exists(this.getVariableExpr()) and + ( + last(this.getAnException(), last, c) and + c.(MatchingCompletion).getValue() = true + or + not exists(this.getAnException()) and + last = this and + c.isValidFor(this) + ) + ) + } + + predicate lastNoMatch(AstNode last, Completion c) { + last(this.getLastException(), last, c) and + c.(MatchingCompletion).getValue() = false + } + + final override predicate last(AstNode last, Completion c) { + this.lastNoMatch(last, c) + or + this.lastMatch(last, c) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + exists(AstNode next | + pred = this and + first(next, succ) and + c instanceof SimpleCompletion + | + next = this.getException(0) + or + not exists(this.getException(0)) and + ( + next = this.getVariableExpr() + or + not exists(this.getVariableExpr()) and + next = this.getBody() + ) + ) + or + exists(AstNode next | + last(this.getAnException(), pred, c) and + first(next, succ) and + c.(MatchingCompletion).getValue() = true + | + next = this.getVariableExpr() + or + not exists(this.getVariableExpr()) and + next = this.getBody() + ) + or + exists(int i | + last(this.getException(i), pred, c) and + c.(MatchingCompletion).getValue() = false and + first(this.getException(i + 1), succ) + ) + or + last(this.getVariableExpr(), pred, c) and + first(this.getBody(), succ) and + c instanceof NormalCompletion + } + } + 
+ private class RetryStmtTree extends LeafTree, RetryStmt { } + + private class ReturningStmtTree extends StandardPostOrderTree, ReturningStmt { + final override ControlFlowTree getChildElement(int i) { result = this.getValue() and i = 0 } + } + + private class SelfTree extends LeafTree, Self { } + + private class SimpleParameterTree extends NonDefaultValueParameterTree, SimpleParameter { } + + // Corner case: For duplicated '_' parameters, only the first occurrence has a defining + // access. For subsequent parameters we simply include the parameter itself in the CFG + private class SimpleParameterTreeDupUnderscore extends LeafTree, SimpleParameter { + SimpleParameterTreeDupUnderscore() { not exists(this.getDefiningAccess()) } + } + + private class SingletonClassTree extends BodyStmtTree, SingletonClass { + final override predicate first(AstNode first) { + this.firstInner(first) + or + not exists(this.getAChild(_)) and + first = this + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + BodyStmtTree.super.succ(pred, succ, c) + or + succ = this and + this.lastInner(pred, c) + } + + /** Gets the `i`th child in the body of this block. */ + final override AstNode getBodyChild(int i, boolean rescuable) { + ( + result = this.getValue() and i = 0 and rescuable = false + or + result = BodyStmtTree.super.getBodyChild(i - 1, rescuable) + ) + } + } + + private class SingletonMethodTree extends BodyStmtTree, SingletonMethod { + final override predicate propagatesAbnormal(AstNode child) { none() } + + /** Gets the `i`th child in the body of this block.
*/ + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getParameter(i) and rescuable = false + or + result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable) + } + + override predicate first(AstNode first) { first(this.getObject(), first) } + + override predicate succ(AstNode pred, AstNode succ, Completion c) { + BodyStmtTree.super.succ(pred, succ, c) + or + last(this.getObject(), pred, c) and + succ = this and + c instanceof NormalCompletion + } + } + + private class SplatParameterTree extends NonDefaultValueParameterTree, SplatParameter { } + + class StmtSequenceTree extends PostOrderTree, StmtSequence { + override predicate propagatesAbnormal(AstNode child) { child = this.getAStmt() } + + override predicate first(AstNode first) { first(this.getStmt(0), first) } + + /** Gets the `i`th child in the body of this body statement. */ + AstNode getBodyChild(int i, boolean rescuable) { + result = this.getStmt(i) and + rescuable = true + } + + final AstNode getLastBodyChild() { + exists(int i | + result = this.getBodyChild(i, _) and + not exists(this.getBodyChild(i + 1, _)) + ) + } + + override predicate succ(AstNode pred, AstNode succ, Completion c) { + // Normal left-to-right evaluation in the body + exists(int i | + last(this.getBodyChild(i, _), pred, c) and + first(this.getBodyChild(i + 1, _), succ) and + c instanceof NormalCompletion + ) + or + succ = this and + last(this.getLastBodyChild(), pred, c) and + c instanceof NormalCompletion + } + } + + private class StringConcatenationTree extends StandardTree, StringConcatenation { + final override ControlFlowTree getChildElement(int i) { result = this.getString(i) } + + final override predicate first(AstNode first) { first(this.getFirstChildElement(), first) } + + final override predicate last(AstNode last, Completion c) { + last(this.getLastChildElement(), last, c) + } + } + + private class StringlikeLiteralTree extends StandardPostOrderTree, StringlikeLiteral 
{ + StringlikeLiteralTree() { not this instanceof HereDoc } + + final override ControlFlowTree getChildElement(int i) { result = this.getComponent(i) } + } + + private class ToplevelTree extends BodyStmtTree, Toplevel { + final override AstNode getBodyChild(int i, boolean rescuable) { + result = this.getBeginBlock(i) and rescuable = true + or + result = BodyStmtTree.super.getBodyChild(i - count(this.getABeginBlock()), rescuable) + } + + final override predicate first(AstNode first) { this.firstInner(first) } + + final override predicate last(AstNode last, Completion c) { this.lastInner(last, c) } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + BodyStmtTree.super.succ(pred, succ, c) + } + } + + private class TuplePatternTree extends StandardPostOrderTree, TuplePattern { + final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) } + } + + private class UndefStmtTree extends StandardPreOrderTree, UndefStmt { + final override ControlFlowTree getChildElement(int i) { result = this.getMethodName(i) } + } + + private class WhenTree extends PreOrderTree, WhenExpr { + final override predicate propagatesAbnormal(AstNode child) { child = this.getAPattern() } + + final Expr getLastPattern() { + exists(int i | + result = this.getPattern(i) and + not exists(this.getPattern(i + 1)) + ) + } + + final override predicate last(AstNode last, Completion c) { + last(this.getLastPattern(), last, c) and + c.(ConditionalCompletion).getValue() = false + or + last(this.getBody(), last, c) + } + + final override predicate succ(AstNode pred, AstNode succ, Completion c) { + pred = this and + first(this.getPattern(0), succ) and + c instanceof SimpleCompletion + or + exists(int i, Expr p, boolean b | + p = this.getPattern(i) and + last(p, pred, c) and + b = c.(ConditionalCompletion).getValue() + | + b = true and + first(this.getBody(), succ) + or + b = false and + first(this.getPattern(i + 1), succ) + ) + } + } +} + +private Scope 
parent(Scope n) { + result = n.getOuterScope() and + not n instanceof CfgScope::Range_ +} + +/** Gets the CFG scope of node `n`. */ +pragma[inline] +CfgScope getCfgScope(AstNode n) { + exists(AstNode n0 | + pragma[only_bind_into](n0) = n and + pragma[only_bind_into](result) = getCfgScopeImpl(n0) + ) +} + +cached +private module Cached { + /** Gets the CFG scope of node `n`. */ + cached + CfgScope getCfgScopeImpl(AstNode n) { + forceCachingInSameStage() and + result = parent*(ASTInternal::fromGenerated(scopeOf(ASTInternal::toGeneratedInclSynth(n)))) + } + + cached + newtype TSuccessorType = + TSuccessorSuccessor() or + TBooleanSuccessor(boolean b) { b in [false, true] } or + TEmptinessSuccessor(boolean isEmpty) { isEmpty in [false, true] } or + TMatchingSuccessor(boolean isMatch) { isMatch in [false, true] } or + TReturnSuccessor() or + TBreakSuccessor() or + TNextSuccessor() or + TRedoSuccessor() or + TRetrySuccessor() or + TRaiseSuccessor() or // TODO: Add exception type? + TExitSuccessor() +} + +import Cached diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll new file mode 100644 index 00000000000..050a9384729 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll @@ -0,0 +1,945 @@ +/** Provides language-independent definitions for AST-to-CFG construction. */ + +private import ControlFlowGraphImplSpecific + +/** An element with associated control flow. */ +abstract class ControlFlowTree extends ControlFlowTreeBase { + /** Holds if `first` is the first element executed within this element. */ + pragma[nomagic] + abstract predicate first(ControlFlowElement first); + + /** + * Holds if `last` with completion `c` is a potential last element executed + * within this element. 
+ */ + pragma[nomagic] + abstract predicate last(ControlFlowElement last, Completion c); + + /** Holds if abnormal execution of `child` should propagate upwards. */ + abstract predicate propagatesAbnormal(ControlFlowElement child); + + /** + * Holds if `succ` is a control flow successor for `pred`, given that `pred` + * finishes with completion `c`. + */ + pragma[nomagic] + abstract predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c); +} + +/** + * Holds if `first` is the first element executed within control flow + * element `cft`. + */ +predicate first(ControlFlowTree cft, ControlFlowElement first) { cft.first(first) } + +/** + * Holds if `last` with completion `c` is a potential last element executed + * within control flow element `cft`. + */ +predicate last(ControlFlowTree cft, ControlFlowElement last, Completion c) { + cft.last(last, c) + or + exists(ControlFlowElement cfe | + cft.propagatesAbnormal(cfe) and + last(cfe, last, c) and + not completionIsNormal(c) + ) +} + +/** + * Holds if `succ` is a control flow successor for `pred`, given that `pred` + * finishes with completion `c`. + */ +pragma[nomagic] +predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { + any(ControlFlowTree cft).succ(pred, succ, c) +} + +/** An element that is executed in pre-order. */ +abstract class PreOrderTree extends ControlFlowTree { + final override predicate first(ControlFlowElement first) { first = this } +} + +/** An element that is executed in post-order. */ +abstract class PostOrderTree extends ControlFlowTree { + override predicate last(ControlFlowElement last, Completion c) { + last = this and + completionIsValidFor(c, last) + } +} + +/** + * An element where the children are evaluated following a standard left-to-right + * evaluation. The actual evaluation order is determined by the predicate + * `getChildElement()`. 
+ */ +abstract class StandardTree extends ControlFlowTree { + /** Gets the `i`th child element, in order of evaluation. */ + abstract ControlFlowElement getChildElement(int i); + + private ControlFlowElement getChildElementRanked(int i) { + result = + rank[i + 1](ControlFlowElement child, int j | + child = this.getChildElement(j) + | + child order by j + ) + } + + /** Gets the first child node of this element. */ + final ControlFlowElement getFirstChildElement() { result = this.getChildElementRanked(0) } + + /** Gets the last child node of this node. */ + final ControlFlowElement getLastChildElement() { + exists(int last | + result = this.getChildElementRanked(last) and + not exists(this.getChildElementRanked(last + 1)) + ) + } + + /** Holds if this element has no children. */ + predicate isLeafElement() { not exists(this.getFirstChildElement()) } + + override predicate propagatesAbnormal(ControlFlowElement child) { + child = this.getChildElement(_) + } + + pragma[nomagic] + override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { + exists(int i | + last(this.getChildElementRanked(i), pred, c) and + completionIsNormal(c) and + first(this.getChildElementRanked(i + 1), succ) + ) + } +} + +/** A standard element that is executed in pre-order. */ +abstract class StandardPreOrderTree extends StandardTree, PreOrderTree { + override predicate last(ControlFlowElement last, Completion c) { + last(this.getLastChildElement(), last, c) + or + this.isLeafElement() and + completionIsValidFor(c, this) and + last = this + } + + override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { + StandardTree.super.succ(pred, succ, c) + or + pred = this and + first(this.getFirstChildElement(), succ) and + completionIsSimple(c) + } +} + +/** A standard element that is executed in post-order. 
*/ +abstract class StandardPostOrderTree extends StandardTree, PostOrderTree { + override predicate first(ControlFlowElement first) { + first(this.getFirstChildElement(), first) + or + not exists(this.getFirstChildElement()) and + first = this + } + + override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { + StandardTree.super.succ(pred, succ, c) + or + last(this.getLastChildElement(), pred, c) and + succ = this and + completionIsNormal(c) + } +} + +/** An element that is a leaf in the control flow graph. */ +abstract class LeafTree extends PreOrderTree, PostOrderTree { + override predicate propagatesAbnormal(ControlFlowElement child) { none() } + + override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { none() } +} + +/** + * Holds if split kinds `sk1` and `sk2` may overlap. That is, they may apply + * to at least one common AST node inside `scope`. + */ +private predicate overlapping(CfgScope scope, SplitKind sk1, SplitKind sk2) { + exists(ControlFlowElement e | + sk1.appliesTo(e) and + sk2.appliesTo(e) and + scope = getCfgScope(e) + ) +} + +/** + * A split kind. Each control flow node can have at most one split of a + * given kind. + */ +abstract class SplitKind extends SplitKindBase { + /** Gets a split of this kind. */ + SplitImpl getASplit() { result.getKind() = this } + + /** Holds if some split of this kind applies to AST node `n`. */ + predicate appliesTo(ControlFlowElement n) { this.getASplit().appliesTo(n) } + + /** + * Gets a unique integer representing this split kind. The integer is used + * to represent sets of splits as ordered lists. + */ + abstract int getListOrder(); + + /** Gets the rank of this split kind among all kinds that overlap with it inside `scope`, ordered by `getListOrder()`. */ + private int getRank(CfgScope scope) { + this = rank[result](SplitKind sk | overlapping(scope, this, sk) | sk order by sk.getListOrder()) + } + + /** + * Holds if this split kind is enabled for AST node `n`. 
For performance reasons, + * the number of splits is restricted by the `maxSplits()` predicate. + */ + predicate isEnabled(ControlFlowElement n) { + this.appliesTo(n) and + this.getRank(getCfgScope(n)) <= maxSplits() + } + + /** + * Gets the rank of this split kind among all the split kinds that apply to + * AST node `n`. The rank is based on the order defined by `getListOrder()`. + */ + int getListRank(ControlFlowElement n) { + this.isEnabled(n) and + this = rank[result](SplitKind sk | sk.appliesTo(n) | sk order by sk.getListOrder()) + } + + /** Gets a textual representation of this split kind. */ + abstract string toString(); +} + +/** Provides the interface for implementing an entity to split on. */ +abstract class SplitImpl extends Split { + /** Gets the kind of this split. */ + abstract SplitKind getKind(); + + /** + * Holds if this split is entered when control passes from `pred` to `succ` with + * completion `c`. + * + * Invariant: `hasEntry(pred, succ, c) implies succ(pred, succ, c)`. + */ + abstract predicate hasEntry(ControlFlowElement pred, ControlFlowElement succ, Completion c); + + /** + * Holds if this split is entered when control passes from `scope` to the entry point + * `first`. + * + * Invariant: `hasEntryScope(scope, first) implies scopeFirst(scope, first)`. + */ + abstract predicate hasEntryScope(CfgScope scope, ControlFlowElement first); + + /** + * Holds if this split is left when control passes from `pred` to `succ` with + * completion `c`. + * + * Invariant: `hasExit(pred, succ, c) implies succ(pred, succ, c)`. + */ + abstract predicate hasExit(ControlFlowElement pred, ControlFlowElement succ, Completion c); + + /** + * Holds if this split is left when control passes from `last` out of the enclosing + * scope `scope` with completion `c`. 
+ * + * Invariant: `hasExitScope(scope, last, c) implies scopeLast(scope, last, c)` + */ + abstract predicate hasExitScope(CfgScope scope, ControlFlowElement last, Completion c); + + /** + * Holds if this split is maintained when control passes from `pred` to `succ` with + * completion `c`. + * + * Invariant: `hasSuccessor(pred, succ, c) implies succ(pred, succ, c)` + */ + abstract predicate hasSuccessor(ControlFlowElement pred, ControlFlowElement succ, Completion c); + + /** Holds if this split applies to control flow element `cfe`. */ + final predicate appliesTo(ControlFlowElement cfe) { + this.hasEntry(_, cfe, _) + or + this.hasEntryScope(_, cfe) + or + exists(ControlFlowElement pred | this.appliesTo(pred) | this.hasSuccessor(pred, cfe, _)) + } + + /** The `succ` relation restricted to predecessors `pred` that this split applies to. */ + pragma[noinline] + final predicate appliesSucc(ControlFlowElement pred, ControlFlowElement succ, Completion c) { + this.appliesTo(pred) and + succ(pred, succ, c) + } +} + +/** + * A set of control flow node splits. The set is represented by a list of splits, + * ordered by ascending rank. + */ +class Splits extends TSplits { + /** Gets a textual representation of this set of splits. */ + string toString() { result = splitsToString(this) } + + /** Gets a split belonging to this set of splits. 
*/ + SplitImpl getASplit() { + exists(SplitImpl head, Splits tail | this = TSplitsCons(head, tail) | + result = head + or + result = tail.getASplit() + ) + } +} + +private predicate succEntrySplitsFromRank( + CfgScope pred, ControlFlowElement succ, Splits splits, int rnk +) { + splits = TSplitsNil() and + scopeFirst(pred, succ) and + rnk = 0 + or + exists(SplitImpl head, Splits tail | succEntrySplitsCons(pred, succ, head, tail, rnk) | + splits = TSplitsCons(head, tail) + ) +} + +private predicate succEntrySplitsCons( + CfgScope pred, ControlFlowElement succ, SplitImpl head, Splits tail, int rnk +) { + succEntrySplitsFromRank(pred, succ, tail, rnk - 1) and + head.hasEntryScope(pred, succ) and + rnk = head.getKind().getListRank(succ) +} + +/** + * Holds if `succ` with splits `succSplits` is the first element that is executed + * when entering callable `pred`. + */ +pragma[noinline] +private predicate succEntrySplits( + CfgScope pred, ControlFlowElement succ, Splits succSplits, SuccessorType t +) { + exists(int rnk | + scopeFirst(pred, succ) and + successorTypeIsSimple(t) and + succEntrySplitsFromRank(pred, succ, succSplits, rnk) + | + rnk = 0 and + not any(SplitImpl split).hasEntryScope(pred, succ) + or + rnk = max(SplitImpl split | split.hasEntryScope(pred, succ) | split.getKind().getListRank(succ)) + ) +} + +/** + * Holds if `pred` with splits `predSplits` can exit the enclosing callable + * `succ` with type `t`. 
+ */ +private predicate succExitSplits( + ControlFlowElement pred, Splits predSplits, CfgScope succ, SuccessorType t +) { + exists(Reachability::SameSplitsBlock b, Completion c | pred = b.getAnElement() | + b.isReachable(predSplits) and + t = getAMatchingSuccessorType(c) and + scopeLast(succ, pred, c) and + forall(SplitImpl predSplit | predSplit = predSplits.getASplit() | + predSplit.hasExitScope(succ, pred, c) + ) + ) +} + +/** + * Provides a predicate for the successor relation with split information, + * as well as logic used to construct the type `TSplits` representing sets + * of splits. Only sets of splits that can be reached are constructed, hence + * the predicates are mutually recursive. + * + * For the successor relation + * + * ```ql + * succSplits(ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, Completion c) + * ``` + * + * the following invariants are maintained: + * + * 1. `pred` is reachable with split set `predSplits`. + * 2. For all `split` in `predSplits`: + * - If `split.hasSuccessor(pred, succ, c)` then `split` in `succSplits`. + * 3. For all `split` in `predSplits`: + * - If `split.hasExit(pred, succ, c)` and not `split.hasEntry(pred, succ, c)` then + * `split` not in `succSplits`. + * 4. For all `split` with kind not in `predSplits`: + * - If `split.hasEntry(pred, succ, c)` then `split` in `succSplits`. + * 5. For all `split` in `succSplits`: + * - `split.hasSuccessor(pred, succ, c)` and `split` in `predSplits`, or + * - `split.hasEntry(pred, succ, c)`. + * + * The algorithm divides into four cases: + * + * 1. The set of splits for the successor is the same as the set of splits + * for the predecessor: + * a) The successor is in the same `SameSplitsBlock` as the predecessor. + * b) The successor is *not* in the same `SameSplitsBlock` as the predecessor. + * 2. 
The set of splits for the successor is different from the set of splits + * for the predecessor: + * a) The set of splits for the successor is *maybe* non-empty. + * b) The set of splits for the successor is *always* empty. + * + * Only case 2a may introduce new sets of splits, so only predicates from + * this case are used in the definition of `TSplits`. + * + * The predicates in this module are named after the cases above. + */ +private module SuccSplits { + private predicate succInvariant1( + Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits, + ControlFlowElement succ, Completion c + ) { + pred = b.getAnElement() and + b.isReachable(predSplits) and + succ(pred, succ, c) + } + + private predicate case1b0( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c + ) { + exists(Reachability::SameSplitsBlock b | + // Invariant 1 + succInvariant1(b, pred, predSplits, succ, c) + | + (succ = b.getAnElement() implies succ = b) and + // Invariant 4 + not exists(SplitImpl split | split.hasEntry(pred, succ, c)) + ) + } + + /** + * Case 1b. + * + * Invariants 1 and 4 hold in the base case, and invariants 2, 3, and 5 are + * maintained for all splits in `predSplits` (= `succSplits`), except + * possibly for the splits in `except`. + * + * The predicate is written using explicit recursion, as opposed to a `forall`, + * to avoid negative recursion. 
+ */ + private predicate case1bForall( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except + ) { + case1b0(pred, predSplits, succ, c) and + except = predSplits + or + exists(SplitImpl split | + case1bForallCons(pred, predSplits, succ, c, split, except) and + split.hasSuccessor(pred, succ, c) + ) + } + + pragma[noinline] + private predicate case1bForallCons( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, + SplitImpl exceptHead, Splits exceptTail + ) { + case1bForall(pred, predSplits, succ, c, TSplitsCons(exceptHead, exceptTail)) + } + + private predicate case1( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c + ) { + // Case 1a + exists(Reachability::SameSplitsBlock b | succInvariant1(b, pred, predSplits, succ, c) | + succ = b.getAnElement() and + not succ = b + ) + or + // Case 1b + case1bForall(pred, predSplits, succ, c, TSplitsNil()) + } + + pragma[noinline] + private SplitImpl succInvariant1GetASplit( + Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits, + ControlFlowElement succ, Completion c + ) { + succInvariant1(b, pred, predSplits, succ, c) and + result = predSplits.getASplit() + } + + private predicate case2aux( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c + ) { + exists(Reachability::SameSplitsBlock b | + succInvariant1(b, pred, predSplits, succ, c) and + (succ = b.getAnElement() implies succ = b) + | + succInvariant1GetASplit(b, pred, predSplits, succ, c).hasExit(pred, succ, c) + or + any(SplitImpl split).hasEntry(pred, succ, c) + ) + } + + /** + * Holds if `succSplits` should not inherit a split of kind `sk` from + * `predSplits`, except possibly because of a split in `except`. + * + * The predicate is written using explicit recursion, as opposed to a `forall`, + * to avoid negative recursion. 
+ */ + private predicate case2aNoneInheritedOfKindForall( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk, + Splits except + ) { + case2aux(pred, predSplits, succ, c) and + sk.appliesTo(succ) and + except = predSplits + or + exists(Splits mid, SplitImpl split | + case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, mid) and + mid = TSplitsCons(split, except) + | + split.getKind() = any(SplitKind sk0 | sk0 != sk and sk0.appliesTo(succ)) + or + split.hasExit(pred, succ, c) + ) + } + + pragma[nomagic] + private predicate entryOfKind( + ControlFlowElement pred, ControlFlowElement succ, Completion c, SplitImpl split, SplitKind sk + ) { + split.hasEntry(pred, succ, c) and + sk = split.getKind() + } + + /** Holds if `succSplits` should not have a split of kind `sk`. */ + pragma[nomagic] + private predicate case2aNoneOfKind( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk + ) { + // None inherited from predecessor + case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, TSplitsNil()) and + // None newly entered into + not entryOfKind(pred, succ, c, _, sk) + } + + /** Holds if `succSplits` should not have a split at rank `rnk`, that is, `rnk` is the list rank at `succ` of a split kind that `succSplits` should not have. */ + pragma[nomagic] + private predicate case2aNoneAtRank( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk + ) { + exists(SplitKind sk | case2aNoneOfKind(pred, predSplits, succ, c, sk) | + rnk = sk.getListRank(succ) + ) + } + + pragma[nomagic] + private SplitImpl case2auxGetAPredecessorSplit( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c + ) { + case2aux(pred, predSplits, succ, c) and + result = predSplits.getASplit() + } + + /** Gets a split of kind `sk` that should be in `succSplits`. 
*/ + pragma[nomagic] + private SplitImpl case2aSome( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk + ) { + ( + // Inherited from predecessor + result = case2auxGetAPredecessorSplit(pred, predSplits, succ, c) and + result.hasSuccessor(pred, succ, c) + or + // Newly entered into + exists(SplitKind sk0 | + case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk0, TSplitsNil()) + | + entryOfKind(pred, succ, c, result, sk0) + ) + ) and + sk = result.getKind() + } + + /** Gets a split that should be in `succSplits` at rank `rnk`. */ + pragma[nomagic] + SplitImpl case2aSomeAtRank( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk + ) { + exists(SplitKind sk | result = case2aSome(pred, predSplits, succ, c, sk) | + rnk = sk.getListRank(succ) + ) + } + + /** + * Case 2a. + * + * As opposed to the other cases, in this case we need to construct a new set + * of splits `succSplits`. Since this involves constructing the very IPA type, + * we cannot recurse directly over the structure of `succSplits`. Instead, we + * recurse over the ranks of all splits that *might* be in `succSplits`. + * + * - Invariant 1 holds in the base case, + * - invariant 2 holds for all splits with rank at least `rnk`, + * - invariant 3 holds for all splits in `predSplits`, + * - invariant 4 holds for all splits in `succSplits` with rank at least `rnk`, + * and + * - invariant 5 holds for all splits in `succSplits` with rank at least `rnk`. 
+ */ + predicate case2aFromRank( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + Completion c, int rnk + ) { + case2aux(pred, predSplits, succ, c) and + succSplits = TSplitsNil() and + rnk = max(any(SplitKind sk).getListRank(succ)) + 1 + or + case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1) and + case2aNoneAtRank(pred, predSplits, succ, c, rnk) + or + exists(Splits mid, SplitImpl split | split = case2aCons(pred, predSplits, succ, mid, c, rnk) | + succSplits = TSplitsCons(split, mid) + ) + } + + pragma[noinline] + private SplitImpl case2aCons( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + Completion c, int rnk + ) { + case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1) and + result = case2aSomeAtRank(pred, predSplits, succ, c, rnk) + } + + /** + * Case 2b. + * + * Invariants 1, 4, and 5 hold in the base case, and invariants 2 and 3 are + * maintained for all splits in `predSplits`, except possibly for the splits + * in `except`. + * + * The predicate is written using explicit recursion, as opposed to a `forall`, + * to avoid negative recursion. 
+ */ + private predicate case2bForall( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except + ) { + // Invariant 1 + case2aux(pred, predSplits, succ, c) and + // Invariants 4 and 5 + not any(SplitKind sk).appliesTo(succ) and + except = predSplits + or + exists(SplitImpl split | case2bForallCons(pred, predSplits, succ, c, split, except) | + // Invariants 2 and 3 + split.hasExit(pred, succ, c) + ) + } + + pragma[noinline] + private predicate case2bForallCons( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, + SplitImpl exceptHead, Splits exceptTail + ) { + case2bForall(pred, predSplits, succ, c, TSplitsCons(exceptHead, exceptTail)) + } + + private predicate case2( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + Completion c + ) { + case2aFromRank(pred, predSplits, succ, succSplits, c, 1) + or + case2bForall(pred, predSplits, succ, c, TSplitsNil()) and + succSplits = TSplitsNil() + } + + /** + * Holds if `succ` with splits `succSplits` is a successor for `pred` with + * splits `predSplits`, given that `pred` finishes with completion `c`. + */ + predicate succSplits( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + Completion c + ) { + case1(pred, predSplits, succ, c) and + succSplits = predSplits + or + case2(pred, predSplits, succ, succSplits, c) + } +} + +import SuccSplits + +/** Provides logic for calculating reachable control flow nodes. */ +private module Reachability { + /** + * Holds if `cfe` is a control flow element where the set of possible splits may + * be different from the set of possible splits for one of `cfe`'s predecessors. + * That is, `cfe` starts a new block of elements with the same set of splits. 
+ */ + private predicate startsSplits(ControlFlowElement cfe) { + scopeFirst(_, cfe) + or + exists(SplitImpl s | + s.hasEntry(_, cfe, _) + or + s.hasExit(_, cfe, _) + ) + or + exists(ControlFlowElement pred, SplitImpl split, Completion c | succ(pred, cfe, c) | + split.appliesTo(pred) and + not split.hasSuccessor(pred, cfe, c) + ) + } + + private predicate intraSplitsSucc(ControlFlowElement pred, ControlFlowElement succ) { + succ(pred, succ, _) and + not startsSplits(succ) + } + + private predicate splitsBlockContains(ControlFlowElement start, ControlFlowElement cfe) = + fastTC(intraSplitsSucc/2)(start, cfe) + + /** + * A block of control flow elements where the set of splits is guaranteed + * to remain unchanged, represented by the first element in the block. + */ + class SameSplitsBlock extends ControlFlowElement { + SameSplitsBlock() { startsSplits(this) } + + /** Gets a control flow element in this block. */ + ControlFlowElement getAnElement() { + splitsBlockContains(this, result) + or + result = this + } + + pragma[noinline] + private SameSplitsBlock getASuccessor(Splits predSplits, Splits succSplits) { + exists(ControlFlowElement pred | pred = this.getAnElement() | + succSplits(pred, predSplits, result, succSplits, _) + ) + } + + /** + * Holds if the elements of this block are reachable from a callable entry + * point, with the splits `splits`. + */ + predicate isReachable(Splits splits) { + // Base case + succEntrySplits(_, this, splits, _) + or + // Recursive case + exists(SameSplitsBlock pred, Splits predSplits | pred.isReachable(predSplits) | + this = pred.getASuccessor(predSplits, splits) + ) + } + } +} + +cached +private module Cached { + /** + * If needed, call this predicate from `ControlFlowGraphImplSpecific.qll` in order to + * force a stage-dependency on the `ControlFlowGraphImplShared.qll` stage and thereby + * collapsing the two stages. 
+ */ + cached + predicate forceCachingInSameStage() { any() } + + cached + newtype TSplits = + TSplitsNil() or + TSplitsCons(SplitImpl head, Splits tail) { + exists( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk + | + case2aFromRank(pred, predSplits, succ, tail, c, rnk + 1) and + head = case2aSomeAtRank(pred, predSplits, succ, c, rnk) + ) + or + succEntrySplitsCons(_, _, head, tail, _) + } + + cached + string splitsToString(Splits splits) { + splits = TSplitsNil() and + result = "" + or + exists(SplitImpl head, Splits tail, string headString, string tailString | + splits = TSplitsCons(head, tail) + | + headString = head.toString() and + tailString = tail.toString() and + if tailString = "" + then result = headString + else + if headString = "" + then result = tailString + else result = headString + ", " + tailString + ) + } + + /** + * Internal representation of control flow nodes in the control flow graph. + * The control flow graph is pruned for unreachable nodes. + */ + cached + newtype TNode = + TEntryNode(CfgScope scope) { succEntrySplits(scope, _, _, _) } or + TAnnotatedExitNode(CfgScope scope, boolean normal) { + exists(Reachability::SameSplitsBlock b, SuccessorType t | b.isReachable(_) | + succExitSplits(b.getAnElement(), _, scope, t) and + if isAbnormalExitType(t) then normal = false else normal = true + ) + } or + TExitNode(CfgScope scope) { + exists(Reachability::SameSplitsBlock b | b.isReachable(_) | + succExitSplits(b.getAnElement(), _, scope, _) + ) + } or + TElementNode(ControlFlowElement cfe, Splits splits) { + exists(Reachability::SameSplitsBlock b | b.isReachable(splits) | cfe = b.getAnElement()) + } + + /** Gets a successor node of a given flow type, if any. 
*/ + cached + TNode getASuccessor(TNode pred, SuccessorType t) { + // Callable entry node -> callable body + exists(ControlFlowElement succElement, Splits succSplits, CfgScope scope | + result = TElementNode(succElement, succSplits) and + pred = TEntryNode(scope) and + succEntrySplits(scope, succElement, succSplits, t) + ) + or + exists(ControlFlowElement predElement, Splits predSplits | + pred = TElementNode(predElement, predSplits) + | + // Element node -> callable exit (annotated) + exists(CfgScope scope, boolean normal | + result = TAnnotatedExitNode(scope, normal) and + succExitSplits(predElement, predSplits, scope, t) and + if isAbnormalExitType(t) then normal = false else normal = true + ) + or + // Element node -> element node + exists(ControlFlowElement succElement, Splits succSplits, Completion c | + result = TElementNode(succElement, succSplits) + | + succSplits(predElement, predSplits, succElement, succSplits, c) and + t = getAMatchingSuccessorType(c) + ) + ) + or + // Callable exit (annotated) -> callable exit + exists(CfgScope scope | + pred = TAnnotatedExitNode(scope, _) and + result = TExitNode(scope) and + successorTypeIsSimple(t) + ) + } + + /** + * Gets a first control flow element executed within `cfe`. + */ + cached + ControlFlowElement getAControlFlowEntryNode(ControlFlowElement cfe) { first(cfe, result) } + + /** + * Gets a potential last control flow element executed within `cfe`. + */ + cached + ControlFlowElement getAControlFlowExitNode(ControlFlowElement cfe) { last(cfe, result, _) } +} + +import Cached + +/** + * Import this module into a `.ql` file of `@kind graph` to render a CFG. The + * graph is restricted to nodes from `RelevantNode`. 
+ */ +module TestOutput { + abstract class RelevantNode extends Node { } + + query predicate nodes(RelevantNode n, string attr, string val) { + attr = "semmle.order" and + val = + any(int i | + n = + rank[i](RelevantNode p, Location l | + l = p.getLocation() + | + p + order by + l.getFile().getBaseName(), l.getFile().getAbsolutePath(), l.getStartLine(), + l.getStartColumn() + ) + ).toString() + } + + query predicate edges(RelevantNode pred, RelevantNode succ, string attr, string val) { + exists(SuccessorType t | succ = getASuccessor(pred, t) | + attr = "semmle.label" and + if successorTypeIsSimple(t) then val = "" else val = t.toString() + ) + } +} + +/** Provides a set of splitting-related consistency queries. */ +module Consistency { + query predicate nonUniqueSetRepresentation(Splits s1, Splits s2) { + forex(Split s | s = s1.getASplit() | s = s2.getASplit()) and + forex(Split s | s = s2.getASplit() | s = s1.getASplit()) and + s1 != s2 + } + + query predicate breakInvariant2( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + SplitImpl split, Completion c + ) { + succSplits(pred, predSplits, succ, succSplits, c) and + split = predSplits.getASplit() and + split.hasSuccessor(pred, succ, c) and + not split = succSplits.getASplit() + } + + query predicate breakInvariant3( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + SplitImpl split, Completion c + ) { + succSplits(pred, predSplits, succ, succSplits, c) and + split = predSplits.getASplit() and + split.hasExit(pred, succ, c) and + not split.hasEntry(pred, succ, c) and + split = succSplits.getASplit() + } + + query predicate breakInvariant4( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + SplitImpl split, Completion c + ) { + succSplits(pred, predSplits, succ, succSplits, c) and + split.hasEntry(pred, succ, c) and + not split.getKind() = predSplits.getASplit().getKind() and + not split = 
succSplits.getASplit() + } + + query predicate breakInvariant5( + ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, + SplitImpl split, Completion c + ) { + succSplits(pred, predSplits, succ, succSplits, c) and + split = succSplits.getASplit() and + not (split.hasSuccessor(pred, succ, c) and split = predSplits.getASplit()) and + not split.hasEntry(pred, succ, c) + } + + query predicate multipleSuccessors(Node node, SuccessorType t, Node successor) { + not node instanceof TEntryNode and + strictcount(getASuccessor(node, t)) > 1 and + successor = getASuccessor(node, t) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll new file mode 100644 index 00000000000..2d018ff616a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll @@ -0,0 +1,74 @@ +private import ruby as rb +private import ControlFlowGraphImpl as Impl +private import Completion as Comp +private import codeql.ruby.ast.internal.Synthesis +private import Splitting as Splitting +private import codeql.ruby.CFG as CFG + +/** The base class for `ControlFlowTree`. */ +class ControlFlowTreeBase extends rb::AstNode { + ControlFlowTreeBase() { not any(Synthesis s).excludeFromControlFlowTree(this) } +} + +class ControlFlowElement = rb::AstNode; + +class Completion = Comp::Completion; + +/** + * Holds if `c` represents normal evaluation of a statement or an + * expression. + */ +predicate completionIsNormal(Completion c) { c instanceof Comp::NormalCompletion } + +/** + * Holds if `c` represents simple (normal) evaluation of a statement or an + * expression. + */ +predicate completionIsSimple(Completion c) { c instanceof Comp::SimpleCompletion } + +/** Holds if `c` is a valid completion for `e`.
*/ +predicate completionIsValidFor(Completion c, ControlFlowElement e) { c.isValidFor(e) } + +class CfgScope = CFG::CfgScope; + +predicate getCfgScope = Impl::getCfgScope/1; + +/** Holds if `first` is first executed when entering `scope`. */ +predicate scopeFirst(CfgScope scope, ControlFlowElement first) { + scope.(Impl::CfgScope::Range_).entry(first) +} + +/** Holds if `scope` is exited when `last` finishes with completion `c`. */ +predicate scopeLast(CfgScope scope, ControlFlowElement last, Completion c) { + scope.(Impl::CfgScope::Range_).exit(last, c) +} + +/** The maximum number of splits allowed for a given node. */ +int maxSplits() { result = 5 } + +class SplitKindBase = Splitting::TSplitKind; + +class Split = Splitting::Split; + +class SuccessorType = CFG::SuccessorType; + +/** Gets a successor type that matches completion `c`. */ +SuccessorType getAMatchingSuccessorType(Completion c) { result = c.getAMatchingSuccessorType() } + +/** + * Holds if `t` is a simple (normal) successor type, that is, a + * `NormalSuccessor`. + */ +predicate successorTypeIsSimple(SuccessorType t) { + t instanceof CFG::SuccessorTypes::NormalSuccessor +} + +/** Holds if `t` is an abnormal exit type out of a CFG scope. */ +predicate isAbnormalExitType(SuccessorType t) { + t instanceof CFG::SuccessorTypes::RaiseSuccessor or + t instanceof CFG::SuccessorTypes::ExitSuccessor +} + +class Location = rb::Location; + +class Node = CFG::CfgNode; diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll new file mode 100644 index 00000000000..e1927a0b1c9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll @@ -0,0 +1,22 @@ +/** Provides a simple analysis for identifying calls that will not return.
*/ + +private import codeql.ruby.AST +private import Completion + +/** A call that definitely does not return (conservative analysis). */ +abstract class NonReturningCall extends MethodCall { + /** Gets a valid completion for this non-returning call. */ + abstract Completion getACompletion(); +} + +private class RaiseCall extends NonReturningCall { + RaiseCall() { this.getMethodName() = "raise" } + + override RaiseCompletion getACompletion() { not result instanceof NestedCompletion } +} + +private class ExitCall extends NonReturningCall { + ExitCall() { this.getMethodName() in ["abort", "exit"] } + + override ExitCompletion getACompletion() { not result instanceof NestedCompletion } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll new file mode 100644 index 00000000000..dd360fe8371 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll @@ -0,0 +1,336 @@ +/** + * Provides classes and predicates relevant for splitting the control flow graph. + */ + +private import codeql.ruby.AST +private import Completion +private import ControlFlowGraphImpl +private import SuccessorTypes +private import codeql.ruby.controlflow.ControlFlowGraph + +cached +private module Cached { + cached + newtype TSplitKind = + TConditionalCompletionSplitKind() { forceCachingInSameStage() } or + TEnsureSplitKind(int nestLevel) { nestLevel = any(Trees::BodyStmtTree t).getNestLevel() } + + cached + newtype TSplit = + TConditionalCompletionSplit(ConditionalCompletion c) or + TEnsureSplit(EnsureSplitting::EnsureSplitType type, int nestLevel) { + nestLevel = any(Trees::BodyStmtTree t).getNestLevel() + } +} + +import Cached + +/** A split for a control flow node. */ +class Split extends TSplit { + /** Gets a textual representation of this split. 
*/ + string toString() { none() } +} + +private module ConditionalCompletionSplitting { + /** + * A split for conditional completions. For example, in + * + * ```rb + * def method x + * if x < 2 and x > 0 + * puts "x is 1" + * end + * end + * ``` + * + * we record whether `x < 2` and `x > 0` evaluate to `true` or `false`, and + * restrict the edges out of `x < 2 and x > 0` accordingly. + */ + class ConditionalCompletionSplit extends Split, TConditionalCompletionSplit { + ConditionalCompletion completion; + + ConditionalCompletionSplit() { this = TConditionalCompletionSplit(completion) } + + override string toString() { result = completion.toString() } + } + + private class ConditionalCompletionSplitKind extends SplitKind, TConditionalCompletionSplitKind { + override int getListOrder() { result = 0 } + + override predicate isEnabled(AstNode n) { this.appliesTo(n) } + + override string toString() { result = "ConditionalCompletion" } + } + + int getNextListOrder() { result = 1 } + + private class ConditionalCompletionSplitImpl extends SplitImpl, ConditionalCompletionSplit { + override ConditionalCompletionSplitKind getKind() { any() } + + override predicate hasEntry(AstNode pred, AstNode succ, Completion c) { + succ(pred, succ, c) and + last(succ, _, completion) and + ( + last(succ.(NotExpr).getOperand(), pred, c) and + completion.(BooleanCompletion).getDual() = c + or + last(succ.(LogicalAndExpr).getAnOperand(), pred, c) and + completion = c + or + last(succ.(LogicalOrExpr).getAnOperand(), pred, c) and + completion = c + or + last(succ.(StmtSequence).getLastStmt(), pred, c) and + completion = c + or + last(succ.(ConditionalExpr).getBranch(_), pred, c) and + completion = c + ) + } + + override predicate hasEntryScope(CfgScope scope, AstNode succ) { none() } + + override predicate hasExit(AstNode pred, AstNode succ, Completion c) { + this.appliesTo(pred) and + succ(pred, succ, c) and + if c instanceof ConditionalCompletion then completion = c else any() + } + + 
override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) { + this.appliesTo(last) and + succExit(scope, last, c) and + if c instanceof ConditionalCompletion then completion = c else any() + } + + override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) { none() } + } +} + +module EnsureSplitting { + /** + * The type of a split `ensure` node. + * + * The type represents one of the possible ways of entering an `ensure` + * block. For example, if a block ends with a `return` statement, then + * the `ensure` block must end with a `return` as well (provided that + * the `ensure` block executes normally). + */ + class EnsureSplitType extends SuccessorType { + EnsureSplitType() { not this instanceof ConditionalSuccessor } + + /** Holds if this split type matches entry into an `ensure` block with completion `c`. */ + predicate isSplitForEntryCompletion(Completion c) { + if c instanceof NormalCompletion + then + // If the entry into the `ensure` block completes with any normal completion, + // it simply means normal execution after the `ensure` block + this instanceof NormalSuccessor + else this = c.getAMatchingSuccessorType() + } + } + + /** A node that belongs to an `ensure` block. */ + private class EnsureNode extends AstNode { + private Trees::BodyStmtTree block; + + EnsureNode() { this = block.getAnEnsureDescendant() } + + int getNestLevel() { result = block.getNestLevel() } + + /** Holds if this node is the entry node in the `ensure` block it belongs to. */ + predicate isEntryNode() { first(block.getEnsure(), this) } + } + + /** + * A split for nodes belonging to an `ensure` block, which determines how to + * continue execution after leaving the `ensure` block. 
For example, in + * + * ```rb + * begin + * if x + * raise "Exception" + * end + * ensure + * puts "Ensure" + * end + * ``` + * + * all control flow nodes in the `ensure` block have two splits: one representing + * normal execution of the body (when `x` evaluates to `false`), and one representing + * exceptional execution of the body (when `x` evaluates to `true`). + */ + class EnsureSplit extends Split, TEnsureSplit { + private EnsureSplitType type; + private int nestLevel; + + EnsureSplit() { this = TEnsureSplit(type, nestLevel) } + + /** + * Gets the type of this `ensure` split, that is, how to continue execution after the + * `ensure` block. + */ + EnsureSplitType getType() { result = type } + + /** Gets the nesting level. */ + int getNestLevel() { result = nestLevel } + + override string toString() { + if type instanceof NormalSuccessor + then result = "" + else + if nestLevel > 0 + then result = "ensure(" + nestLevel + "): " + type.toString() + else result = "ensure: " + type.toString() + } + } + + private int getListOrder(EnsureSplitKind kind) { + result = ConditionalCompletionSplitting::getNextListOrder() + kind.getNestLevel() + } + + int getNextListOrder() { + result = max([getListOrder(_) + 1, ConditionalCompletionSplitting::getNextListOrder()]) + } + + private class EnsureSplitKind extends SplitKind, TEnsureSplitKind { + private int nestLevel; + + EnsureSplitKind() { this = TEnsureSplitKind(nestLevel) } + + /** Gets the nesting level.
*/ + int getNestLevel() { result = nestLevel } + + override int getListOrder() { result = getListOrder(this) } + + override string toString() { result = "ensure (" + nestLevel + ")" } + } + + pragma[noinline] + private predicate hasEntry0(AstNode pred, EnsureNode succ, int nestLevel, Completion c) { + succ.isEntryNode() and + nestLevel = succ.getNestLevel() and + succ(pred, succ, c) + } + + private class EnsureSplitImpl extends SplitImpl, EnsureSplit { + override EnsureSplitKind getKind() { result.getNestLevel() = this.getNestLevel() } + + override predicate hasEntry(AstNode pred, AstNode succ, Completion c) { + hasEntry0(pred, succ, this.getNestLevel(), c) and + this.getType().isSplitForEntryCompletion(c) + } + + override predicate hasEntryScope(CfgScope scope, AstNode first) { none() } + + /** + * Holds if this split applies to `pred`, where `pred` is a valid predecessor. + */ + private predicate appliesToPredecessor(AstNode pred) { + this.appliesTo(pred) and + (succ(pred, _, _) or succExit(_, pred, _)) + } + + pragma[noinline] + private predicate exit0(AstNode pred, Trees::BodyStmtTree block, int nestLevel, Completion c) { + this.appliesToPredecessor(pred) and + nestLevel = block.getNestLevel() and + block.lastInner(pred, c) + } + + /** + * Holds if `pred` may exit this split with completion `c`. The Boolean + * `inherited` indicates whether `c` is an inherited completion from the + * body. 
+ */ + private predicate exit(Trees::BodyStmtTree block, AstNode pred, Completion c, boolean inherited) { + exists(EnsureSplitType type | + exit0(pred, block, this.getNestLevel(), c) and + type = this.getType() + | + if last(block.getEnsure(), pred, c) + then + // `ensure` block can itself exit with completion `c`: either `c` must + // match this split, `c` must be an abnormal completion, or this split + // does not require another completion to be recovered + inherited = false and + ( + type = c.getAMatchingSuccessorType() + or + not c instanceof NormalCompletion + or + type instanceof NormalSuccessor + ) + else ( + // `ensure` block can exit with inherited completion `c`, which must + // match this split + inherited = true and + type = c.getAMatchingSuccessorType() and + not type instanceof NormalSuccessor + ) + ) + or + // If this split is normal, and an outer split can exit based on an inherited + // completion, we need to exit this split as well. For example, in + // + // ```rb + // def m(b1, b2) + // if b1 + // return + // end + // ensure + // begin + // if b2 + // raise "Exception" + // end + // ensure + // puts "inner ensure" + // end + // end + // ``` + // + // if the outer split for `puts "inner ensure"` is `return` and the inner split + // is "normal" (corresponding to `b1 = true` and `b2 = false`), then the inner + // split must be able to exit with a `return` completion. 
+ this.appliesToPredecessor(pred) and + exists(EnsureSplitImpl outer | + outer.getNestLevel() = this.getNestLevel() - 1 and + outer.exit(_, pred, c, inherited) and + this.getType() instanceof NormalSuccessor and + inherited = true + ) + } + + override predicate hasExit(AstNode pred, AstNode succ, Completion c) { + succ(pred, succ, c) and + ( + exit(_, pred, c, _) + or + exit(_, pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _) + ) + } + + override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) { + succExit(scope, last, c) and + ( + exit(_, last, c, _) + or + exit(_, last, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _) + ) + } + + override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) { + this.appliesToPredecessor(pred) and + succ(pred, succ, c) and + succ = + any(EnsureNode en | + if en.isEntryNode() + then + // entering a nested `ensure` block + en.getNestLevel() > this.getNestLevel() + else + // staying in the same (possibly nested) `ensure` block as `pred` + en.getNestLevel() >= this.getNestLevel() + ) + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll new file mode 100644 index 00000000000..0c0ca749eac --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll @@ -0,0 +1,75 @@ +/** Provides commonly used barriers to dataflow. */ + +private import ruby +private import codeql.ruby.DataFlow +private import codeql.ruby.CFG + +/** + * A validation of value by comparing with a constant string value, for example + * in: + * + * ```rb + * dir = params[:order] + * dir = "DESC" unless dir == "ASC" + * User.order("name #{dir}") + * ``` + * + * the equality operation guards against `dir` taking arbitrary values when used + * in the `order` call. 
+ */ +class StringConstCompare extends DataFlow::BarrierGuard, + CfgNodes::ExprNodes::ComparisonOperationCfgNode { + private CfgNode checkedNode; + // The value of the condition that results in the node being validated. + private boolean checkedBranch; + + StringConstCompare() { + exists(CfgNodes::ExprNodes::StringLiteralCfgNode strLitNode | + this.getExpr() instanceof EqExpr and checkedBranch = true + or + this.getExpr() instanceof CaseEqExpr and checkedBranch = true + or + this.getExpr() instanceof NEExpr and checkedBranch = false + | + this.getLeftOperand() = strLitNode and this.getRightOperand() = checkedNode + or + this.getLeftOperand() = checkedNode and this.getRightOperand() = strLitNode + ) + } + + override predicate checks(CfgNode expr, boolean branch) { + expr = checkedNode and branch = checkedBranch + } +} + +/** + * A validation of a value by checking for inclusion in an array of string + * literal values, for example in: + * + * ```rb + * name = params[:user_name] + * if %w(alice bob charlie).include? name + * User.find_by("username = #{name}") + * end + * ``` + * + * the `include?` call guards against `name` taking arbitrary values when used + * in the `find_by` call. + */ +// +class StringConstArrayInclusionCall extends DataFlow::BarrierGuard, + CfgNodes::ExprNodes::MethodCallCfgNode { + private CfgNode checkedNode; + + StringConstArrayInclusionCall() { + exists(ArrayLiteral aLit | + this.getExpr().getMethodName() = "include?" 
and + this.getExpr().getReceiver() = aLit + | + forall(Expr elem | elem = aLit.getAnElement() | elem instanceof StringLiteral) and + this.getArgument(0) = checkedNode + ) + } + + override predicate checks(CfgNode expr, boolean branch) { expr = checkedNode and branch = true } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll new file mode 100644 index 00000000000..ddd44329317 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll @@ -0,0 +1,125 @@ +/** Provides classes and predicates for defining flow summaries. */ + +import ruby +import codeql.ruby.DataFlow +private import internal.FlowSummaryImpl as Impl +private import internal.DataFlowDispatch + +// import all instances below +private module Summaries { } + +class SummaryComponent = Impl::Public::SummaryComponent; + +/** Provides predicates for constructing summary components. */ +module SummaryComponent { + private import Impl::Public::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + /** Gets a summary component that represents a qualifier. */ + SummaryComponent qualifier() { result = argument(-1) } + + /** Gets a summary component that represents a block argument. */ + SummaryComponent block() { result = argument(-2) } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) } +} + +class SummaryComponentStack = Impl::Public::SummaryComponentStack; + +/** Provides predicates for constructing stacks of summary components. 
*/ +module SummaryComponentStack { + private import Impl::Public::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing a qualifier. */ + SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } + + /** Gets a singleton stack representing a block argument. */ + SummaryComponentStack block() { result = singleton(SummaryComponent::block()) } + + /** Gets a singleton stack representing the return value of a call. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } +} + +/** A callable with a flow summary, identified by a unique string. */ +abstract class SummarizedCallable extends LibraryCallable { + bindingset[this] + SummarizedCallable() { any() } + + /** + * Holds if data may flow from `input` to `output` through this callable. + * + * `preservesValue` indicates whether this is a value-preserving step + * or a taint-step. + * + * Input specifications are restricted to stacks that end with + * `SummaryComponent::argument(_)`, preceded by zero or more + * `SummaryComponent::return()` or `SummaryComponent::content(_)` components. + * + * Output specifications are restricted to stacks that end with + * `SummaryComponent::return()` or `SummaryComponent::argument(_)`. + * + * Output stacks ending with `SummaryComponent::return()` can be preceded by zero + * or more `SummaryComponent::content(_)` components. + * + * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an + * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded + * by zero or more `SummaryComponent::content(_)` components. 
+ */ + pragma[nomagic] + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + none() + } + + /** + * Same as + * + * ```ql + * propagatesFlow( + * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + * ) + * ``` + * + * but uses an external (string) representation of the input and output stacks. + */ + pragma[nomagic] + predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() } + + /** + * Holds if values stored inside `content` are cleared on objects passed as + * the `i`th argument to this callable. + */ + pragma[nomagic] + predicate clearsContent(int i, DataFlow::Content content) { none() } +} + +private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable { + private SummarizedCallable sc; + + SummarizedCallableAdapter() { this = TLibraryCallable(sc) } + + final override predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + sc.propagatesFlow(input, output, preservesValue) + } + + final override predicate clearsContent(int i, DataFlow::Content content) { + sc.clearsContent(i, content) + } +} + +class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll new file mode 100644 index 00000000000..617bfd8678e --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll @@ -0,0 +1,37 @@ +/** + * Provides an extension point for modeling user-controlled data. + * Such data is often used as data-flow sources in security queries.
+ */ + +private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlow +// Need to import since frameworks can extend `RemoteFlowSource::Range` +private import codeql.ruby.Frameworks + +/** + * A data flow source of remote user input. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RemoteFlowSource::Range` instead. + */ +class RemoteFlowSource extends DataFlow::Node { + RemoteFlowSource::Range self; + + RemoteFlowSource() { this = self } + + /** Gets a string that describes the type of this remote flow source. */ + string getSourceType() { result = self.getSourceType() } +} + +/** Provides a class for modeling new sources of remote user input. */ +module RemoteFlowSource { + /** + * A data flow source of remote user input. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RemoteFlowSource` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets a string that describes the type of this remote flow source. */ + abstract string getSourceType(); + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll new file mode 100644 index 00000000000..dedfcd4e3de --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll @@ -0,0 +1,385 @@ +/** + * Provides the module `Ssa` for working with static single assignment (SSA) form. + */ + +/** + * Provides classes for working with static single assignment (SSA) form. + */ +module Ssa { + private import codeql.Locations + private import codeql.ruby.CFG + private import codeql.ruby.ast.Variable + private import internal.SsaImplCommon as SsaImplCommon + private import internal.SsaImpl as SsaImpl + private import CfgNodes::ExprNodes + + /** A static single assignment (SSA) definition. */ + class Definition extends SsaImplCommon::Definition { + /** + * Gets the control flow node of this SSA definition, if any. 
Phi nodes are + * examples of SSA definitions without a control flow node, as they are + * modelled at index `-1` in the relevant basic block. + */ + final CfgNode getControlFlowNode() { + exists(BasicBlock bb, int i | this.definesAt(_, bb, i) | result = bb.getNode(i)) + } + + /** + * Gets a control-flow node that reads the value of this SSA definition. + * + * Example: + * + * ```rb + * def m b # defines b_0 + * i = 0 # defines i_0 + * puts i # reads i_0 + * puts i + 1 # reads i_0 + * if b # reads b_0 + * i = 1 # defines i_1 + * puts i # reads i_1 + * puts i + 1 # reads i_1 + * else + * i = 2 # defines i_2 + * puts i # reads i_2 + * puts i + 1 # reads i_2 + * end + * # defines i_3 = phi(i_1, i_2) + * puts i # reads i3 + * end + * ``` + */ + final VariableReadAccessCfgNode getARead() { result = SsaImpl::getARead(this) } + + /** + * Gets a first control-flow node that reads the value of this SSA definition. + * That is, a read that can be reached from this definition without passing + * through other reads. + * + * Example: + * + * ```rb + * def m b # defines b_0 + * i = 0 # defines i_0 + * puts i # first read of i_0 + * puts i + 1 + * if b # first read of b_0 + * i = 1 # defines i_1 + * puts i # first read of i_1 + * puts i + 1 + * else + * i = 2 # defines i_2 + * puts i # first read of i_2 + * puts i + 1 + * end + * # defines i_3 = phi(i_1, i_2) + * puts i # first read of i3 + * end + * ``` + */ + final VariableReadAccessCfgNode getAFirstRead() { SsaImpl::firstRead(this, result) } + + /** + * Gets a last control-flow node that reads the value of this SSA definition. + * That is, a read that can reach the end of the enclosing CFG scope, or another + * SSA definition for the source variable, without passing through any other read. 
+ * + * Example: + * + * ```rb + * def m b # defines b_0 + * i = 0 # defines i_0 + * puts i + * puts i + 1 # last read of i_0 + * if b # last read of b_0 + * i = 1 # defines i_1 + * puts i + * puts i + 1 # last read of i_1 + * else + * i = 2 # defines i_2 + * puts i + * puts i + 1 # last read of i_2 + * end + * # defines i_3 = phi(i_1, i_2) + * puts i # last read of i3 + * end + * ``` + */ + final VariableReadAccessCfgNode getALastRead() { SsaImpl::lastRead(this, result) } + + /** + * Holds if `read1` and `read2` are adjacent reads of this SSA definition. + * That is, `read2` can be reached from `read1` without passing through + * another read. + * + * Example: + * + * ```rb + * def m b + * i = 0 # defines i_0 + * puts i # reads i_0 (read1) + * puts i + 1 # reads i_0 (read2) + * if b + * i = 1 # defines i_1 + * puts i # reads i_1 (read1) + * puts i + 1 # reads i_1 (read2) + * else + * i = 2 # defines i_2 + * puts i # reads i_2 (read1) + * puts i + 1 # reads i_2 (read2) + * end + * puts i + * end + * ``` + */ + final predicate hasAdjacentReads( + VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2 + ) { + SsaImpl::adjacentReadPair(this, read1, read2) + } + + /** + * Gets an SSA definition whose value can flow to this one in one step. This + * includes inputs to phi nodes and the prior definitions of uncertain writes. + */ + private Definition getAPhiInputOrPriorDefinition() { + result = this.(PhiNode).getAnInput() or + result = this.(CapturedCallDefinition).getPriorDefinition() + } + + /** + * Gets a definition that ultimately defines this SSA definition and is + * not itself a phi node. 
+ * + * Example: + * + * ```rb + * def m b + * i = 0 # defines i_0 + * puts i + * puts i + 1 + * if b + * i = 1 # defines i_1 + * puts i + * puts i + 1 + * else + * i = 2 # defines i_2 + * puts i + * puts i + 1 + * end + * # defines i_3 = phi(i_1, i_2); ultimate definitions are i_1 and i_2 + * puts i + * end + * ``` + */ + final Definition getAnUltimateDefinition() { + result = this.getAPhiInputOrPriorDefinition*() and + not result instanceof PhiNode + } + + override string toString() { result = this.getControlFlowNode().toString() } + + /** Gets the location of this SSA definition. */ + Location getLocation() { result = this.getControlFlowNode().getLocation() } + } + + /** + * An SSA definition that corresponds to a write. For example `x = 10` in + * + * ```rb + * x = 10 + * puts x + * ``` + */ + class WriteDefinition extends Definition, SsaImplCommon::WriteDefinition { + private VariableWriteAccess write; + + WriteDefinition() { + exists(BasicBlock bb, int i, Variable v | + this.definesAt(v, bb, i) and + SsaImpl::variableWriteActual(bb, i, v, write) + ) + } + + /** Gets the underlying write access. */ + final VariableWriteAccess getWriteAccess() { result = write } + + /** + * Holds if this SSA definition represents a direct assignment of `value` + * to the underlying variable. + */ + predicate assigns(CfgNodes::ExprCfgNode value) { + exists(CfgNodes::ExprNodes::AssignExprCfgNode a, BasicBlock bb, int i | + this.definesAt(_, bb, i) and + a = bb.getNode(i) and + value = a.getRhs() + ) + } + + final override string toString() { result = Definition.super.toString() } + + final override Location getLocation() { result = this.getControlFlowNode().getLocation() } + } + + /** + * An SSA definition inserted at the beginning of a scope to represent an + * uninitialized local variable. 
For example, in + * + * ```rb + * def m + * x = 10 if b + * puts x + * end + * ``` + * + * since the assignment to `x` is conditional, an uninitialized definition for + * `x` is inserted at the start of `m`. + */ + class UninitializedDefinition extends Definition, SsaImplCommon::WriteDefinition { + UninitializedDefinition() { + exists(BasicBlock bb, int i, Variable v | + this.definesAt(v, bb, i) and + SsaImpl::uninitializedWrite(bb, i, v) + ) + } + + final override string toString() { result = "" } + + final override Location getLocation() { result = this.getBasicBlock().getLocation() } + } + + /** + * An SSA definition inserted at the beginning of a scope to represent a + * captured local variable. For example, in + * + * ```rb + * def m x + * y = 0 + * x.times do |x| + * y += x + * end + * return y + * end + * ``` + * + * an entry definition for `y` is inserted at the start of the `do` block. + */ + class CapturedEntryDefinition extends Definition, SsaImplCommon::WriteDefinition { + CapturedEntryDefinition() { + exists(BasicBlock bb, int i, Variable v | + this.definesAt(v, bb, i) and + SsaImpl::capturedEntryWrite(bb, i, v) + ) + } + + final override string toString() { result = "" } + + override Location getLocation() { result = this.getBasicBlock().getLocation() } + } + + /** + * An SSA definition inserted at a call that may update the value of a captured + * variable. For example, in + * + * ```rb + * def m x + * y = 0 + * x.times do |x| + * y += x + * end + * return y + * end + * ``` + * + * a definition for `y` is inserted at the call to `times`. + */ + class CapturedCallDefinition extends Definition, SsaImplCommon::UncertainWriteDefinition { + CapturedCallDefinition() { + exists(Variable v, BasicBlock bb, int i | + this.definesAt(v, bb, i) and + SsaImpl::capturedCallWrite(bb, i, v) + ) + } + + /** + * Gets the immediately preceding definition. Since this update is uncertain, + * the value from the preceding definition might still be valid.
+ */ + final Definition getPriorDefinition() { result = SsaImpl::uncertainWriteDefinitionInput(this) } + + override string toString() { result = this.getControlFlowNode().toString() } + } + + /** + * A phi node. For example, in + * + * ```rb + * if b + * x = 0 + * else + * x = 1 + * end + * puts x + * ``` + * + * a phi node for `x` is inserted just before the call `puts x`. + */ + class PhiNode extends Definition, SsaImplCommon::PhiNode { + /** + * Gets an input of this phi node. + * + * Example: + * + * ```rb + * def m b + * i = 0 # defines i_0 + * puts i + * puts i + 1 + * if b + * i = 1 # defines i_1 + * puts i + * puts i + 1 + * else + * i = 2 # defines i_2 + * puts i + * puts i + 1 + * end + * # defines i_3 = phi(i_1, i_2); inputs are i_1 and i_2 + * puts i + * end + * ``` + */ + final Definition getAnInput() { this.hasInputFromBlock(result, _) } + + /** Holds if `inp` is an input to this phi node along the edge originating in `bb`. */ + predicate hasInputFromBlock(Definition inp, BasicBlock bb) { + inp = SsaImpl::phiHasInputFromBlock(this, bb) + } + + private string getSplitString() { + result = this.getBasicBlock().getFirstNode().(CfgNodes::AstCfgNode).getSplitsString() + } + + override string toString() { + exists(string prefix | + prefix = "[" + this.getSplitString() + "] " + or + not exists(this.getSplitString()) and + prefix = "" + | + result = prefix + "phi" + ) + } + + /* + * The location of a phi node is the same as the location of the first node + * in the basic block in which it is defined. + * + * Strictly speaking, the node is *before* the first node, but such a location + * does not exist in the source program. 
+ */ + + final override Location getLocation() { + result = this.getBasicBlock().getFirstNode().getLocation() + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll new file mode 100644 index 00000000000..d3cddf8a3a0 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll @@ -0,0 +1,459 @@ +private import ruby +private import codeql.ruby.CFG +private import DataFlowPrivate +private import codeql.ruby.typetracking.TypeTracker +private import codeql.ruby.ast.internal.Module +private import FlowSummaryImpl as FlowSummaryImpl +private import codeql.ruby.dataflow.FlowSummary + +newtype TReturnKind = + TNormalReturnKind() or + TBreakReturnKind() + +/** + * Gets a node that can read the value returned from `call` with return kind + * `kind`. + */ +OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) } + +/** + * A return kind. A return kind describes how a value can be returned + * from a callable. + */ +abstract class ReturnKind extends TReturnKind { + /** Gets a textual representation of this return kind. */ + abstract string toString(); +} + +/** + * A value returned from a callable using a `return` statement or an expression + * body, that is, a "normal" return. + */ +class NormalReturnKind extends ReturnKind, TNormalReturnKind { + override string toString() { result = "return" } +} + +/** + * A value returned from a callable using a `break` statement. + */ +class BreakReturnKind extends ReturnKind, TBreakReturnKind { + override string toString() { result = "break" } +} + +/** A callable defined in library code, identified by a unique string. */ +abstract class LibraryCallable extends string { + bindingset[this] + LibraryCallable() { any() } + + /** Gets a call to this library callable. */ + abstract Call getACall(); +} + +/** + * A callable. 
This includes callables from source code, as well as callables + * defined in library code. + */ +class DataFlowCallable extends TDataFlowCallable { + /** Gets the underlying source code callable, if any. */ + Callable asCallable() { this = TCfgScope(result) } + + /** Gets the underlying library callable, if any. */ + LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) } + + /** Gets a textual representation of this callable. */ + string toString() { result = [this.asCallable().toString(), this.asLibraryCallable()] } + + /** Gets the location of the underlying source code callable, if any. */ + Location getLocation() { result = this.asCallable().getLocation() } +} + +/** + * A call. This includes calls from source code, as well as call(back)s + * inside library callables with a flow summary. + */ +class DataFlowCall extends TDataFlowCall { + /** Gets the enclosing callable. */ + DataFlowCallable getEnclosingCallable() { none() } + + /** Gets the underlying source code call, if any. */ + CfgNodes::ExprNodes::CallCfgNode asCall() { none() } + + /** Gets a textual representation of this call. */ + string toString() { none() } + + /** Gets the location of this call. */ + Location getLocation() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * A synthesized call inside a callable with a flow summary. + * + * For example, in + * ```rb + * ints.each do |i| + * puts i + * end + * ``` + * + * there is a call to the block argument inside `each`. 
+ */ +class SummaryCall extends DataFlowCall, TSummaryCall { + private FlowSummaryImpl::Public::SummarizedCallable c; + private DataFlow::Node receiver; + + SummaryCall() { this = TSummaryCall(c, receiver) } + + /** Gets the data flow node that this call targets. */ + DataFlow::Node getReceiver() { result = receiver } + + override DataFlowCallable getEnclosingCallable() { result = c } + + override string toString() { result = "[summary] call to " + receiver + " in " + c } + + override Location getLocation() { result = c.getLocation() } +} + +private class NormalCall extends DataFlowCall, TNormalCall { + private CfgNodes::ExprNodes::CallCfgNode c; + + NormalCall() { this = TNormalCall(c) } + + override CfgNodes::ExprNodes::CallCfgNode asCall() { result = c } + + override DataFlowCallable getEnclosingCallable() { result = TCfgScope(c.getScope()) } + + override string toString() { result = c.toString() } + + override Location getLocation() { result = c.getLocation() } +} + +pragma[nomagic] +private predicate methodCall( + CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode, string method +) { + exists(DataFlow::Node nodeTo | + method = call.getExpr().(MethodCall).getMethodName() and + nodeTo.asExpr() = call.getReceiver() and + sourceNode.flowsTo(nodeTo) + ) +} + +private Block yieldCall(CfgNodes::ExprNodes::CallCfgNode call) { + call.getExpr() instanceof YieldCall and + exists(BlockParameterNode node | + node = trackBlock(result) and + node.getMethod() = call.getExpr().getEnclosingMethod() + ) +} + +pragma[nomagic] +private predicate superCall(CfgNodes::ExprNodes::CallCfgNode call, Module superClass, string method) { + call.getExpr() instanceof SuperCall and + exists(Module tp | + tp = call.getExpr().getEnclosingModule().getModule() and + superClass = tp.getSuperClass() and + method = call.getExpr().getEnclosingMethod().getName() + ) +} + +pragma[nomagic] +private predicate instanceMethodCall(CfgNodes::ExprNodes::CallCfgNode call, Module tp, 
string method) { + exists(DataFlow::LocalSourceNode sourceNode | + methodCall(call, sourceNode, method) and + sourceNode = trackInstance(tp) + ) +} + +cached +private module Cached { + cached + newtype TDataFlowCallable = + TCfgScope(CfgScope scope) or + TLibraryCallable(LibraryCallable callable) + + cached + newtype TDataFlowCall = + TNormalCall(CfgNodes::ExprNodes::CallCfgNode c) or + TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, DataFlow::Node receiver) { + FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) + } + + cached + CfgScope getTarget(CfgNodes::ExprNodes::CallCfgNode call) { + // Temporarily disable operation resolution (due to bad performance) + not call.getExpr() instanceof Operation and + ( + exists(string method | + exists(Module tp | + instanceMethodCall(call, tp, method) and + result = lookupMethod(tp, method) and + if result.(Method).isPrivate() + then + exists(Self self | + self = call.getReceiver().getExpr() and + pragma[only_bind_out](self.getEnclosingModule().getModule().getSuperClass*()) = + pragma[only_bind_out](result.getEnclosingModule().getModule()) + ) and + // For now, we restrict the scope of top-level declarations to their file. 
+ // This may remove some plausible targets, but also removes a lot of + // implausible targets + if result.getEnclosingModule() instanceof Toplevel + then result.getFile() = call.getFile() + else any() + else any() + ) + or + exists(DataFlow::LocalSourceNode sourceNode | + methodCall(call, sourceNode, method) and + sourceNode = trackSingletonMethod(result, method) + ) + ) + or + exists(Module superClass, string method | + superCall(call, superClass, method) and + result = lookupMethod(superClass, method) + ) + or + result = yieldCall(call) + ) + } +} + +import Cached + +private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) { + t.start() and + ( + result.asExpr().getExpr() instanceof NilLiteral and tp = TResolved("NilClass") + or + result.asExpr().getExpr().(BooleanLiteral).isFalse() and tp = TResolved("FalseClass") + or + result.asExpr().getExpr().(BooleanLiteral).isTrue() and tp = TResolved("TrueClass") + or + result.asExpr().getExpr() instanceof IntegerLiteral and tp = TResolved("Integer") + or + result.asExpr().getExpr() instanceof FloatLiteral and tp = TResolved("Float") + or + result.asExpr().getExpr() instanceof RationalLiteral and tp = TResolved("Rational") + or + result.asExpr().getExpr() instanceof ComplexLiteral and tp = TResolved("Complex") + or + result.asExpr().getExpr() instanceof StringlikeLiteral and tp = TResolved("String") + or + exists(ConstantReadAccess array, MethodCall mc | + result.asExpr().getExpr() = mc and + mc.getMethodName() = "[]" and + mc.getReceiver() = array and + array.getName() = "Array" and + array.hasGlobalScope() and + tp = TResolved("Array") + ) + or + result.asExpr().getExpr() instanceof HashLiteral and tp = TResolved("Hash") + or + result.asExpr().getExpr() instanceof MethodBase and tp = TResolved("Symbol") + or + result.asParameter() instanceof BlockParameter and tp = TResolved("Proc") + or + result.asExpr().getExpr() instanceof Lambda and tp = TResolved("Proc") + or + 
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node nodeTo | + call.getExpr().(MethodCall).getMethodName() = "new" and + nodeTo.asExpr() = call.getReceiver() and + trackModule(tp).flowsTo(nodeTo) and + result.asExpr() = call + ) + or + // `self` in method + exists(Self self, Method enclosing | + self = result.asExpr().getExpr() and + enclosing = self.getEnclosingMethod() and + tp = enclosing.getEnclosingModule().getModule() and + not self.getEnclosingModule().getEnclosingMethod() = enclosing + ) + or + // `self` in singleton method + exists(Self self, MethodBase enclosing | + self = result.asExpr().getExpr() and + flowsToSingletonMethodObject(trackInstance(tp), enclosing) and + enclosing = self.getEnclosingMethod() and + not self.getEnclosingModule().getEnclosingMethod() = enclosing + ) + or + // `self` in top-level + exists(Self self, Toplevel enclosing | + self = result.asExpr().getExpr() and + enclosing = self.getEnclosingModule() and + tp = TResolved("Object") and + not self.getEnclosingMethod().getEnclosingModule() = enclosing + ) + or + // a module or class + exists(Module m | + result = trackModule(m) and + if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module") + ) + ) + or + exists(TypeTracker t2, StepSummary summary | + result = trackInstanceRec(tp, t2, summary) and t = t2.append(summary) + ) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackInstanceRec(Module tp, TypeTracker t, StepSummary summary) { + StepSummary::step(trackInstance(tp, t), result, summary) +} + +private DataFlow::LocalSourceNode trackInstance(Module tp) { + result = trackInstance(tp, TypeTracker::end()) +} + +private DataFlow::LocalSourceNode trackBlock(Block block, TypeTracker t) { + t.start() and result.asExpr().getExpr() = block + or + exists(TypeTracker t2, StepSummary summary | + result = trackBlockRec(block, t2, summary) and t = t2.append(summary) + ) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackBlockRec(Block block, 
TypeTracker t, StepSummary summary) { + StepSummary::step(trackBlock(block, t), result, summary) +} + +private DataFlow::LocalSourceNode trackBlock(Block block) { + result = trackBlock(block, TypeTracker::end()) +} + +private predicate singletonMethod(MethodBase method, Expr object) { + object = method.(SingletonMethod).getObject() + or + exists(SingletonClass cls | + object = cls.getValue() and method instanceof Method and method = cls.getAMethod() + ) +} + +pragma[nomagic] +private predicate flowsToSingletonMethodObject(DataFlow::LocalSourceNode nodeFrom, MethodBase method) { + exists(DataFlow::LocalSourceNode nodeTo | + nodeFrom.flowsTo(nodeTo) and + singletonMethod(method, nodeTo.asExpr().getExpr()) + ) +} + +pragma[nomagic] +private predicate moduleFlowsToSingletonMethodObject(Module m, MethodBase method) { + flowsToSingletonMethodObject(trackModule(m), method) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackSingletonMethod0(MethodBase method, TypeTracker t) { + t.start() and + ( + flowsToSingletonMethodObject(result, method) + or + exists(Module m | result = trackModule(m) and moduleFlowsToSingletonMethodObject(m, method)) + ) + or + exists(TypeTracker t2, StepSummary summary | + result = trackSingletonMethod0Rec(method, t2, summary) and t = t2.append(summary) + ) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackSingletonMethod0Rec( + MethodBase method, TypeTracker t, StepSummary summary +) { + StepSummary::step(trackSingletonMethod0(method, t), result, summary) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackSingletonMethod(MethodBase m, string name) { + result = trackSingletonMethod0(m, TypeTracker::end()) and + name = m.getName() +} + +private DataFlow::Node selfInModule(Module tp) { + exists(Self self, ModuleBase enclosing | + self = result.asExpr().getExpr() and + enclosing = self.getEnclosingModule() and + tp = enclosing.getModule() and + not self.getEnclosingMethod().getEnclosingModule() = enclosing + ) +} + 
+private DataFlow::LocalSourceNode trackModule(Module tp, TypeTracker t) { + t.start() and + ( + // ConstantReadAccess to Module + resolveScopeExpr(result.asExpr().getExpr()) = tp + or + // `self` reference to Module + result = selfInModule(tp) + ) + or + exists(TypeTracker t2, StepSummary summary | + result = trackModuleRec(tp, t2, summary) and t = t2.append(summary) + ) +} + +pragma[nomagic] +private DataFlow::LocalSourceNode trackModuleRec(Module tp, TypeTracker t, StepSummary summary) { + StepSummary::step(trackModule(tp, t), result, summary) +} + +private DataFlow::LocalSourceNode trackModule(Module tp) { + result = trackModule(tp, TypeTracker::end()) +} + +/** Gets a viable run-time target for the call `call`. */ +DataFlowCallable viableCallable(DataFlowCall call) { + result = TCfgScope(getTarget(call.asCall())) and + not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall` + or + exists(LibraryCallable callable | + result = TLibraryCallable(callable) and + call.asCall().getExpr() = callable.getACall() + ) +} + +/** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. This is the case if the + * qualifier accesses a parameter of the enclosing callable `c` (including + * the implicit `self` parameter). + */ +predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() } + +/** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ +DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() } + +/** + * Holds if `e` is an `ExprNode` that may be returned by a call to `c`. 
+ */ +predicate exprNodeReturnedFrom(DataFlow::ExprNode e, Callable c) { + exists(ReturningNode r | + r.getEnclosingCallable().asCallable() = c and + ( + r.(ExplicitReturnNode).getReturningNode().getReturnedValueNode() = e.asExpr() or + r.(ExprReturnNode) = e + ) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll new file mode 100644 index 00000000000..4ca06c93362 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll @@ -0,0 +1,4559 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. 
+ * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. 
+ */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, Content c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. 
+ * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. 
+ */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate 
isParameterOf(DataFlowCallable c, int i) { + this.asNode().(ParamNode).isParameterOf(c, i) + } + + int getPosition() { this.isParameterOf(_, result) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) and + config.isSource(n) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) and + config.isSink(n) + ) +} + +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) + or + config.isBarrierOut(n) and + not config.isSink(n) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } + +pragma[nomagic] +private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(n1, n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. 
+ */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(n1, n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. 
+ */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + read(node1.asNode(), c, node2.asNode()) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(node1.asNode(), tc, node2.asNode(), contentType) and + read(_, tc.getContent(), _, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private module Stage1 { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. 
+ */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + not fullBarrier(node, config) and + ( + sourceNode(node, config) and + cc = false + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + ) + } + + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + read(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. 
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Content c, Configuration config) {
+    exists(NodeEx mid, NodeEx node, TypedContent tc |
+      // the store target itself must not be a full barrier
+      not fullBarrier(node, config) and
+      useFieldFlow(config) and
+      fwdFlow(mid, _, config) and
+      store(mid, tc, node, _, config) and
+      c = tc.getContent()
+    )
+  }
+
+  /**
+   * Holds if some return node with return position `pos` is in `fwdFlow`
+   * with call context `cc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+    exists(RetNodeEx ret |
+      fwdFlow(ret, cc, config) and
+      ret.getReturnPosition() = pos
+    )
+  }
+
+  /**
+   * Holds if `out` is a viable out node of `call` for a return position that
+   * is reached by `fwdFlow` with call context `cc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+    exists(ReturnPosition pos |
+      fwdFlowReturnPosition(pos, cc, config) and
+      viableReturnPosOutEx(call, pos, out)
+    )
+  }
+
+  /** Holds if `fwdFlowOut` holds for `call` and `out` with call context `true`. */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+    fwdFlowOut(call, out, true, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+    exists(ArgNodeEx arg |
+      fwdFlow(arg, cc, config) and
+      viableParamArgEx(call, _, arg)
+    )
+  }
+
+  /**
+   * Holds if `node` is part of a path from a source to a sink in the
+   * configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from
+   * the enclosing callable in order to reach a sink.
+   */
+  pragma[nomagic]
+  predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+    revFlow0(node, toReturn, config) and
+    // only nodes that are also reached in the forward direction are kept
+    fwdFlow(node, config)
+  }
+
+  /**
+   * Holds if `node` is a sink in `fwdFlow`, or has a step to a node in
+   * `revFlow`. This is `revFlow` without the final `fwdFlow` restriction.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+    // sink
+    fwdFlow(node, config) and
+    sinkNode(node, config) and
+    toReturn = false
+    or
+    // local step: `toReturn` is carried over unchanged
+    exists(NodeEx mid |
+      localFlowStep(node, mid, config) and
+      revFlow(mid, toReturn, config)
+    )
+    or
+    // additional local step: `toReturn` is carried over unchanged
+    exists(NodeEx mid |
+      additionalLocalFlowStep(node, mid, config) and
+      revFlow(mid, toReturn, config)
+    )
+    or
+    // jump step: `toReturn` is reset to `false`
+    exists(NodeEx mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, config) and
+      toReturn = false
+    )
+    or
+    // additional jump step: `toReturn` is reset to `false`
+    exists(NodeEx mid |
+      additionalJumpStep(node, mid, config) and
+      revFlow(mid, _, config) and
+      toReturn = false
+    )
+    or
+    // store
+    exists(Content c |
+      revFlowStore(c, node, toReturn, config) and
+      revFlowConsCand(c, config)
+    )
+    or
+    // read
+    exists(NodeEx mid, Content c |
+      read(node, c, mid, config) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      revFlow(mid, toReturn, pragma[only_bind_into](config))
+    )
+    or
+    // flow into a callable
+    exists(DataFlowCall call |
+      // the parameter reaches a sink without having to return
+      revFlowIn(call, node, false, config) and
+      toReturn = false
+      or
+      // the parameter must return: continue from an output of `call`
+      revFlowInToReturn(call, node, config) and
+      revFlowIsReturned(call, toReturn, config)
+    )
+    or
+    // flow out of a callable
+    exists(ReturnPosition pos |
+      revFlowOut(pos, config) and
+      node.(RetNodeEx).getReturnPosition() = pos and
+      toReturn = true
+    )
+  }
+
+  /**
+   * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Content c, Configuration config) {
+    exists(NodeEx mid, NodeEx node |
+      fwdFlow(node, pragma[only_bind_into](config)) and
+      read(node, c, mid, config) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
+    )
+  }
+
+  /**
+   * Holds if `node` stores content `c` into a node that is in `revFlow` with
+   * flag `toReturn`, where `c` is also a forward store candidate.
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+    exists(NodeEx mid, TypedContent tc |
+      revFlow(mid, toReturn, pragma[only_bind_into](config)) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      store(node, tc, mid, _, config) and
+      c = tc.getContent()
+    )
+  }
+
+  /**
+   * Holds if `c` is the target of both a read and a store in the flow covered
+   * by `revFlow`.
+   */
+  private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+    revFlowConsCand(c, conf) and
+    revFlowStore(c, _, _, conf)
+  }
+
+  /**
+   * Holds if `out` is a viable out node of `call` for return position `pos`,
+   * and some return node at `pos` is in `fwdFlow`.
+   */
+  pragma[nomagic]
+  predicate viableReturnPosOutNodeCandFwd1(
+    DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+  ) {
+    fwdFlowReturnPosition(pos, _, config) and
+    viableReturnPosOutEx(call, pos, out)
+  }
+
+  /** Holds if return position `pos` has a viable out node that is in `revFlow`. */
+  pragma[nomagic]
+  private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+    exists(DataFlowCall call, NodeEx out |
+      revFlow(out, _, config) and
+      viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+    )
+  }
+
+  /**
+   * Holds if `arg` is a viable argument for parameter `p` at `call`, and
+   * `arg` is in `fwdFlow`.
+   */
+  pragma[nomagic]
+  predicate viableParamArgNodeCandFwd1(
+    DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+  ) {
+    viableParamArgEx(call, p, arg) and
+    fwdFlow(arg, config)
+  }
+
+  /**
+   * Holds if `arg` at `call` flows into a parameter that is in `revFlow`
+   * with flag `toReturn`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIn(
+    DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+  ) {
+    exists(ParamNodeEx p |
+      revFlow(p, toReturn, config) and
+      viableParamArgNodeCandFwd1(call, p, arg, config)
+    )
+  }
+
+  /** Holds if `revFlowIn` holds for `call` and `arg` with `toReturn = true`. */
+  pragma[nomagic]
+  private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+    revFlowIn(call,
arg, true, config)
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`
+   * and data might flow through the target callable resulting in reverse flow
+   * reaching an argument of `call`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+    exists(NodeEx out |
+      revFlow(out, toReturn, config) and
+      fwdFlowOutFromArg(call, out, config)
+    )
+  }
+
+  /**
+   * Holds if the store step from `node1` to `node2` targets a node in
+   * `revFlow` and stores content that is both read and stored in `revFlow`.
+   * `ap1` is unconstrained (`Ap` is `Unit` in this stage).
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+    Configuration config
+  ) {
+    exists(Content c |
+      revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+      revFlow(node2, pragma[only_bind_into](config)) and
+      store(node1, tc, node2, contentType, config) and
+      c = tc.getContent() and
+      exists(ap1)
+    )
+  }
+
+  /**
+   * Holds if the read step of `c` from `n1` to `n2` targets a node in
+   * `revFlow` and `c` is both read and stored in `revFlow`.
+   */
+  pragma[nomagic]
+  predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+    revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+    revFlow(n2, pragma[only_bind_into](config)) and
+    read(n1, c, n2, pragma[only_bind_into](config))
+  }
+
+  /** Holds if `node` is in `revFlow` for some value of `toReturn`. */
+  pragma[nomagic]
+  predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) }
+
+  /**
+   * Five-argument form of `revFlow`; `returnAp` and `ap` are unconstrained
+   * `Unit` values in this stage.
+   */
+  predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow(node, toReturn, config) and exists(returnAp) and exists(ap)
+  }
+
+  /**
+   * Holds if `node` is in `revFlow` with `toReturn = true` and in `fwdFlow`
+   * with `cc = true`, and is neither an in-barrier nor an out-barrier.
+   */
+  private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
+    revFlow(node, true, config) and
+    fwdFlow(node, true, config) and
+    not inBarrier(node, config) and
+    not outBarrier(node, config)
+  }
+
+  /** Holds if flow may return from `callable`.
*/
+  pragma[nomagic]
+  private predicate returnFlowCallableNodeCand(
+    DataFlowCallable callable, ReturnKindExt kind, Configuration config
+  ) {
+    exists(RetNodeEx ret |
+      throughFlowNodeCand(ret, config) and
+      callable = ret.getEnclosingCallable() and
+      kind = ret.getKind()
+    )
+  }
+
+  /**
+   * Holds if flow may enter through `p` and reach a return node making `p` a
+   * candidate for the origin of a summary.
+   */
+  predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(ReturnKindExt kind |
+      throughFlowNodeCand(p, config) and
+      returnFlowCallableNodeCand(c, kind, config) and
+      p.getEnclosingCallable() = c and
+      // `ap` is unconstrained (`Ap` is `Unit` in this stage)
+      exists(ap) and
+      // we don't expect a parameter to return stored in itself
+      not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition()
+    )
+  }
+
+  /**
+   * Holds if some argument of `call` is in `revFlow`, flows into a parameter
+   * that must return, and an output of `call` continues the reverse flow.
+   */
+  pragma[nomagic]
+  predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+    exists(ArgNodeEx arg, boolean toReturn |
+      revFlow(arg, toReturn, config) and
+      revFlowInToReturn(call, arg, config) and
+      revFlowIsReturned(call, toReturn, config)
+    )
+  }
+
+  /**
+   * Reports result-set sizes for this stage: `fwd = true` counts `fwdFlow`
+   * nodes, contents and tuples; `fwd = false` counts the `revFlow`
+   * counterparts. `conscand` is always `-1` in this stage.
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(NodeEx node | fwdFlow(node, config)) and
+    fields = count(Content f0 | fwdFlowConsCand(f0, config)) and
+    conscand = -1 and
+    tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config))
+    or
+    fwd = false and
+    nodes = count(NodeEx node | revFlow(node, _, config)) and
+    fields = count(Content f0 | revFlowConsCand(f0, config)) and
+    conscand = -1 and
+    tuples = count(NodeEx n, boolean b | revFlow(n, b, config))
+  }
+  /* End: Stage 1 logic.
*/
+}
+
+/** Holds if `localFlowStep` holds from `node1` to `node2` and `node2` is in `Stage1::revFlow`. */
+pragma[noinline]
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  Stage1::revFlow(node2, config) and
+  localFlowStep(node1, node2, config)
+}
+
+/**
+ * Holds if `additionalLocalFlowStep` holds from `node1` to `node2` and
+ * `node2` is in `Stage1::revFlow`.
+ */
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  Stage1::revFlow(node2, config) and
+  additionalLocalFlowStep(node1, node2, config)
+}
+
+/**
+ * Holds if `Stage1::viableReturnPosOutNodeCandFwd1` holds for `call`, `pos`
+ * and `out`, and `out` is in `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+  DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+  Stage1::revFlow(out, config) and
+  Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+  viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
+  Stage1::revFlow(ret, config) and
+  not outBarrier(ret, config) and
+  not inBarrier(out, config)
+}
+
+/**
+ * Holds if `Stage1::viableParamArgNodeCandFwd1` holds for `call`, `p` and
+ * `arg`, and `arg` is in `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+  DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+  Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and
+  Stage1::revFlow(arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+  viableParamArgNodeCand1(call, p, arg, config) and
+  Stage1::revFlow(p, config) and
+  not outBarrier(arg, config) and
+  not inBarrier(p, config)
+}
+
+/**
+ * Gets the amount of forward branching on the origin of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int branch(NodeEx n1, Configuration conf) {
+  // `strictcount` has no result when nothing matches, so this is undefined
+  // (rather than 0) for nodes without outgoing cross-call edges
+  result =
+    strictcount(NodeEx n |
+      flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf)
+    )
+}
+
+/**
+ * Gets the amount of backward branching on the target of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int join(NodeEx n2, Configuration conf) {
+  // `strictcount` has no result when nothing matches, so this is undefined
+  // (rather than 0) for nodes without incoming cross-call edges
+  result =
+    strictcount(NodeEx n |
+      flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf)
+    )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink. The
+ * `allowsFieldFlow` flag indicates whether the branching is within the limit
+ * specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, ret, out, config) and
+  exists(int b, int j |
+    b = branch(ret, config) and
+    j = join(out, config) and
+    // the smaller of the two branching factors is compared against the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink. The `allowsFieldFlow` flag indicates whether
+ * the branching is within the limit specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+  flowIntoCallNodeCand1(call, arg, p, config) and
+  exists(int b, int j |
+    b = branch(arg, config) and
+    j = join(p, config) and
+    // the smaller of the two branching factors is compared against the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Stage 2 of the flow computation, built on `Stage1`. Access paths are
+ * Booleans in this stage (`ApNil` is `false`) and call contexts are
+ * `CallContext` values.
+ */
+private module Stage2 {
+  module PrevStage = Stage1;
+
+  class ApApprox = PrevStage::Ap;
+
+  class Ap = boolean;
+
+  /** The access path `false`. */
+  class ApNil extends Ap {
+    ApNil() { this = false }
+  }
+
+  /** Gets the previous stage's approximation of `ap`; any `ApApprox` value is accepted. */
+  bindingset[result, ap]
+  private ApApprox getApprox(Ap ap) { any() }
+
+  /** Gets the `ApNil` access path for a `node` that is in the previous stage's `revFlow`. */
+  private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) }
+
+  /** Gets the access path after pushing `tc` onto `tail`; this is always `true`. */
+  bindingset[tc, tail]
+  private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) }
+
+  /** Gets a possible head content of `ap`: any content, provided `ap` is `true`. */
+  pragma[inline]
+  private Content getHeadContent(Ap ap) { exists(result) and ap = true }
+
+  class ApOption = BooleanOption;
+
+  ApOption apNone() { result = TBooleanNone() }
+
+  ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+  class Cc = CallContext;
+
+  class CcCall = CallContextCall;
+
+  class CcNoCall = CallContextNoCall;
+
+  Cc ccNone() { result instanceof CallContextAny }
+
+  private class LocalCc = Unit;
+
+  /**
+   * Gets the call context for entering callable `c` through `call` from
+   * outer context `outercc`: `TSpecificCall(call)` when
+   * `recordDataFlowCallSiteDispatch(call, c)` holds, otherwise `TSomeCall()`.
+   */
+  bindingset[call, c, outercc]
+  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+    checkCallContextCall(outercc, call, c) and
+    if recordDataFlowCallSiteDispatch(call, c)
+    then result = TSpecificCall(call)
+    else result = TSomeCall()
+  }
+
+  /**
+   * Gets the call context after returning from callable `c` through `call`
+   * with inner context `innercc`: `TReturn(c, call)` when
+   * `reducedViableImplInReturn(c, call)` holds, otherwise `ccNone()`.
+   */
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+    checkCallContextReturn(innercc, c, call) and
+    if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+  }
+
+  /** Gets the local call context; unconstrained, since `LocalCc` is `Unit` in this stage. */
+  bindingset[node, cc, config]
+  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+  /**
+   * Holds if there is a local step from `node1` to `node2`; `preservesValue`
+   * is `true` for a `localFlowStepNodeCand1` step and `false` for an
+   * `additionalLocalFlowStepNodeCand1` step.
+   */
+  private predicate localStep(
+    NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+  ) {
+    (
+      preservesValue = true and
+      localFlowStepNodeCand1(node1, node2, config)
+      or
+      preservesValue = false and
+      additionalLocalFlowStepNodeCand1(node1, node2, config)
+    ) and
+    // `ap` and `lcc` are not constrained beyond their declared types
+    exists(ap) and
+    exists(lcc)
+  }
+
+  // This stage uses the Stage 1 call-edge candidates (the variants with `allowsFieldFlow`).
+  private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+  private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+  /** Store type check; always holds in this stage. */
+  bindingset[ap, contentType]
+  private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+  /* Begin: Stage 2 logic. */
+  /** Holds if `node` is in the previous stage's `revFlow` with access path `apa`. */
+  private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+    PrevStage::revFlow(node, _, _, apa, config)
+  }
+
+  /**
+   * Holds if `flowOutOfCall` holds for these arguments and, according to the
+   * previous stage, `call` may have flow through it and some parameter of
+   * the callable enclosing `ret` may flow through.
+   */
+  pragma[nomagic]
+  private predicate flowThroughOutOfCall(
+    DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+  ) {
+    flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+    PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+    PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+      pragma[only_bind_into](config))
+  }
+
+  /**
+   * Holds if `node` is reachable with access path `ap` from a source in the
+   * configuration `config`.
+   *
+   * The call context `cc` records whether the node is reached through an
+   * argument in a call, and if so, `argAp` records the access path of that
+   * argument.
+   */
+  pragma[nomagic]
+  predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+    // source
+    flowCand(node, _, config) and
+    sourceNode(node, config) and
+    cc = ccNone() and
+    argAp = apNone() and
+    ap = getApNil(node)
+    or
+    // local step
+    exists(NodeEx mid, Ap ap0, LocalCc localCc |
+      fwdFlow(mid, cc, argAp, ap0, config) and
+      localCc = getLocalCc(mid, cc, config)
+    |
+      // a value-preserving step keeps the access path
+      localStep(mid, node, true, _, config, localCc) and
+      ap = ap0
+      or
+      // a non-value-preserving step is only taken from an `ApNil` access path
+      localStep(mid, node, false, ap, config, localCc) and
+      ap0 instanceof ApNil
+    )
+    or
+    // jump step: call context and argument access path are reset
+    exists(NodeEx mid |
+      fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+      flowCand(node, _, pragma[only_bind_into](config)) and
+      jumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone()
+    )
+    or
+    // additional jump step: only from and to an `ApNil` access path
+    exists(NodeEx mid, ApNil nil |
+      fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+      flowCand(node, _, pragma[only_bind_into](config)) and
+      additionalJumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone() and
+      ap = getApNil(node)
+    )
+    or
+    // store
+    exists(TypedContent tc, Ap ap0 |
+      fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+      ap = apCons(tc, ap0)
+    )
+    or
+    // read
+    exists(Ap ap0, Content c |
+      fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+      fwdFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // flow into a callable
+    exists(ApApprox apa |
+      fwdFlowIn(_, node, _, cc, _, ap, config) and
+      apa = getApprox(ap) and
+      // the argument access path is only recorded for parameters that may flow through
+      if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+      then argAp = apSome(ap)
+      else argAp = apNone()
+    )
+    or
+    // flow out of a callable
+    fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+    or
+    // flow out of a callable that was entered through an argument of `call`
+    exists(DataFlowCall call, Ap argAp0 |
+      fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+      fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+    )
+  }
+
+  /**
+   * Holds if `node1` is in `fwdFlow` with access path `ap1` and there is a
+   * previous-stage store step candidate of `tc` from `node1` to `node2`
+   * that passes `typecheckStore`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowStore(
+    NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    exists(DataFlowType contentType |
+      fwdFlow(node1, cc,
argAp, ap1, config) and
+      PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and
+      typecheckStore(ap1, contentType)
+    )
+  }
+
+  /**
+   * Holds if forward flow with access path `tail` reaches a store of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(TypedContent tc |
+      fwdFlowStore(_, tail, tc, _, _, _, config) and
+      tc.getContent() = c and
+      cons = apCons(tc, tail)
+    )
+  }
+
+  /**
+   * Holds if `node1` is in `fwdFlow` with access path `ap` whose head
+   * content may be `c`, and there is a previous-stage read step candidate of
+   * `c` from `node1` to `node2`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowRead(
+    Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    fwdFlow(node1, cc, argAp, ap, config) and
+    PrevStage::readStepCand(node1, c, node2, config) and
+    getHeadContent(ap) = c
+  }
+
+  /**
+   * Holds if an argument in `fwdFlow` with context `outercc` and access path
+   * `ap` flows into parameter `p` at `call`, giving inner context `innercc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIn(
+    DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ArgNodeEx arg, boolean allowsFieldFlow |
+      fwdFlow(arg, outercc, argAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+      innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if a return node in `fwdFlow` with a `CcNoCall` context flows out
+   * of some call to `out`, giving context `ccOut`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOutNotFromArg(
+    NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(
+      DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+      DataFlowCallable inner
+    |
+      fwdFlow(ret, innercc, argAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      inner = ret.getEnclosingCallable() and
+      ccOut = getCallContextReturn(inner, call, innercc)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if a return node in `fwdFlow` with a `CcCall` context matching
+   * `call` and recorded argument access path `argAp` flows out of `call` to
+   * `out` with access path `ap`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(
+    DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+  ) {
+    exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+      fwdFlow(ret, ccc, apSome(argAp),
ap, config) and
+      flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      ccc.matchesCall(call)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+   * and data might flow through the target callable and back out at `call`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(
+    DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNodeEx p |
+      fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+      PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config)
+    )
+  }
+
+  /**
+   * Holds if `fwdFlowStore` holds from `node1` (access path `ap1`) to
+   * `node2`, `ap2` is the access path after pushing `tc`, and `ap2` is read
+   * with that content somewhere in `fwdFlowRead`.
+   */
+  pragma[nomagic]
+  private predicate storeStepFwd(
+    NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+  ) {
+    fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+    ap2 = apCons(tc, ap1) and
+    fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+  }
+
+  /**
+   * Holds if `fwdFlowRead` holds for a read of `c` from `n1` (access path
+   * `ap1`) to `n2`, and `ap2` is a tail for which `fwdFlowConsCand` produced
+   * `ap1` by storing `c`.
+   */
+  private predicate readStepFwd(
+    NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+  ) {
+    fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+    fwdFlowConsCand(ap1, c, ap2, config)
+  }
+
+  /**
+   * Holds if forward flow enters the target of `call` through an argument
+   * and comes back out at `call`, with the resulting out node in `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+    exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+      fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+        pragma[only_bind_into](config)) and
+      fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+      fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+        pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+        pragma[only_bind_into](config))
+    )
+  }
+
+  /**
+   * Holds if `flowIntoCall` holds for these arguments, `arg` is in
+   * `fwdFlow`, `p` may flow through according to the previous stage, and
+   * `call` may have forward flow through it.
+   */
+  pragma[nomagic]
+  private predicate flowThroughIntoCall(
+    DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+  ) {
+    flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+    fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+    PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+    callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+  }
+
+  /**
+   * Holds if `node` with access path `ap` is part of a path from a source to a
+   * sink in the configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from the
+   * enclosing callable in order to reach a sink, and if so, `returnAp` records
+   * the access path of the returned value.
+   */
+  pragma[nomagic]
+  predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow0(node, toReturn, returnAp, ap, config) and
+    // only tuples that are also reached in the forward direction are kept
+    fwdFlow(node, _, _, ap, config)
+  }
+
+  /**
+   * Holds if `node` is a sink in `fwdFlow`, or has a step to a node in
+   * `revFlow`. This is `revFlow` without the final `fwdFlow` restriction.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(
+    NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    // sink
+    fwdFlow(node, _, _, ap, config) and
+    sinkNode(node, config) and
+    toReturn = false and
+    returnAp = apNone() and
+    ap instanceof ApNil
+    or
+    // value-preserving local step: the access path is carried over unchanged
+    exists(NodeEx mid |
+      localStep(node, mid, true, _, config, _) and
+      revFlow(mid, toReturn, returnAp, ap, config)
+    )
+    or
+    // non-value-preserving local step: both ends have an `ApNil` access path
+    exists(NodeEx mid, ApNil nil |
+      fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+      localStep(node, mid, false, _, config, _) and
+      revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+      ap instanceof ApNil
+    )
+    or
+    // jump step: `toReturn` and `returnAp` are reset
+    exists(NodeEx mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, _, ap, config) and
+      toReturn = false and
+      returnAp = apNone()
+    )
+    or
+    // additional jump step: both ends have an `ApNil` access path
+    exists(NodeEx mid, ApNil nil |
+      fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+      additionalJumpStep(node, mid, config) and
+      revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+      toReturn = false and
+      returnAp = apNone() and
+      ap instanceof ApNil
+    )
+    or
+    // store
+    exists(Ap ap0, Content c |
+      revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+      revFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // read
+    exists(NodeEx mid, Ap ap0 |
+      revFlow(mid, toReturn, returnAp, ap0, config) and
+      readStepFwd(node, ap, _, mid,
ap0, config)
+    )
+    or
+    // flow into a callable
+    revFlowInNotToReturn(node, returnAp, ap, config) and
+    toReturn = false
+    or
+    // flow into a callable whose parameter must return: continue from an output of `call`
+    exists(DataFlowCall call, Ap returnAp0 |
+      revFlowInToReturn(call, node, returnAp0, ap, config) and
+      revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+    )
+    or
+    // flow out of a callable
+    revFlowOut(_, node, _, _, ap, config) and
+    toReturn = true and
+    // the returned access path is only recorded when `node` was reached
+    // forwards in a `CcCall` context with a recorded argument access path
+    if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+    then returnAp = apSome(ap)
+    else returnAp = apNone()
+  }
+
+  /**
+   * Holds if `mid` is in `revFlow` with access path `ap0` and `storeStepFwd`
+   * holds for a store of `tc` (with content `c`) from `node` (access path
+   * `ap`) to `mid`.
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(
+    Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+    ApOption returnAp, Configuration config
+  ) {
+    revFlow(mid, toReturn, returnAp, ap0, config) and
+    storeStepFwd(node, ap, tc, mid, ap0, config) and
+    tc.getContent() = c
+  }
+
+  /**
+   * Holds if reverse flow with access path `tail` reaches a read of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(NodeEx mid, Ap tail0 |
+      revFlow(mid, _, _, tail, config) and
+      tail = pragma[only_bind_into](tail0) and
+      readStepFwd(_, cons, c, mid, tail0, config)
+    )
+  }
+
+  /**
+   * Holds if `ret` flows out of `call` to a node that is in `revFlow` with
+   * the given `toReturn`, `returnAp` and `ap`.
+   */
+  pragma[nomagic]
+  private predicate revFlowOut(
+    DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(NodeEx out, boolean allowsFieldFlow |
+      revFlow(out, toReturn, returnAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `arg` flows into a parameter that is in `revFlow` with
+   * `toReturn = false`.
+   */
+  pragma[nomagic]
+  private predicate revFlowInNotToReturn(
+    ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNodeEx p, boolean allowsFieldFlow |
+      revFlow(p, false, returnAp, ap, config) and
+      flowIntoCall(_, arg, p, allowsFieldFlow, config)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `arg` at `call` flows into a parameter that is in `revFlow`
+   * with `toReturn = true` and returned access path `returnAp`.
+   */
+  pragma[nomagic]
+  private predicate
revFlowInToReturn(
+    DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNodeEx p, boolean allowsFieldFlow |
+      revFlow(p, true, apSome(returnAp), ap, config) and
+      flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+    |
+      // an access path other than `ApNil` may only cross when field flow is allowed
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`
+   * and data might flow through the target callable resulting in reverse flow
+   * reaching an argument of `call`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(
+    DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(RetNodeEx ret, CcCall ccc |
+      revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+      fwdFlow(ret, ccc, apSome(_), ap, config) and
+      ccc.matchesCall(call)
+    )
+  }
+
+  /**
+   * Holds if the store of `tc` from `node1` (access path `ap1`) to `node2`
+   * is in both `revFlowStore` and `revFlowConsCand`.
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+    Configuration config
+  ) {
+    exists(Ap ap2, Content c |
+      store(node1, tc, node2, contentType, config) and
+      revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+      revFlowConsCand(ap2, c, ap1, config)
+    )
+  }
+
+  /**
+   * Holds if the read of `c` from `node1` to `node2` is a `readStepFwd`
+   * step, `node2` is in `revFlow`, and `revFlowStore` has a store of `c`
+   * with the same pair of access paths.
+   */
+  predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+    exists(Ap ap1, Ap ap2 |
+      revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+      readStepFwd(node1, ap1, c, node2, ap2, config) and
+      revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+        pragma[only_bind_into](config))
+    )
+  }
+
+  /** Holds if `node` is in `revFlow` for some `toReturn`, `returnAp` and `ap`. */
+  predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+  /** Holds if `storeStepFwd` has a store of `tc` from access path `ap`. */
+  private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepFwd(_, ap, tc, _, _, config)
+  }
+
+  /** Holds if `storeStepCand` has a store of `tc` from access path `ap`. */
+  predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepCand(_, ap, tc, _, _, config)
+  }
+
+  /**
+   * Holds if parameter `p` of callable `c` is in `revFlow` with access path
+   * `ap`, `toReturn = true` and returned access path `ap0`.
+   */
+  pragma[noinline]
+  private predicate parameterFlow(
+    ParamNodeEx p, Ap ap, Ap ap0,
DataFlowCallable c, Configuration config
+  ) {
+    revFlow(p, true, apSome(ap0), ap, config) and
+    c = p.getEnclosingCallable()
+  }
+
+  /**
+   * Holds if parameter `p` of callable `c`, with access path `ap`, flows to
+   * a return node of `c` that is in both `revFlow` and `fwdFlow`, where the
+   * forward flow records `ap` as the argument access path.
+   */
+  predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+      parameterFlow(p, ap, ap0, c, config) and
+      c = ret.getEnclosingCallable() and
+      revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+        pragma[only_bind_into](config)) and
+      fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+      kind = ret.getKind() and
+      p.getPosition() = pos and
+      // we don't expect a parameter to return stored in itself
+      not kind.(ParamUpdateReturnKind).getPosition() = pos
+    )
+  }
+
+  /**
+   * Holds if some argument of `call` is in `revFlow`, flows into a parameter
+   * that must return, and an output of `call` continues the reverse flow.
+   */
+  pragma[nomagic]
+  predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+    exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+      revFlow(arg, toReturn, returnAp, ap, config) and
+      revFlowInToReturn(call, arg, returnAp0, ap, config) and
+      revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+    )
+  }
+
+  /**
+   * Reports result-set sizes for this stage: `fwd = true` counts `fwdFlow`
+   * nodes and tuples plus `fwdConsCand` contents and pairs; `fwd = false`
+   * counts the `revFlow` / `consCand` counterparts.
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+    tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+    or
+    fwd = false and
+    nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | consCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+    tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+  }
+  /* End: Stage 2 logic.
*/
+}
+
+/**
+ * Holds if `flowOutOfCallNodeCand1` holds for these arguments and both
+ * `node1` and `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+  DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+/**
+ * Holds if `flowIntoCallNodeCand1` holds for these arguments and both
+ * `node1` and `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+  DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+  Configuration config
+) {
+  flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+/**
+ * Provides `localFlowBigStep`, which combines sequences of local flow steps
+ * between entry and exit nodes into single steps.
+ */
+private module LocalFlowBigStep {
+  /**
+   * A node where some checking is required, and hence the big-step relation
+   * is not allowed to step over.
+   */
+  private class FlowCheckNode extends NodeEx {
+    FlowCheckNode() {
+      castNode(this.asNode()) or
+      clearsContentCached(this.asNode(), _)
+    }
+  }
+
+  /**
+   * Holds if `node` can be the first node in a maximal subsequence of local
+   * flow steps in a dataflow path.
+   */
+  predicate localFlowEntry(NodeEx node, Configuration config) {
+    Stage2::revFlow(node, config) and
+    (
+      // a source, or the target of any kind of non-local step
+      sourceNode(node, config) or
+      jumpStep(_, node, config) or
+      additionalJumpStep(_, node, config) or
+      node instanceof ParamNodeEx or
+      node.asNode() instanceof OutNodeExt or
+      store(_, _, node, _, config) or
+      read(_, _, node, config) or
+      node instanceof FlowCheckNode
+    )
+  }
+
+  /**
+   * Holds if `node` can be the last node in a maximal subsequence of local
+   * flow steps in a dataflow path.
+   */
+  private predicate localFlowExit(NodeEx node, Configuration config) {
+    // the origin of any kind of non-local step into a `Stage2::revFlow` node
+    exists(NodeEx next | Stage2::revFlow(next, config) |
+      jumpStep(node, next, config) or
+      additionalJumpStep(node, next, config) or
+      flowIntoCallNodeCand1(_, node, next, config) or
+      flowOutOfCallNodeCand1(_, node, next, config) or
+      store(node, _, next, _, config) or
+      read(node, _, next, config)
+    )
+    or
+    node instanceof FlowCheckNode
+    or
+    sinkNode(node, config)
+  }
+
+  /**
+   * Holds if `additionalLocalFlowStepNodeCand1` holds from `node1` to
+   * `node2` and both nodes are in `Stage2::revFlow` with access path `false`.
+   */
+  pragma[noinline]
+  private predicate additionalLocalFlowStepNodeCand2(
+    NodeEx node1, NodeEx node2, Configuration config
+  ) {
+    additionalLocalFlowStepNodeCand1(node1, node2, config) and
+    Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and
+    Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+  }
+
+  /**
+   * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+   * subsequence of local flow steps in a dataflow path.
+   *
+   * This is the transitive closure of `[additional]localFlowStep` beginning
+   * at `localFlowEntry`.
+   */
+  pragma[nomagic]
+  private predicate localFlowStepPlus(
+    NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+    LocalCallContext cc
+  ) {
+    not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+    (
+      // base case: a single step out of a `localFlowEntry` node
+      localFlowEntry(node1, pragma[only_bind_into](config)) and
+      (
+        localFlowStepNodeCand1(node1, node2, config) and
+        preservesValue = true and
+        t = node1.getDataFlowType() // irrelevant dummy value
+        or
+        additionalLocalFlowStepNodeCand2(node1, node2, config) and
+        preservesValue = false and
+        t = node2.getDataFlowType()
+      ) and
+      node1 != node2 and
+      cc.relevantFor(node1.getEnclosingCallable()) and
+      not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+      Stage2::revFlow(node2, pragma[only_bind_into](config))
+      or
+      // extend by a value-preserving step: `preservesValue` and `t` are kept
+      exists(NodeEx mid |
+        localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+        localFlowStepNodeCand1(mid, node2, config) and
+        // a `FlowCheckNode` may not be stepped over
+        not mid instanceof FlowCheckNode and
+        Stage2::revFlow(node2, pragma[only_bind_into](config))
+      )
+      or
+      // extend by an additional step: the result no longer preserves the
+      // value, and `t` becomes the type of `node2`
+      exists(NodeEx mid |
+        localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+        additionalLocalFlowStepNodeCand2(mid, node2, config) and
+        // a `FlowCheckNode` may not be stepped over
+        not mid instanceof FlowCheckNode and
+        preservesValue = false and
+        t = node2.getDataFlowType() and
+        Stage2::revFlow(node2, pragma[only_bind_into](config))
+      )
+    )
+  }
+
+  /**
+   * Holds if `node1` can step to `node2` in one or more local steps and this
+   * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf, + Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +/** + * The stage that builds on `Stage2` and represents access paths as + * `AccessPathFront`s. + */ +private module Stage3 { + module PrevStage = Stage2; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + /** Gets the `PrevStage` approximation of `ap`, as given by `toBoolNonEmpty()`. */ + private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + /** Gets the nil access path with the type of `node`, provided `PrevStage::revFlow` holds for `node`. */ + private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + + /** Gets an access path whose head is `tc`; `tail` only needs to exist. */ + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + /** Gets the content of the head of `ap`. */ + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + /** Gets the option value representing no access path. */ + ApOption apNone() { result = TAccessPathFrontNone() } + + /** Gets the option value wrapping `ap`. */ + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + /** A call context; in this stage it is a plain boolean. */ + class Cc = boolean; + + /** The call context `true`. */ + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + + /** The call context `false`. */ + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + /** Gets the call context `false`. */ + Cc ccNone() { result = false } + + private class LocalCc = Unit; + + /** Gets a call context for flow into `c` through `call`; this stage places no restriction on it. */ + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + /** Gets a call context for flow out of `c` through `call`; this stage places no restriction on it. */ + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } + + /** Gets a local call context for `node`; this stage places no restriction on it. */ + bindingset[node, cc, config] + private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } + + /** Holds if `localFlowBigStep` relates `node1` to `node2` under any local call context; `lcc` only needs to exist. */ + private predicate localStep( + NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + private predicate flowIntoCall = flowIntoCallNodeCand2/5; + + /** Holds if `ap` is cleared at `node`. */ + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) } + + /** Holds if the underlying node of `node` is a `CastingNode`. */ + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + + /** + * Holds if `ap` is not cleared at `node` and, when `node` is a casting node, + * the type of `node` is compatible with the type of `ap`. + */ + bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { + not clear(node, ap) and + if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any() + } + + /** Holds if the type of `ap` is compatible with `contentType`. */ + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } + + /* Begin: Stage 3 logic. 
*/ + /** Holds if `PrevStage::revFlow` holds for `node` with access path approximation `apa`. */ + private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + /** Gets a value equal to `apa`, with both sides routed through `pragma[only_bind_into]`. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `ret` flows out of `call` to `out`, `PrevStage::callMayFlowThroughRev` + * holds for `call`, and `PrevStage::parameterMayFlowThrough` holds for the + * callable enclosing `ret`. + */ + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + /** + * Holds if `node` is a source or is reached from `fwdFlow` by a single step. + * `fwdFlow` additionally applies `flowCand` and `filter` to the result. + */ + pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + // source + flowCand(node, _, config) and + sourceNode(node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + // local step + exists(NodeEx mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + // jump step + exists(NodeEx mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + // additional jump step + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, 
argAp0, config) + ) + } + + /** + * Holds if forward flow reaches `node1` with access path `ap1` and + * `PrevStage::storeStepCand` stores `tc` from `node1` into `node2` with a + * content type accepted by `typecheckStore`. + */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + /** + * Holds if forward flow reaches `node1` with access path `ap` whose head + * content is `c`, and `PrevStage::readStepCand` reads `c` from `node1` into `node2`. + */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + /** + * Holds if forward flow reaches an argument of `call` in call context `outercc` + * with access path `ap`, and that argument flows into parameter `p`. A non-nil + * `ap` requires `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if forward flow reaches a return node in a `CcNoCall` context and + * that node flows out of a call to `out`. A non-nil `ap` requires + * `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) + | + ap instanceof ApNil or allowsFieldFlow 
= true + ) + } + + /** + * Holds if forward flow reaches a return node in a `CcCall` context with + * recorded argument access path `argAp`, and that node flows through `call` + * to `out`. A non-nil `ap` requires `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and + ccc.matchesCall(call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** + * Holds if `fwdFlowStore` stores `tc` from `node1` (access path `ap1`) into + * `node2`, giving `ap2`, and some `fwdFlowRead` reads the content of `tc` + * from access path `ap2`. + */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + /** + * Holds if `fwdFlowRead` reads `c` from `n1` (access path `ap1`) into `n2` + * and `fwdFlowConsCand` relates `ap1`, `c` and the resulting `ap2`. + */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if forward flow enters `call` through an argument and leaves it + * again through `fwdFlowOutFromArg`, reaching a node in `fwdFlow`. + */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + /** + * Holds if `arg` flows into `p` at `call`, `arg` is in `fwdFlow`, + * `PrevStage::parameterMayFlowThrough` holds for `p`, and + * `callMayFlowThroughFwd` holds for `call`. + */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, 
ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + /** + * Holds if `node` is a sink or reaches `revFlow` by a single step. `revFlow` + * additionally requires `fwdFlow` to hold for `node` and `ap`. + */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + // sink + fwdFlow(node, _, _, ap, config) and + sinkNode(node, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + // value-preserving local step + exists(NodeEx mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + // non-value-preserving local step + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + // jump step + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + // additional jump step + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + 
exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + /** + * Holds if reverse flow reaches `mid` with access path `ap0` and + * `storeStepFwd` stores `tc`, whose content is `c`, from `node` (access path + * `ap`) into `mid`. + */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + /** + * Holds if reverse flow reaches a node that `ret` flows out to at `call`. + * A non-nil `ap` requires `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if reverse flow reaches a parameter with `toReturn = false` and + * `arg` flows into that parameter. A non-nil `ap` requires + * `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if reverse flow reaches a parameter with `toReturn = true` and + * returned access path `returnAp`, and `flowThroughIntoCall` relates `arg` + * to that parameter at `call`. A non-nil `ap` requires `allowsFieldFlow = true`. + */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + /** + * Holds if `store` stores `tc` from `node1` into `node2` with content type + * `contentType`, and the store is confirmed by both `revFlowStore` and + * `revFlowConsCand` for access path `ap1`. + */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + /** + * Holds if `readStepFwd` reads `c` from `node1` into `node2`, `node2` is in + * `revFlow`, and some `revFlowStore` stores `c` with matching access paths. + */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `revFlow` holds for `node` with some return state and access path. */ + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } + + /** Holds if `storeStepFwd` stores `tc` onto access path `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + /** Holds if `storeStepCand` stores `tc` onto access path `ap`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + /** + * Holds if reverse flow reaches parameter `p` of `c` with access path `ap`, + * `toReturn = true` and returned access path `ap0`. + */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + /** + * Holds if `parameterFlow` holds for `p` with access path `ap` and some + * return node of `c` is in both `revFlow` and `fwdFlow` with the matching + * access paths. + */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), + pragma[only_bind_into](config)) and + fwdFlow(ret, any(CcCall ccc), 
apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // exclude flow that leaves through a parameter update at the same position as `p` + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + /** + * Holds if reverse flow reaches an argument of `call` through + * `revFlowInToReturn` and the matching `revFlowIsReturned` holds for `call`. + */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap | + revFlow(arg, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + /** + * Holds if `nodes`, `fields`, `conscand` and `tuples` are the counts for + * forward flow (`fwd = true`) or reverse flow (`fwd = false`) in `config`. + */ + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 3 logic. */ +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) { + exists(AccessPathFront apf | + Stage3::revFlow(node, true, _, apf, config) and + Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. 
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + // `tails`: number of access path fronts that can follow `tc` according to `Stage3::consCand` + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + // `nodes`: number of nodes reached with head `tc`, either directly or as a summary context + nodes = + strictcount(NodeEx n | + Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + // both limits must be exceeded, and `tc` must not force high precision + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +/** + * The representation of `AccessPathApprox`: `TNil` holds only a type, + * `TConsNil` holds a head and a type, `TConsCons` holds two heads and a length + * of at least 2, and `TCons1` holds one head and a length of at least 1. + * `TCons1` is used for heads where `expensiveLen2unfolding` holds; the other + * two cons forms are used where it does not. + */ +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + /** Gets a textual representation of this access path approximation. */ + abstract string toString(); + + /** Gets the first `TypedContent` of this access path approximation, if any. */ + abstract TypedContent getHead(); + + /** Gets the length of this access path approximation. */ + abstract int len(); + + /** Gets the type of this access path approximation. */ + abstract DataFlowType getType(); + + /** Gets the `AccessPathFront` of this access path approximation. */ + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +/** The empty access path approximation, holding only the type `t`. */ +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +/** A non-empty access path approximation. */ +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +/** An access path approximation of length 1: the head `tc` followed by the type `t`. */ +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +/** An access path approximation of length `len` whose first two elements are `tc1` and `tc2`. */ +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + // the remainder starts with `tc2` and may take any of the three cons forms + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +/** An access path approximation of length `len` of which only the head `tc` is tracked. */ +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + // the second element is not stored, so candidates are taken from `Stage3::consCand` + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `apa`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `apa`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +/** An optional `AccessPathApprox`. */ +private class AccessPathApproxOption extends TAccessPathApproxOption { + /** Gets the textual representation of the wrapped access path, or the empty string if there is none. */ + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + /** Gets the `PrevStage` approximation of `ap`, which is its front. */ + private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + /** Gets the nil access path with the type of `node`, provided `PrevStage::revFlow` holds for `node`. */ + private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + /** Gets an access path obtained by pushing `tc` onto `tail`. */ + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + /** Gets the content of the head of `ap`. */ + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + /** Gets the option value representing no access path. */ + ApOption apNone() { result = TAccessPathApproxNone() } + + /** Gets the option value wrapping `ap`. */ + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + /** Gets a call context that is a `CallContextAny`. */ + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = LocalCallContext; + + /** + * Gets the call context for flow into `c` through `call` from `outercc`: + * `TSpecificCall(call)` if `recordDataFlowCallSite` holds, otherwise `TSomeCall()`. + */ + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + /** + * Gets the call context for flow out of `c` through `call` from `innercc`: + * `TReturn(c, call)` if `reducedViableImplInReturn` holds, otherwise `ccNone()`. + */ + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + /** Gets the local call context for `cc` in the callable enclosing `node`, where `node` is a `localFlowEntry`. */ + bindingset[node, cc, config] + private LocalCc getLocalCc(NodeEx node, Cc cc, 
Configuration config) { + localFlowEntry(node, config) and + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + /** Holds if `localFlowBigStep` relates `node1` to `node2` for the front of `ap` under `lcc`. */ + private predicate localStep( + NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) + } + + /** Holds if `flowOutOfCallNodeCand2` holds and both nodes are in `PrevStage::revFlow`. */ + pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + /** Holds if `flowIntoCallNodeCand2` holds and both nodes are in `PrevStage::revFlow`. */ + pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + /** Holds for every `node` and `ap`; this stage applies no filter. */ + bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. 
*/ + /** Holds if `PrevStage::revFlow` holds for `node` with access path approximation `apa`. */ + private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + /** Gets a value equal to `apa`, with both sides routed through `pragma[only_bind_into]`. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `ret` flows out of `call` to `out`, `PrevStage::callMayFlowThroughRev` + * holds for `call`, and `PrevStage::parameterMayFlowThrough` holds for the + * callable enclosing `ret`. + */ + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + sourceNode(node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, 
argAp0, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) + | + ap instanceof ApNil or allowsFieldFlow 
= true + ) + } + + /** + * Holds if forward flow reaches a return node in a `CcCall` context that matches + * `call`, having entered with access path `argAp` and now carrying `ap`, and the + * return node flows through out of `call` to `out`. The access path must be nil + * unless the call edge allows field flow. + */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and + ccc.matchesCall(call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** + * Holds if `fwdFlowStore` has a store of `tc` from `node1` (with `ap1`) to `node2`, + * `ap2` is `ap1` with `tc` pushed on top, and forward flow also reads the content + * of `tc` from `ap2` somewhere. + */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + /** + * Holds if forward flow reads `c` from `n1` to `n2` with access path `ap1`, and + * `ap1` is a `fwdFlowConsCand` of `c` whose tail is `ap2`. + */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if forward flow both enters `call` through an argument (`fwdFlowIsEntered`) + * and comes back out of it (`fwdFlowOutFromArg`) with the same entry access path, + * and the out node is reached by `fwdFlow` in the context of the entry. + */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + /** + * Holds if `arg` flows into `p` at `call`, forward flow reaches `arg`, the previous + * stage allows `p` to flow through, and `callMayFlowThroughFwd` holds for `call`. + */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, 
ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + /** + * Holds if `node` with access path `ap` is a sink reached by forward flow, or has + * a single step (local, jump, store, read, into a callable, or out of a callable) + * to a node already in `revFlow`. `revFlow` intersects this relation with + * `fwdFlow`. + */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + sinkNode(node, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + 
exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + /** + * Holds if reverse flow reaches `mid` with access path `ap0`, and `storeStepFwd` + * has a store of `tc` (whose content is `c`) from `node` with access path `ap` + * to `mid` with access path `ap0`. + */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + /** + * Holds if reverse flow reaches an out node of `call` with access path `ap` and + * the return node `ret` flows out of `call` to that node. The access path must be + * nil unless the call edge allows field flow. + */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if reverse flow reaches a parameter with `toReturn = false` and access + * path `ap`, and `arg` flows into that parameter. The access path must be nil + * unless the call edge allows field flow. + */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if reverse flow reaches a parameter with `toReturn = true`, return access + * path `returnAp` and access path `ap`, and `arg` flows through into that parameter + * at `call` (`flowThroughIntoCall`). The access path must be nil unless the call + * edge allows field flow. + */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + /** + * Holds if the store of `tc` from `node1` to `node2` with content type `contentType` + * is used by reverse flow, where `ap1` is the access path at `node1` before the + * store and the resulting access path is also consumed by a read (`revFlowConsCand`). + */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + /** + * Holds if the read of `c` from `node1` to `node2` is found by `readStepFwd`, + * reverse flow reaches `node2` with the access path after the read, and + * `revFlowStore` has a store of `c` that produces the access path before the read. + */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `node` is reached by reverse flow with some access path. */ + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } + + /** Holds if `storeStepFwd` stores `tc` on top of access path `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + /** Holds if `storeStepCand` stores `tc` on top of access path `ap`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + /** + * Holds if reverse flow reaches parameter `p` of callable `c` with access path `ap`, + * `toReturn = true`, and return access path `ap0`. + */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + /** + * Holds if parameter `p` of callable `c`, entered with access path `ap`, may flow + * to a return node of `c` that is covered by both forward and reverse flow. + */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), + pragma[only_bind_into](config)) and + fwdFlow(ret, any(CcCall ccc), 
apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + /** + * Holds if reverse flow reaches an argument of `call` that flows through into the + * callee (`revFlowInToReturn`) and the matching return is found by + * `revFlowIsReturned`. + */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap | + revFlow(arg, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + /** + * Reports the size of the forward (`fwd = true`) or reverse (`fwd = false`) flow + * relation of this stage: `nodes` is the number of distinct nodes, `fields` the + * number of distinct stored `TypedContent`s, `conscand` the number of distinct + * pairs of `TypedContent` and access path, and `tuples` the number of tuples. + */ + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 4 logic. 
*/ +} + +/** + * Gets `conf`. Argument and result are related through a fresh variable `c` that is + * wrapped in `pragma[only_bind_into]` on both sides. + * NOTE(review): presumably this keeps the optimizer from propagating bindings + * between the two configurations - confirm against the QL annotation reference. + */ +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +/** + * Holds if `n` lies in a callable `c` that has a parameter which may flow through + * with access path approximation `apa`, and `n` is reached by reverse flow with + * `toReturn = true` and by forward flow in a `CallContextCall` context that was + * entered with `apa`. + */ +private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) { + exists(DataFlowCallable c, AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, apa, _) and + Stage4::revFlow(n, true, _, apa0, config) and + Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +/** + * The values of `SummaryCtx`: either no context, or a parameter together with an + * access path for which `Stage4::parameterMayFlowThrough` holds. + */ +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, AccessPath ap) { + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + /** Gets the position of the parameter `p`, as given by `p.isParameterOf`. */ + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + /** Holds if the parameter `p` is at the specified location. */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. 
+ */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +/** + * Gets the number of nodes that are reached by `Stage4::revFlow` with `apa` or for + * which `nodeMayUseSummary` holds with `apa`. Has no result when there is no such + * node (`strictcount`). + */ +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n | + Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +/** + * Gets a tail of `apa`, obtained by popping a head that `Stage4::consCand` allows + * on top of that tail. + */ +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. 
+ */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +/** + * The representations of `AccessPath`: + * - `TAccessPathNil`: the empty access path with its type; + * - `TAccessPathCons`: a precise head and tail, used when `evalUnfold` does not + * report the approximation as expensive; + * - `TAccessPathCons2`: two heads and a length, used when `evalUnfold` reports the + * approximation as expensive but `expensiveLen1to2unfolding` does not hold; + * - `TAccessPathCons1`: one head and a length, used when both report it as expensive. + */ +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +/** The nodes of the path graph: intermediate nodes (`TPathNodeMid`) and sink nodes (`TPathNodeSink`). */ +private newtype TPathNode = + TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, config) and + sourceNode(node, config) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... 
or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + /* A sink node is identified by its node and its configuration only. */ + TPathNodeSink(NodeEx node, Configuration config) { + sinkNode(node, pragma[only_bind_into](config)) and + Stage4::revFlow(node, pragma[only_bind_into](config)) and + ( + // A sink that is also a source ... + sourceNode(node, config) + or + // ... or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, TAccessPathNil(_)) and + pragma[only_bind_into](config) = mid.getConfiguration() + ) + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +/** The empty access path, which carries only the type `t`. */ +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + /** Gets the type carried by this empty access path. */ + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +/** An access path represented precisely as a `head` followed by a `tail` access path. */ +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + /** + * Gets the text of this access path without the opening bracket. `needsSuffix` is + * `true` when the text ends with an open parenthesis, in which case `toString` + * appends the length and the closing parenthesis and bracket. + */ + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +/** An access path that records only its first two contents `head1` and `head2` and its length `len`. */ +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** An access path that records only its first content `head` and its length `len`. */ +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, provided that this node is hidden. */ + private PathNode getASuccessorIfHidden() { + this.(PathNodeImpl).isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.(PathNodeImpl).isHidden() and + not result.(PathNodeImpl).isHidden() + } + + /** Holds if this node is a source. 
*/ + predicate isSource() { none() } +} + +/** + * The internal view of a `PathNode`, which exposes the underlying `NodeEx` and the + * raw successor relation that still includes hidden nodes. + */ +abstract private class PathNodeImpl extends PathNode { + /** Gets a direct successor of this node; the result may be a hidden node. */ + abstract PathNode getASuccessorImpl(); + + /** Gets the underlying `NodeEx`. */ + abstract NodeEx getNodeEx(); + + /** + * Holds if this node is left out of the successor relation of `PathNode`: either + * its node is a `hiddenNode` and it is neither a source nor a sink node, or its + * node is an implicit read node. + */ + predicate isHidden() { + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + } + + /** Gets the access path of this node as text with a leading space, or the empty string for sink nodes and empty text. */ + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + /** Gets the call context of this node as text in angle brackets, or the empty string for sink nodes. */ + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNode n) { + n instanceof PathNodeSink or directReach(n.getASuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) } + +/** Holds if `n2` is reachable from `n1` in one or more `pathSucc` steps (computed with `fastTC`). */ +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** The subpath tuples of the graph; an alias for `Subpaths::subpaths`. */ + query predicate subpaths = Subpaths::subpaths/4; +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`, + * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + /** Gets the call context of this node. */ + CallContext getCallContext() { result = cc } + + /** Gets the summary context of this node. */ + SummaryCtx getSummaryCtx() { result = sc } + + /** Gets the access path of this node. */ + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + /** Gets an intermediate node with the same configuration that is one `pathStep` away from this node. */ + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(), + result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNodeEx() = sink.getNodeEx() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbindConf(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + sourceNode(node, config) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. 
This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override NodeEx getNodeEx() { result = node } + + override Configuration getConfiguration() { result = config } + + /* A sink node has no successors. */ + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, config) } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep( + PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap +) { + /* Local flow: call context and summary context are carried over from `mid`. */ + exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNodeEx() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + /* Jump steps: call context and summary context are reset. */ + jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + /* Additional jump steps: as above, and only between empty access paths. */ + additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + /* Store: `ap` is the access path of `mid` with `tc` pushed on top. */ + exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + /* Read: `ap` is the access path of `mid` with its head `tc` popped. */ + exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = 
mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +/** + * Holds if `mid` has access path `ap0` with head `tc`, and `Stage4::readStepCand` + * allows reading the content of `tc` from the node of `mid` to `node`. `cc` is the + * call context of `mid`. + */ +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +/** + * Holds if `mid` has access path `ap0`, and `Stage4::storeStepCand` allows storing + * `tc` from the node of `mid` to `node`. `cc` is the call context of `mid`. + */ +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +/** + * Holds if `mid` is at a return node with return position `pos`, its call context + * `innercc` is a `CallContextNoCall`, `apa` is the approximation of its access + * path, and `config` is its configuration. + */ +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +/** + * Holds if `mid` may return from its callable to `call` with return kind `kind`. + * The resulting context `cc` is `TReturn(c, call)` when `reducedViableImplInReturn` + * holds for the callable and the call, and `TAnyCallContext()` otherwise. + */ +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +/** + * Gets an out node of `call` for return kind `kind` that is reached by + * `Stage4::revFlow` with access path approximation `apa`. + */ +pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. 
The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa +) { + exists(ArgNode arg | + arg = mid.getNodeEx().asNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + apa = ap.getApprox() + ) +} + +/** + * Holds if the `i`th parameter of `callable` is reached by `Stage4::revFlow` with + * access path approximation `apa`. + */ +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, _, apa, config) and + p.isParameterOf(callable, i) + ) +} + +/** + * Holds if `mid` is at the `i`th argument of `call` with access path `ap`, + * `callable` is a target of `call` resolved in `outercc`, and the `i`th parameter + * of `callable` is a `parameterCand` for the approximation of `ap`. + * + * NOTE(review): `any(int j | j <= i and j >= i)` is just `i`; presumably it is + * written this way to steer the join order - confirm before simplifying. + */ +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, i, outercc, call, ap, apa) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. 
+ */ +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa, + Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, int pos | + mid.getNodeEx() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, + AccessPathApprox apa +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. 
+ */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | + pathThroughCallable0(call, mid, kind, cc, ap, apa) and + out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration())) + ) +} + +private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths01( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, AccessPath apout + ) { + pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, innercc, sc, _) and + paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _, + unbindConf(arg.getConfiguration())) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. 
+ */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, apout) and + ret.getNodeEx() = retnode and + kind = retnode.getKind() and + innercc = ret.getCallContext() and + sc = ret.getSummaryCtx() and + ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and + apout = ret.getAp() and + not ret.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) { + exists(ParamNodeEx p, NodeEx o, AccessPath apout | + pragma[only_bind_into](arg).getASuccessor() = par and + pragma[only_bind_into](arg).getASuccessor() = out and + subpaths03(arg, p, ret, o, apout) and + par.getNodeEx() = p and + out.getNodeEx() = o and + out.getAp() = apout + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath. + */ + predicate retReach(PathNode n) { + subpaths(_, _, n, _) + or + exists(PathNode mid | + retReach(mid) and + n.getASuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. 
+ */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. 
+ */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config +) { + stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + 
exists(Node n | config.isSink(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + config.isSink(n) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. 
accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. 
+ */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + sourceNode(node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(node.getDataFlowType()) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap, + Configuration config + ) { + sinkNode(node, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() and + not fullBarrier(node, 
config) and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, sc1, sc2, ap, config) and + not clearsContentCached(node.asNode(), ap.getHead()) and + not fullBarrier(node, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + not clearsContentCached(node.asNode(), ap.getHead().getContent()) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. 
*/ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } + + NodeEx getNodeEx() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } + + NodeEx getNodeEx() { result = node } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(), + this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() + } + } + + private 
predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNodeEx(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + 
bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = 
pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgNode arg | + arg = mid.getNodeEx().asNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + 
exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + 
config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + 
) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + DataFlowCall call, RevPartialAccessPath ap, Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p | + mid.getNodeEx() = p and + p.getPosition() = pos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, + Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | + revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and + revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, int pos | + revPartialPathThroughCallable0(call, mid, pos, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and 
+ sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll new file mode 100644 index 00000000000..4ca06c93362 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll @@ -0,0 +1,4559 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. 
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + /** Holds for every string; the characteristic predicate itself places no restriction on `this`. */ bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, Content c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to the node of expression `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. Has no result by default. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library.
+ */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + /* NOTE(review): a `strictcount` is never negative, so the first three disjuncts cannot hold; presumably they only tie `hasFlow` to the overridable predicates (see the class comment) - confirm. */ strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +/** The representation of `NodeEx`: a plain `Node`, or a `Node` at which some configuration allows implicit reads, paired with a Boolean `hasRead`. */ private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +/** A node of the extended graph used below; see `TNodeEx` for the two variants. */ private class NodeEx extends TNodeEx { + /** Gets the underlying node's string, with an ` [Ext]` suffix for implicit read nodes. */ string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + /** Gets the underlying `Node`, if this is a normal (non-implicit-read) node. */ Node asNode() { this = TNodeNormal(result) } + + /** Holds if this is the implicit read node for `n` with flag `hasRead`. */ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + /** Gets the underlying `Node`, for both normal and implicit read nodes. */ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + /** Gets the enclosing callable of the underlying node (inlined wrapper around `getEnclosingCallable0`). */ pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + /* Uses `asNode()`, so this has no result for implicit read nodes. */ pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + /** Gets the data-flow type of this node, if it is a normal node (inlined wrapper around `getDataFlowType0`). */ pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + /** Holds if the underlying node has the given location; delegates to `projectToNode()`. */ predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** A `NodeEx` whose underlying node is an `ArgNode`. */ private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +/** A `NodeEx` whose underlying node is a `ParamNode`. */ private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + /** Holds if the underlying node is parameter `i` of callable `c`. */ predicate
isParameterOf(DataFlowCallable c, int i) { + this.asNode().(ParamNode).isParameterOf(c, i) + } + + /** Gets the position of this parameter in some callable. */ int getPosition() { this.isParameterOf(_, result) } +} + +/** A `NodeEx` whose underlying node is a `ReturnNodeExt`. */ private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + /** Gets the return position of the underlying node. */ ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + /** Gets the return kind of the underlying `ReturnNodeExt`. */ ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +/** Holds if `node` is a source of `config` that is also marked `isBarrierIn`. */ private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) and + config.isSource(n) + ) +} + +/** Holds if `node` is a sink of `config` that is also marked `isBarrierOut`. */ private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) and + config.isSink(n) + ) +} + +/** Holds if `node` is blocked entirely for `config`: it is a barrier, an in-barrier that is not a source, an out-barrier that is not a sink, or guarded by a barrier guard. */ private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) + or + config.isBarrierOut(n) and + not config.isSink(n) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + ) +} + +/** Holds if the underlying node of `node` is a source in `config`. */ pragma[nomagic] +private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) } + +/** Holds if the underlying node of `node` is a sink in `config`. */ pragma[nomagic] +private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) } + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(n1, n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) + or + /* also: the step from a normal node `n` to its implicit read node with `hasRead = false` */ exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables.
+ */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + /* a configuration-defined step whose end points share an enclosing callable */ exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) + or + /* also: the step from the implicit read node of `n` with `hasRead = true` back to `n` itself */ exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(n1, n2) and + /* the step is dropped if either end point is a barrier for `config` */ not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + /* a configuration-defined step whose end points lie in different callables */ exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** Holds if content `c` is read from `node1` into `node2`, either by an ordinary read step or by an implicit read of `c` that `config` allows at the underlying node. */ private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + read(node1.asNode(), c, node2.asNode()) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +/** Holds if `node1` is stored into content `tc` of `node2`, restricted to contents that are also read somewhere under `config`. */ private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(node1.asNode(), tc, node2.asNode(), contentType) and + read(_, tc.getContent(), _, config) +} + +/** Lifts `viableReturnPosOut` to `NodeEx`. */ pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +/** Lifts `viableParamArg` to `NodeEx`. */ pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** Stage 1: flow computed without access paths (`Ap` is `Unit`) and with a Boolean call context. */ private module Stage1 { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call.
+ */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + not fullBarrier(node, config) and + ( + sourceNode(node, config) and + cc = false + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + ) + } + + /** Holds if `node` is reachable from a source in `config`, in any call context. */ private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + /** Holds if `node` is the target of a read of `c` from a node reached by `fwdFlow` with call context `cc`. */ pragma[nomagic] + private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + read(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** Holds if some return node with return position `pos` is reached by `fwdFlow` with call context `cc`. */ pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + /** Holds if `out` is a viable out node of `call` for a return position reached by `fwdFlow` with call context `cc`. */ pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) + ) + } + + /** Holds if `fwdFlowOut(call, out, true, config)`, that is, the returned flow entered the callee through an argument. */ pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink.
+ */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + /** The backward step relation underlying `revFlow`, which additionally intersects it with `fwdFlow`. */ pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + fwdFlow(node, config) and + sinkNode(node, config) and + toReturn = false + or + exists(NodeEx mid | + localFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(NodeEx mid | + additionalLocalFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + exists(NodeEx mid | + additionalJumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(NodeEx mid, Content c | + read(node, c, mid, config) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node | + fwdFlow(node, pragma[only_bind_into](config)) and + read(node, c, mid, config) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + /** Holds if `node` is stored into content `c` of a node in `revFlow` with flag `toReturn`, where `c` is also a forward store candidate. */ pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + private predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + /** Holds if `out` is a viable out node of `call` for return position `pos`, and `pos` is reached by `fwdFlow`. */ pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + /** Holds if return position `pos` has a viable out node that is in `revFlow`. */ pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + /** Holds if `arg` is a viable argument for parameter `p` at `call`, and `arg` is reached by `fwdFlow`. */ pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + /** Holds if `arg` at `call` feeds a parameter that is in `revFlow` with flag `toReturn`. */ pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + /** Holds if `revFlowIn(call, arg, true, config)`. */ pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call,
arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + /** Holds if the store of `node1` into content `tc` of `node2` is relevant: `node2` is in `revFlow` and the content is both read and stored there. `ap1` is unconstrained. */ pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + /** Holds if the read of `c` from `n1` into `n2` is relevant: `n2` is in `revFlow` and `c` is both read and stored there. */ pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) + } + + /** Holds if `node` is in `revFlow` for `config`, with any `toReturn` flag. */ pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + /** Same as the three-argument `revFlow`; `returnAp` and `ap` are unconstrained. */ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow(node, toReturn, config) and exists(returnAp) and exists(ap) + } + + /** Holds if `node` is reached forwards with `cc = true` and backwards with `toReturn = true`, and is neither an in-barrier nor an out-barrier. */ private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`.
*/ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + ) + } + + /** Holds if `call` may be flowed through in the reverse direction: an argument of `call` is in `revFlow`, it feeds a parameter reached with `toReturn = true`, and an output of `call` is in `revFlow` with the argument's `toReturn` flag. */ pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + /** Reports counts for the forward (`fwd = true`) and reverse (`fwd = false`) passes: nodes, contents and tuples; `conscand` is always -1 here. */ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic.
*/ +} + +/** Holds if `localFlowStep(node1, node2, config)` holds and `node2` is in stage 1's `revFlow`. */ pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +/** Holds if `additionalLocalFlowStep(node1, node2, config)` holds and `node2` is in stage 1's `revFlow`. */ pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +/** Holds if `Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)` holds and `out` is in stage 1's `revFlow`. */ pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +/** Holds if `Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)` holds and `arg` is in stage 1's `revFlow`. */ pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink.
+ */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + /* the step is dropped if it leaves an out-barrier or enters an in-barrier */ not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + /* number of distinct targets `n` of call-return or call-entry edges leaving `n1`; no result if there are none */ strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + /* number of distinct origins `n` of call-return or call-entry edges arriving at `n2`; no result if there are none */ strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + /* field flow is allowed when the smaller of the two branching factors is within the configured limit */ if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration.
+ */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private module Stage2 { + module PrevStage = Stage1; + + class ApApprox = PrevStage::Ap; + + class Ap = boolean; + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + private ApApprox getApprox(Ap ap) { any() } + + private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + private Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = Unit; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } + + private predicate localStep( 
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) + ) and + exists(ap) and + exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + private predicate flowIntoCall = flowIntoCallNodeCand1/5; + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 2 logic. */ + private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + sourceNode(node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + /** Holds if forward flow reaches `node1` with `ap1` in context `cc`/`argAp`, and the previous stage has a store-step candidate for `tc` from `node1` to `node2` whose content type passes `typecheckStore`. */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, 
argAp, ap1, config) and + PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + /** Holds if forward flow reaches `node1` with access path `ap` whose head content is `c`, and the previous stage has a read-step candidate of `c` from `node1` to `node2`. */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + /** Holds if forward flow reaches an argument of `call` with `ap` in context `outercc`/`argAp` and flows into parameter `p` under context `innercc`; a non-nil `ap` requires `allowsFieldFlow = true`. */ + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if forward flow reaches a return node in a `CcNoCall` context and flows out of some call to `out`; `ccOut` is the context given by `getCallContextReturn`. */ + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if forward flow reaches a return node in a `CcCall` context matching `call`, with summary context `argAp`, and flows through out of `call` to `out`. */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, ccc, apSome(argAp), 
ap, config) and + flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and + ccc.matchesCall(call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) + ) + } + /** Holds if `fwdFlowStore` covers the store of `tc` from `node1` to `node2`, `ap2` is `apCons(tc, ap1)`, and `fwdFlowRead` has a read of the same content with `ap2`. */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + /** Holds if `fwdFlowRead` covers the read of `c` from `n1` to `n2` with `ap1`, and `fwdFlowConsCand(ap1, c, ap2, config)` holds. */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + /** Holds if forward flow leaves `call` via `fwdFlowOutFromArg` with an argument access path for which `fwdFlowIsEntered` also holds. */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + /** Holds if `arg` flows into `p` at `call`, `arg` is reached by `fwdFlow`, `p` may flow through according to the previous stage, and `callMayFlowThroughFwd(call, config)` holds. */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and 
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + /** The individual cases of `revFlow`, before the result is intersected with `fwdFlow`. */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + sinkNode(node, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, 
ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + /** Holds if reverse flow reaches `mid` with `ap0`, and `storeStepFwd` relates `node`/`ap` to `mid`/`ap0` for `tc`, whose content is `c`. */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + /** Holds if reverse flow reaches some node `out` with `ap` and there is flow out of `call` from `ret` to `out`; a non-nil `ap` requires `allowsFieldFlow = true`. */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if reverse flow reaches a parameter with `toReturn = false` and `arg` flows into that parameter at some call. */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if reverse flow reaches a parameter with `toReturn = true` and return access path `returnAp`, and `flowThroughIntoCall` relates `arg` to that parameter at `call`. */ + pragma[nomagic] + private predicate 
revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + /** Holds if `store(node1, tc, node2, contentType, config)` holds and the step is covered by reverse flow with tail access path `ap1`, as witnessed by `revFlowStore` and `revFlowConsCand`. */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + /** Holds if the read of `c` from `node1` to `node2` is covered by `readStepFwd`, `node2` is in `revFlow`, and `revFlowStore` has a store of `c` with matching access paths. */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + /** Holds if `node` is in `revFlow` for some `toReturn`, `returnAp` and access path. */ + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } + /** Holds if `storeStepFwd` has a store of `tc` onto the tail `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + /** Holds if `storeStepCand` has a store of `tc` onto the tail `ap`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + /** Holds if reverse flow reaches parameter `p` of callable `c` with `ap`, `toReturn = true` and return access path `ap0`. */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, 
DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + /** Holds if parameter `p` of `c` with access path `ap` may flow to a return node of `c`: some return node is in both `revFlow` and `fwdFlow` with `ap0`, with `ap` as the forward summary context. */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), + pragma[only_bind_into](config)) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + /** Holds if reverse flow reaches an argument of `call` via `revFlowInToReturn` with a return access path for which `revFlowIsReturned` also holds. */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap | + revFlow(arg, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + /** Holds if `nodes`, `fields`, `conscand` and `tuples` are the counts over `fwdFlow`/`fwdConsCand` when `fwd = true`, or over `revFlow`/`consCand` when `fwd = false`. */ + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 2 logic. 
*/ +} +/** Holds if `flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config)` holds and both nodes are in `Stage2::revFlow`. */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlow(node1, pragma[only_bind_into](config)) +} +/** Holds if `flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config)` holds and both nodes are in `Stage2::revFlow`. */ +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlow(node1, pragma[only_bind_into](config)) +} +/** Provides `localFlowBigStep`, which relates a `localFlowEntry` node to a `localFlowExit` node reachable from it in one or more local flow steps. */ +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + predicate localFlowEntry(NodeEx node, Configuration config) { + Stage2::revFlow(node, config) and + ( + sourceNode(node, config) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParamNodeEx or + node.asNode() instanceof OutNodeExt or + store(_, _, node, _, config) or + read(_, _, node, config) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. 
+ */ + private predicate localFlowExit(NodeEx node, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, config) | // `node` has a non-local step to a node kept by stage 2 + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _, config) or + read(node, _, next, config) + ) + or + node instanceof FlowCheckNode + or + sinkNode(node, config) + } + /** Holds if `additionalLocalFlowStepNodeCand1(node1, node2, config)` holds and both nodes are in `Stage2::revFlow` with the access-path argument `false`. */ + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, NodeEx node2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and + Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. 
+ */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: a single step out of an entry node + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() // irrelevant dummy value + or + additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + or + exists(NodeEx mid | // extend by a `localFlowStepNodeCand1` step; `preservesValue` and `t` carry over + localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + or + exists(NodeEx mid | // extend by an additional step; `preservesValue` becomes false and `t` is the type of `node2` + localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. 
+ */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf, + Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep +/** Stage 3 of the flow computation: uses `Stage2` as `PrevStage` and `AccessPathFront` as its access-path type. */ +private module Stage3 { + module PrevStage = Stage2; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + /** Gets the previous stage's approximation of `ap`, namely `ap.toBoolNonEmpty()`. */ + private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + /** Gets the nil access path carrying the type of `node`, provided `node` is in `PrevStage::revFlow`. */ + private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + /** Gets an access path whose head is `tc`; `tail` is only required to exist. */ + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + /** Gets the content of the head of `ap`. */ + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + class Cc = boolean; + /** The call context `true`. */ + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + /** The call context `false`. */ + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + /** Gets the call context `false`. */ + Cc ccNone() { result = false } + + private class LocalCc = Unit; + /** Gets a `CcCall`; the result is not constrained by the arguments. */ + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + /** Gets a `CcNoCall`; the result is not constrained by the arguments. */ + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } + /** Gets a `LocalCc`; the result is not constrained by the arguments. */ + bindingset[node, cc, config] + private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } + /** Holds if `localFlowBigStep` relates `node1` to `node2` in some local call context; `lcc` is only required to exist. */ + private predicate localStep( + NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + private predicate flowIntoCall = flowIntoCallNodeCand2/5; + /** Holds if `ap` is cleared at `node`. */ + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) } + /** Holds if the underlying node of `node` is a `CastingNode`. */ + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + /** Holds if `ap` is not cleared at `node` and, when `node` is a casting node, the type of `ap` is compatible with the type of `node`. */ + bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { + not clear(node, ap) and + if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any() + } + /** Holds if the type of `ap` is compatible with `contentType`. */ + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } + + /* Begin: Stage 3 logic. 
*/ + private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { // `node` is in the previous stage's `revFlow` with access path `apa` + PrevStage::revFlow(node, _, _, apa, config) + } + /** Gets a value equal to `apa`; argument and result are related through a `pragma[only_bind_into]` intermediate. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + /** Holds if there is flow out of `call` from `ret` to `out`, `PrevStage::callMayFlowThroughRev(call, config)` holds, and some parameter of the callable enclosing `ret` may flow through according to the previous stage. */ + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + /** The individual cases of `fwdFlow`, before `flowCand` and `filter` are applied to the result. */ + pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + sourceNode(node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, 
argAp0, config) + ) + } + /** Holds if forward flow reaches `node1` with `ap1` in context `cc`/`argAp`, and the previous stage has a store-step candidate for `tc` from `node1` to `node2` whose content type passes `typecheckStore`. */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + /** Holds if forward flow reaches `node1` with access path `ap` whose head content is `c`, and the previous stage has a read-step candidate of `c` from `node1` to `node2`. */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + /** Holds if forward flow reaches an argument of `call` with `ap` in context `outercc`/`argAp` and flows into parameter `p` under context `innercc`; a non-nil `ap` requires `allowsFieldFlow = true`. */ + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if forward flow reaches a return node in a `CcNoCall` context and flows out of some call to `out`; `ccOut` is the context given by `getCallContextReturn`. */ + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) + | + ap instanceof ApNil or allowsFieldFlow 
= true + ) + } + /** Holds if forward flow reaches a return node in a `CcCall` context matching `call`, with summary context `argAp`, and flows through out of `call` to `out`. */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and + ccc.matchesCall(call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + /** Holds if `fwdFlowStore` covers the store of `tc` from `node1` to `node2`, `ap2` is `apCons(tc, ap1)`, and `fwdFlowRead` has a read of the same content with `ap2`. */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + /** Holds if `fwdFlowRead` covers the read of `c` from `n1` to `n2` with `ap1`, and `fwdFlowConsCand(ap1, c, ap2, config)` holds. */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + /** Holds if forward flow leaves `call` via `fwdFlowOutFromArg` with an argument access path for which `fwdFlowIsEntered` also holds. */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + /** Holds if `arg` flows into `p` at `call`, `arg` is reached by `fwdFlow`, `p` may flow through according to the previous stage, and `callMayFlowThroughFwd(call, config)` holds. */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, 
ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + /** The individual cases of `revFlow`, before the result is intersected with `fwdFlow`. */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + sinkNode(node, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + 
exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + /** Holds if reverse flow reaches `mid` with `ap0`, and `storeStepFwd` relates `node`/`ap` to `mid`/`ap0` for `tc`, whose content is `c`. */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + /** Holds if reverse flow reaches some node `out` with `ap` and there is flow out of `call` from `ret` to `out`; a non-nil `ap` requires `allowsFieldFlow = true`. */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if reverse flow reaches a parameter with `toReturn = false` and `arg` flows into that parameter at some call. */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + /** Holds if reverse flow reaches a parameter with `toReturn = true` and return access path `returnAp`, and `flowThroughIntoCall` relates `arg` to that parameter at `call`. */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + /** Holds if `store(node1, tc, node2, contentType, config)` holds and the step is covered by reverse flow with tail access path `ap1`, as witnessed by `revFlowStore` and `revFlowConsCand`. */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + /** Holds if the read of `c` from `node1` to `node2` is covered by `readStepFwd`, `node2` is in `revFlow`, and `revFlowStore` has a store of `c` with matching access paths. */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + /** Holds if `node` is in `revFlow` for some `toReturn`, `returnAp` and access path. */ + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } + /** Holds if `storeStepFwd` has a store of `tc` onto the tail `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + /** Holds if `storeStepCand` has a store of `tc` onto the tail `ap`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + /** Holds if reverse flow reaches parameter `p` of callable `c` with `ap`, `toReturn = true` and return access path `ap0`. */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + /** Holds if parameter `p` of `c` with access path `ap` may flow to a return node of `c`: some return node is in both `revFlow` and `fwdFlow` with `ap0`, with `ap` as the forward summary context. */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), + pragma[only_bind_into](config)) and + fwdFlow(ret, any(CcCall ccc), 
apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + /** Holds if reverse flow reaches an argument of `call` via `revFlowInToReturn` with a return access path for which `revFlowIsReturned` also holds. */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap | + revFlow(arg, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + /** Holds if `nodes`, `fields`, `conscand` and `tuples` are the counts over `fwdFlow`/`fwdConsCand` when `fwd = true`, or over `revFlow`/`consCand` when `fwd = false`. */ + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 3 logic. */ +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) { + exists(AccessPathFront apf | + Stage3::revFlow(node, true, _, apf, config) and + Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. 
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n | + Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +/** The underlying representation of `AccessPathApprox`: `TNil` is the empty path, `TConsNil` a single content followed by a type, `TConsCons` the first two contents plus the length, and `TCons1` only the head plus the length. `TCons1` is used for heads where `expensiveLen2unfolding` holds, the two other non-empty forms where it does not. */ private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + /** Gets a textual representation of this access path approximation. */ abstract string toString(); + + /** Gets the first `TypedContent` of this access path approximation, if any. */ abstract TypedContent getHead(); + + /** Gets the length of this access path approximation. */ abstract int len(); + + /** Gets the type of this access path approximation: the tracked type for the empty path, otherwise the container type of the head. */ abstract DataFlowType getType(); + + /** Gets the front of this access path approximation. */ abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +/** The empty access path approximation, consisting only of the type `t`. */ private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +/** A non-empty access path approximation. */ abstract private class AccessPathApproxCons extends AccessPathApprox { } + +/** An access path approximation of length 1: the content `tc` followed by the type `t`. */ private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +/** An access path approximation of length `len` whose first two contents are `tc1` and `tc2`. */ private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +/** An access path approximation of length `len` for which only the head `tc` is tracked. */ private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `apa`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `apa`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +/** The underlying representation of an optional `AccessPathApprox`. */ private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +/** An optional `AccessPathApprox`. */ private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + /* NOTE(review): the none case renders as the empty string here; this may be a placeholder lost in extraction, so confirm against the original source. */ this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +/** Stage 4 of the analysis: access paths are `AccessPathApprox` values and `Stage3` is the previous stage. */ private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + /** Gets the previous-stage approximation of `ap`, which is its front. */ private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + /** Gets the empty access path carrying the data-flow type of `node`, provided `node` is in the previous stage's `revFlow`. */ private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + /** Gets the access path obtained by pushing `tc` onto `tail`. */ bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + /** Gets the content of the head of `ap`. */ pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = LocalCallContext; + + /** Gets the call context for flow entering `c` through `call` from `outercc`, subject to `checkCallContextCall`: `TSpecificCall(call)` if `recordDataFlowCallSite(call, c)` holds, otherwise `TSomeCall()`. */ bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + /** Gets the call context for flow returning from `c` through `call` with `innercc`, subject to `checkCallContextReturn`: `TReturn(c, call)` if `reducedViableImplInReturn(c, call)` holds, otherwise `ccNone()`. */ bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(NodeEx node, Cc cc, 
Configuration config) { + localFlowEntry(node, config) and + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + /** Holds if `localFlowBigStep` has a step from `node1` to `node2` in local call context `lcc`, using the front of `ap` as its access path front. */ private predicate localStep( + NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) + } + + /** Holds if `flowOutOfCallNodeCand2` has a step out of `call` from `node1` to `node2` and both nodes are in the previous stage's `revFlow`. */ pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + /** Holds if `flowIntoCallNodeCand2` has a step into `call` from argument `node1` to parameter `node2` and both nodes are in the previous stage's `revFlow`. */ pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + /** Holds for every `node` and `ap`; this stage imposes no extra restriction here. */ bindingset[node, ap] + private predicate filter(NodeEx node, Ap ap) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. 
*/ + /** Holds if `node` is in the previous stage's `revFlow` with access path approximation `apa`. */ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + /** Gets a value equal to `apa`, with both sides routed through `pragma[only_bind_into]`. */ bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** Holds if `flowOutOfCall` has the step from `ret` to `out` for `call`, and in the previous stage `call` satisfies `callMayFlowThroughRev` and some parameter of the callable enclosing `ret` satisfies `parameterMayFlowThrough`. */ pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + /** Holds if `node` is reached by one of the forward-flow cases listed in the body, before `fwdFlow` applies its `flowCand` and `filter` checks. */ pragma[nomagic] + private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + /* source */ flowCand(node, _, config) and + sourceNode(node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + /* local step; a step that does not preserve the value requires an empty access path */ exists(NodeEx mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + /* jump step; resets the call context and the argument access path */ exists(NodeEx mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + /* additional jump step; only taken from an empty access path */ exists(NodeEx mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, 
argAp0, config) + ) + } + + /** Holds if forward flow reaches `node1` with access path `ap1` in context `cc` and `argAp`, and the previous stage has a store step candidate of `tc` from `node1` to `node2` for the approximation of `ap1`. */ pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + /** Holds if forward flow reaches `node1` with access path `ap` whose head content is `c`, and the previous stage has a read step candidate of `c` from `node1` to `node2`. */ pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + /** Holds if forward flow reaches an argument of `call` with access path `ap` in context `outercc`, that argument flows into parameter `p`, and `innercc` is the call context inside the callable of `p`. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** Holds if forward flow reaches a return node in a `CcNoCall` context with access path `ap` and flows out of some call to `out`, where `ccOut` is the context given by `getCallContextReturn`. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) + | + ap instanceof ApNil or allowsFieldFlow 
= true + ) + } + + /** Holds if forward flow reaches a return node with access path `ap` in a `CcCall` context that matches `call`, with `argAp` as the recorded argument access path, and `flowThroughOutOfCall` takes that return node to `out`. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and + ccc.matchesCall(call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** Holds if `fwdFlowStore` has a store of `tc` from `node1` with access path `ap1` to `node2`, `ap2` is `ap1` with `tc` pushed on, and some `fwdFlowRead` reads the content of `tc` from `ap2`. */ pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + /** Holds if `fwdFlowRead` has a read of `c` from `n1` with access path `ap1` to `n2`, and `fwdFlowConsCand` gives `ap2` as a tail of `ap1` under `c`. */ private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** Holds if some node `out` is in `fwdFlow` and is matched by both `fwdFlowOutFromArg` and `fwdFlowIsEntered` for `call`. */ pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + /** Holds if `flowIntoCall` has the step from `arg` to `p` for `call`, `arg` is in `fwdFlow`, `p` satisfies the previous stage's `parameterMayFlowThrough`, and `call` satisfies `callMayFlowThroughFwd`. */ pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, 
ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + /** Holds if `node` is reached by one of the reverse-flow cases listed in the body, before `revFlow` applies its final `fwdFlow` check. */ pragma[nomagic] + private predicate revFlow0( + NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + /* sink */ fwdFlow(node, _, _, ap, config) and + sinkNode(node, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + /* value-preserving local step */ exists(NodeEx mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + /* local step that does not preserve the value; requires empty access paths */ exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + /* jump step; resets the return flag and the return access path */ exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + /* additional jump step; requires empty access paths */ exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + 
exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + /** Holds if reverse flow reaches `mid` with access path `ap0`, and `storeStepFwd` has a store of `tc`, whose content is `c`, from `node` with access path `ap` to `mid` with access path `ap0`. */ pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + /** Holds if reverse flow reaches a node `out` with access path `ap` and `flowOutOfCall` takes `ret` to `out` for `call`. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** Holds if reverse flow reaches a parameter with access path `ap` and `toReturn = false`, and `flowIntoCall` takes `arg` to that parameter. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** Holds if reverse flow reaches a parameter with access path `ap`, `toReturn = true`, and return access path `returnAp`, and `flowThroughIntoCall` takes `arg` to that parameter for `call`. A non-empty `ap` requires `allowsFieldFlow = true`. */ pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + /** Holds if `store` has a step of `tc` from `node1` to `node2` with content type `contentType`, and `revFlowStore` and `revFlowConsCand` both match that store with `ap1` as the access path at `node1`. */ pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + /** Holds if `readStepFwd` has a read of `c` from `node1` to `node2`, `node2` is in `revFlow` with the access path that `readStepFwd` gives for `node2`, and `revFlowStore` has a store of `c` relating the same two access paths. */ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `node` is in `revFlow` for some return flag, return access path, and access path. */ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } + + /** Holds if `storeStepFwd` has a store of `tc` from some node whose access path is `ap`. */ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + /** Holds if `storeStepCand` has a store of `tc` from some node whose access path is `ap`. */ predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + /** Holds if parameter `p` of callable `c` is in `revFlow` with access path `ap`, `toReturn = true`, and return access path `ap0`. */ pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + /** Holds if parameter `p` of callable `c` with access path `ap` may flow to a return node of `c`; a return that updates the parameter at the position of `p` itself is excluded. */ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), + pragma[only_bind_into](config)) and + fwdFlow(ret, any(CcCall ccc), 
apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + /** Holds if some argument of `call` is in `revFlow` and is matched by both `revFlowInToReturn` and `revFlowIsReturned` for `call`. */ pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap | + revFlow(arg, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + /** Reports counts for this stage. With `fwd = true` they are taken from `fwdFlow` and `fwdConsCand`; with `fwd = false` from `revFlow` and `consCand`. `nodes` counts nodes, `fields` counts typed contents, `conscand` counts (typed content, access path) pairs, and `tuples` counts complete flow tuples. */ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 4 logic. 
*/ +} + +/** Gets a configuration equal to `conf`, with both sides routed through `pragma[only_bind_into]`. */ bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +/** Holds if `n` lies in a callable that has a parameter satisfying `Stage4::parameterMayFlowThrough` with `apa`, and `n` is in both Stage 4 `revFlow` (with `toReturn = true`) and Stage 4 `fwdFlow` (in a `CallContextCall` context with `apa` as the recorded argument access path). */ private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) { + exists(DataFlowCallable c, AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, apa, _) and + Stage4::revFlow(n, true, _, apa0, config) and + Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +/** The underlying representation of `SummaryCtx`; the `TSummaryCtxSome` branch only exists for parameters satisfying `Stage4::parameterMayFlowThrough` with the approximation of `ap`. */ private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, AccessPath ap) { + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + /** Gets a textual representation of this summary context. */ abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + /* NOTE(review): this renders as the empty string; it may be a placeholder lost in extraction, so confirm against the original source. */ override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + /** Gets the position of the parameter `p`, as given by `isParameterOf`. */ int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + /** Holds if the parameter `p` is at the specified location. */ predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. 
+ */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +/** Gets the number of nodes that are in Stage 4 `revFlow` with `apa` or that satisfy `nodeMayUseSummary` with `apa`. There is no result when that number is zero, because `strictcount` is used. */ private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n | + Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +/** Gets an access path approximation obtained by popping some head from `apa`, where that head and the result are a Stage 4 `consCand`. */ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. 
+ */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +/** The underlying representation of `AccessPath`: `TAccessPathNil` is the empty path; `TAccessPathCons` stores a head and a full tail, for approximations where `evalUnfold(apa, false, _)` does not hold; `TAccessPathCons2` (two heads and a length) and `TAccessPathCons1` (one head and a length) are for approximations where it does hold, with `expensiveLen1to2unfolding` choosing between the two. */ private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +/** The underlying representation of `PathNode`. */ private newtype TPathNode = + TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, config) and + sourceNode(node, config) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... 
or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + /* A sink node: it records only the node and the configuration, with no call context, summary context, or access path. */ TPathNodeSink(NodeEx node, Configuration config) { + sinkNode(node, pragma[only_bind_into](config)) and + Stage4::revFlow(node, pragma[only_bind_into](config)) and + ( + // A sink that is also a source ... + sourceNode(node, config) + or + // ... or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, TAccessPathNil(_)) and + pragma[only_bind_into](config) = mid.getConfiguration() + ) + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +/** The empty access path, consisting only of the type `t`. */ private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + /** Gets the type stored in this empty access path. */ DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +/** An access path stored as the content `head` followed by the complete access path `tail`. */ private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + /** Gets the textual representation of this access path without the leading `[`. With `needsSuffix = true` the result ends in `... (` and the caller still has to append the length and the closing `)]`; with `needsSuffix = false` the result is already complete. */ private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private PathNode getASuccessorIfHidden() { + this.(PathNodeImpl).isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.(PathNodeImpl).isHidden() and + not result.(PathNodeImpl).isHidden() + } + + /** Holds if this node is a source. 
*/ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + abstract NodeEx getNodeEx(); + + predicate isHidden() { + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + } + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNode n) { + n instanceof PathNodeSink or directReach(n.getASuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + query predicate subpaths = Subpaths::subpaths/4; +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(), + result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNodeEx() = sink.getNodeEx() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbindConf(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + sourceNode(node, config) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. 
This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+  NodeEx node;
+  Configuration config;
+
+  PathNodeSink() { this = TPathNodeSink(node, config) }
+
+  override NodeEx getNodeEx() { result = node }
+
+  override Configuration getConfiguration() { result = config }
+
+  override PathNode getASuccessorImpl() { none() } // a sink node has no successors
+
+  override predicate isSource() { sourceNode(node, config) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ *
+ * `sc` and `ap` are the summary context and access path at `node`. Each
+ * disjunct below covers one kind of step.
+ */
+private predicate pathStep(
+  PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+  exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC |
+    midnode = mid.getNodeEx() and
+    conf = mid.getConfiguration() and
+    cc = mid.getCallContext() and
+    sc = mid.getSummaryCtx() and
+    localCC =
+      getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+        midnode.getEnclosingCallable()) and
+    ap0 = mid.getAp()
+  |
+    localFlowBigStep(midnode, node, true, _, conf, localCC) and
+    ap = ap0 // with flag `true` the access path is carried over unchanged
+    or
+    localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and
+    ap0 instanceof AccessPathNil // with flag `false` the step must start from an empty access path
+  )
+  or // jump step: the call context and the summary context are reset
+  jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+  cc instanceof CallContextAny and
+  sc instanceof SummaryCtxNone and
+  ap = mid.getAp()
+  or // additional jump step: only from an empty access path, to a fresh empty access path
+  additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and
+  cc instanceof CallContextAny and
+  sc instanceof SummaryCtxNone and
+  mid.getAp() instanceof AccessPathNil and
+  ap = TAccessPathNil(node.getDataFlowType())
+  or // store step: popping `tc` from `ap` gives the access path of `mid`
+  exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and
+  sc = mid.getSummaryCtx()
+  or // read step: pushing `tc` onto `ap` gives the access path of `mid`
+  exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and
+  sc = mid.getSummaryCtx()
+  or // flow into a callable
+  pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp()
+  or // flow out of a callable: there is no summary context afterwards
+  pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone
+  or // flow through a callable: the summary context of `mid` is kept
+  pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx()
+}
+/**
+ * Holds if `mid` has access path `ap0` with head `tc` and call context `cc`,
+ * and `Stage4::readStepCand` relates the node of `mid` to `node` for the
+ * content of `tc`.
+ */
+pragma[nomagic]
+private predicate pathReadStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  ap0 = mid.getAp() and
+  tc = ap0.getHead() and
+  Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and
+  cc = mid.getCallContext()
+}
+/**
+ * Holds if `mid` has access path `ap0` and call context `cc`, and
+ * `Stage4::storeStepCand` relates the node of `mid` to `node` for `tc`.
+ */
+pragma[nomagic]
+private predicate pathStoreStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  ap0 = mid.getAp() and
+  Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and
+  cc = mid.getCallContext()
+}
+/**
+ * Holds if `mid` is at a return node with return position `pos`, its call
+ * context `innercc` is a `CallContextNoCall`, `apa` approximates its access
+ * path, and `config` is its configuration.
+ */
+private predicate pathOutOfCallable0(
+  PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+  Configuration config
+) {
+  pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+  innercc = mid.getCallContext() and
+  innercc instanceof CallContextNoCall and
+  apa = mid.getAp().getApprox() and
+  config = mid.getConfiguration()
+}
+/**
+ * Holds if `mid` may return to `call` with return kind `kind`. The resulting
+ * context `cc` is `TReturn(c, call)` when `reducedViableImplInReturn(c, call)`
+ * holds for the returning callable `c`, and `TAnyCallContext()` otherwise.
+ */
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+  PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+    pathOutOfCallable0(mid, pos, innercc, apa, config) and
+    c = pos.getCallable() and
+    kind = pos.getKind() and
+    resolveReturn(innercc, c, call)
+  |
+    if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+  )
+}
+/**
+ * Gets an out node of `call` for return kind `kind` for which
+ * `Stage4::revFlow` holds with access path approximation `apa`.
+ */
+pragma[noinline]
+private NodeEx getAnOutNodeFlow(
+  ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+  result.asNode() = kind.getAnOutNode(call) and
+  Stage4::revFlow(result, _, _, apa, config)
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`.
The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+  exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+    pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+    out = getAnOutNodeFlow(kind, call, apa, config)
+  )
+}
+
+/**
+ * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ * `ap` is the access path of `mid` and `apa` is its approximation.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+  PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+  exists(ArgNode arg |
+    arg = mid.getNodeEx().asNode() and
+    cc = mid.getCallContext() and
+    arg.argumentOf(call, i) and
+    ap = mid.getAp() and
+    apa = ap.getApprox()
+  )
+}
+/**
+ * Holds if the `i`th parameter of `callable` is a node for which
+ * `Stage4::revFlow` holds with access path approximation `apa`.
+ */
+pragma[noinline]
+private predicate parameterCand(
+  DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+  exists(ParamNodeEx p |
+    Stage4::revFlow(p, _, _, apa, config) and
+    p.isParameterOf(callable, i)
+  )
+}
+/**
+ * Holds if `mid` is the `i`th argument of `call` in context `outercc` with
+ * access path `ap`, `callable` is a target of `call` resolved in `outercc`,
+ * and the `i`th parameter of `callable` is a candidate for that access path.
+ */
+pragma[nomagic]
+private predicate pathIntoCallable0(
+  PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+  AccessPath ap
+) {
+  exists(AccessPathApprox apa |
+    pathIntoArg(mid, i, outercc, call, ap, apa) and
+    callable = resolveCall(call, outercc) and
+    // `j` equals `i`, phrased as two inequalities. NOTE(review): presumably this keeps `i` from being bound through this call - confirm.
+    parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration())
+  )
+}
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ */
+private predicate pathIntoCallable(
+  PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+  DataFlowCall call
+) {
+  exists(int i, DataFlowCallable callable, AccessPath ap |
+    pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+    p.isParameterOf(callable, i) and
+    (
+      sc = TSummaryCtxSome(p, ap)
+      or
+      not exists(TSummaryCtxSome(p, ap)) and // no summary context exists for this parameter and access path
+      sc = TSummaryCtxNone()
+    )
+  |
+    if recordDataFlowCallSite(call, callable)
+    then innercc = TSpecificCall(call)
+    else innercc = TSomeCall()
+  )
+}
+
+/**
+ * Holds if data may flow from a parameter given by `sc` to a return of kind `kind`.
+ *
+ * `cc`, `ap` and `config` are the call context, access path and configuration
+ * at the return node, and `apa` is the approximation of `ap`. Parameter-update
+ * return kinds whose position equals the parameter position of `sc` are
+ * excluded.
+ */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+  ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(PathNodeMid mid, RetNodeEx ret, int pos |
+    mid.getNodeEx() = ret and
+    kind = ret.getKind() and
+    cc = mid.getCallContext() and
+    sc = mid.getSummaryCtx() and
+    config = mid.getConfiguration() and
+    ap = mid.getAp() and
+    apa = ap.getApprox() and
+    pos = sc.getParameterPos() and
+    not kind.(ParamUpdateReturnKind).getPosition() = pos
+  )
+}
+/**
+ * Holds if data may flow from `mid` into a callable via `call` and from the
+ * entered parameter to a return of kind `kind`. `cc` is the call context of
+ * `mid`, `ap` is the access path at the return, and `apa` approximates `ap`.
+ */
+pragma[nomagic]
+private predicate pathThroughCallable0(
+  DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+  AccessPathApprox apa
+) {
+  exists(CallContext innercc, SummaryCtx sc |
+    pathIntoCallable(mid, _, cc, innercc, sc, call) and
+    paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration()))
+  )
+}
+
+/**
+ * Holds if data may flow from `mid` through a callable to the node `out`.
+ * The context `cc` is restored to its value prior to entering the callable.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+  exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa |
+    pathThroughCallable0(call, mid, kind, cc, ap, apa) and
+    out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration()))
+  )
+}
+/**
+ * Provides the predicates that identify subpath-tuples `(arg, par, ret, out)`.
+ * The tuple is built up in the stages `subpaths01` to `subpaths03` and exposed
+ * by `subpaths`.
+ */
+private module Subpaths {
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+   * `kind`, `sc`, `apout`, and `innercc`.
+   */
+  pragma[nomagic]
+  private predicate subpaths01(
+    PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+    NodeEx out, AccessPath apout
+  ) {
+    pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+    pathIntoCallable(arg, par, _, innercc, sc, _) and
+    paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+      unbindConf(arg.getConfiguration()))
+  }
+
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+   * `kind`, `sc`, `apout`, and `innercc`.
+   *
+   * In addition to `subpaths01`, this requires `out` to be an out node of
+   * kind `kind`.
+   */
+  pragma[nomagic]
+  private predicate subpaths02(
+    PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+    NodeEx out, AccessPath apout
+  ) {
+    subpaths01(arg, par, sc, innercc, kind, out, apout) and
+    out.asNode() = kind.getAnOutNode(_)
+  }
+  /** Gets the configuration of `n`. */
+  pragma[nomagic]
+  private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() }
+
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+   */
+  pragma[nomagic]
+  private predicate subpaths03(
+    PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+  ) {
+    exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+      subpaths02(arg, par, sc, innercc, kind, out, apout) and
+      ret.getNodeEx() = retnode and
+      kind = retnode.getKind() and
+      innercc = ret.getCallContext() and
+      sc = ret.getSummaryCtx() and
+      ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+      apout = ret.getAp() and
+      not ret.isHidden() // hidden return nodes are not reported as `ret`
+    )
+  }
+
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+   * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+   * `ret -> out` is summarized as the edge `arg -> out`.
+   */
+  predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+    exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+      pragma[only_bind_into](arg).getASuccessor() = par and
+      pragma[only_bind_into](arg).getASuccessor() = out and
+      subpaths03(arg, p, ret, o, apout) and
+      par.getNodeEx() = p and
+      out.getNodeEx() = o and
+      out.getAp() = apout
+    )
+  }
+
+  /**
+   * Holds if `n` can reach a return node in a summarized subpath.
+   *
+   * The search goes backwards from a `ret` node and does not continue past
+   * a node that is itself the `par` of some subpath-tuple.
+   */
+  predicate retReach(PathNode n) {
+    subpaths(_, _, n, _)
+    or
+    exists(PathNode mid |
+      retReach(mid) and
+      n.getASuccessor() = mid and
+      not subpaths(_, mid, _, _)
+    )
+  }
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ */
+private predicate flowsTo(
+  PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+  flowsource.isSource() and
+  flowsource.getConfiguration() = configuration and
+  flowsource.(PathNodeImpl).getNodeEx().asNode() = source and
+  (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and // zero steps, or one or more `pathSucc` steps
+  flowsink.getNodeEx().asNode() = sink
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ *
+ * This is the five-argument `flowsTo` with the path nodes left unconstrained.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+  flowsTo(_, _, source, sink, configuration)
+}
+/**
+ * Holds if `nodes`, `fields`, `conscand` and `tuples` are the numbers of
+ * distinct nodes, access path heads, access paths and path nodes. With
+ * `fwd = true` all path nodes are counted; with `fwd = false` only those
+ * for which `reach` holds are counted.
+ */
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+  fwd = true and
+  nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and
+  fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and
+  conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and
+  tuples = count(PathNode pn)
+  or
+  fwd = false and
+  nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
+  fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
+  conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
+  tuples = count(PathNode pn | reach(pn))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Calculates per-stage metrics for data flow.
+ */
+predicate stageStats(
+  int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+  stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+  or // the final stage is computed by `finalStats`, which takes no configuration, so `config` is unconstrained here
+  stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples)
+  or
+  stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples)
+}
+/**
+ * Provides partial paths: path nodes that are generated forwards from sources
+ * and backwards from sinks, bounded by `Configuration::explorationLimit()`.
+ */
+private module FlowExploration {
+  private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { // holds if some step leads from a node in `c1` to a node in a different callable `c2`
+    exists(NodeEx node1, NodeEx node2 |
+      jumpStep(node1, node2, config)
+      or
+      additionalJumpStep(node1, node2, config)
+      or
+      // flow into callable
+      viableParamArgEx(_, node2, node1)
+      or
+      // flow out of a callable
+      viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2)
+    |
+      c1 = node1.getEnclosingCallable() and
+      c2 = node2.getEnclosingCallable() and
+      c1 != c2
+    )
+  }
+  /**
+   * Holds if `c` contains a source of `config`, or can be reached from such a
+   * callable by `callableStep`s.
+   */
+  private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+    exists(Node n | config.isSource(n) and c = getNodeEnclosingCallable(n))
+    or
+    exists(DataFlowCallable mid |
+      interestingCallableSrc(mid, config) and callableStep(mid, c, config)
+    )
+  }
+  /**
+   * Holds if `c` contains a sink of `config`, or can reach such a callable by
+   * `callableStep`s.
+   */
+  private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+    exists(Node n | config.isSink(n) and c = getNodeEnclosingCallable(n))
+    or
+    exists(DataFlowCallable mid |
+      interestingCallableSink(mid, config) and callableStep(c, mid, config)
+    )
+  }
+  /**
+   * An interesting callable paired with a configuration, or one of the two
+   * synthetic roots `TCallableSrc` and `TCallableSink` from which the
+   * shortest distances below are measured.
+   */
+  private newtype TCallableExt =
+    TCallable(DataFlowCallable c, Configuration config) {
+      interestingCallableSrc(c, config) or
+      interestingCallableSink(c, config)
+    } or
+    TCallableSrc() or
+    TCallableSink()
+
+  private predicate callableExtSrc(TCallableSrc src) { any() }
+
+  private predicate callableExtSink(TCallableSink sink) { any() }
+  /**
+   * Holds if there is a forward step from `ce1` to `ce2`: a `callableStep`
+   * within one configuration, a step from the synthetic source root to a
+   * callable containing a source, or a step from a callable containing a sink
+   * to the synthetic sink root.
+   */
+  private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+    exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config |
+      callableStep(c1, c2, config) and
+      ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+      ce2 = TCallable(c2, pragma[only_bind_into](config))
+    )
+    or
+    exists(Node n, Configuration config |
+      ce1 = TCallableSrc() and
+      config.isSource(n) and
+      ce2 = TCallable(getNodeEnclosingCallable(n), config)
+    )
+    or
+    exists(Node n, Configuration config |
+      ce2 = TCallableSink() and
+      config.isSink(n) and
+      ce1 = TCallable(getNodeEnclosingCallable(n), config)
+    )
+  }
+
+  private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+    callableExtStepFwd(ce2, ce1)
+  }
+
+  private int distSrcExt(TCallableExt c) =
+    shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+  private int distSinkExt(TCallableExt c) =
+    shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+  private int distSrc(DataFlowCallable c, Configuration config) {
+    result = distSrcExt(TCallable(c, config)) - 1 // discount the step from the synthetic source root
+  }
+
+  private int distSink(DataFlowCallable c, Configuration config) {
+    result = distSinkExt(TCallable(c, config)) - 1 // discount the step to the synthetic sink root
+  }
+
+  private newtype TPartialAccessPath =
+    TPartialNil(DataFlowType t) or
+    TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] }
+
+  /**
+   * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first
+   * element of the list and its length are tracked. If data flows from a source to
+   * a given node with a given `AccessPath`, this indicates the sequence of
+   * dereference operations needed to get from the value in the node to the
+   * tracked object. The final type indicates the type of the tracked object.
+   */
+  private class PartialAccessPath extends TPartialAccessPath {
+    abstract string toString();
+
+    TypedContent getHead() { this = TPartialCons(result, _) }
+
+    int len() {
+      this = TPartialNil(_) and result = 0
+      or
+      this = TPartialCons(_, result)
+    }
+
+    DataFlowType getType() {
+      this = TPartialNil(result)
+      or
+      exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType())
+    }
+  }
+
+  private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+    override string toString() {
+      exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t)))
+    }
+  }
+
+  private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+    override string toString() {
+      exists(TypedContent tc, int len | this = TPartialCons(tc, len) |
+        if len = 1
+        then result = "[" + tc.toString() + "]"
+        else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]"
+      )
+    }
+  }
+
+  private newtype TRevPartialAccessPath =
+    TRevPartialNil() or
+    TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] }
+
+  /**
+   * Conceptually a list of `Content`s, but only the first
+   * element of the list and its length are tracked.
+   */
+  private class RevPartialAccessPath extends TRevPartialAccessPath {
+    abstract string toString();
+
+    Content getHead() { this = TRevPartialCons(result, _) }
+
+    int len() {
+      this = TRevPartialNil() and result = 0
+      or
+      this = TRevPartialCons(_, result)
+    }
+  }
+
+  private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+    override string toString() { result = "" }
+  }
+
+  private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+    override string toString() {
+      exists(Content c, int len | this = TRevPartialCons(c, len) |
+        if len = 1
+        then result = "[" + c.toString() + "]"
+        else result = "[" + c.toString() + ", ... (" + len.toString() + ")]"
+      )
+    }
+  }
+
+  private newtype TSummaryCtx1 =
+    TSummaryCtx1None() or
+    TSummaryCtx1Param(ParamNodeEx p)
+
+  private newtype TSummaryCtx2 =
+    TSummaryCtx2None() or
+    TSummaryCtx2Some(PartialAccessPath ap)
+
+  private newtype TRevSummaryCtx1 =
+    TRevSummaryCtx1None() or
+    TRevSummaryCtx1Some(ReturnPosition pos)
+
+  private newtype TRevSummaryCtx2 =
+    TRevSummaryCtx2None() or
+    TRevSummaryCtx2Some(RevPartialAccessPath ap)
+  /**
+   * The partial path nodes. A forward node is either a source or the target
+   * of a `partialPathStep` whose callable lies within the exploration limit
+   * of a source. A reverse node is either a sink or the origin of a
+   * `revPartialPathStep` whose callable lies within the exploration limit of
+   * a sink. Nothing is generated unless `explorationLimit()` has a value.
+   */
+  private newtype TPartialPathNode =
+    TPartialPathNodeFwd(
+      NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+      Configuration config
+    ) {
+      sourceNode(node, config) and
+      cc instanceof CallContextAny and
+      sc1 = TSummaryCtx1None() and
+      sc2 = TSummaryCtx2None() and
+      ap = TPartialNil(node.getDataFlowType()) and
+      not fullBarrier(node, config) and
+      exists(config.explorationLimit())
+      or
+      partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and
+      distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit()
+    } or
+    TPartialPathNodeRev(
+      NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+      Configuration config
+    ) {
+      sinkNode(node, config) and
+      sc1 = TRevSummaryCtx1None() and
+      sc2 = TRevSummaryCtx2None() and
+      ap = TRevPartialNil() and
+      not fullBarrier(node, config) and
+      exists(config.explorationLimit())
+      or
+      exists(PartialPathNodeRev mid |
+        revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+        not clearsContentCached(node.asNode(), ap.getHead()) and
+        not fullBarrier(node, config) and
+        distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
+      )
+    }
+  /**
+   * Holds if `node` is the target of a forward partial path step, is not a
+   * full barrier, does not clear the content at the head of `ap`, and, if it
+   * is a `CastingNode`, has a type compatible with that of `ap`.
+   */
+  pragma[nomagic]
+  private predicate partialPathNodeMk0(
+    NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(PartialPathNodeFwd mid |
+      partialPathStep(mid, node, cc, sc1, sc2, ap, config) and
+      not fullBarrier(node, config) and
+      not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
+      if node.asNode() instanceof CastingNode
+      then compatibleTypes(node.getDataFlowType(), ap.getType())
+      else any()
+    )
+  }
+
+  /**
+   * A `Node` augmented with a call context, an access path, and a configuration.
+   *
+   * Only forward nodes (`PartialPathNodeFwd`) carry a call context; reverse
+   * nodes (`PartialPathNodeRev`) do not.
+   */
+  class PartialPathNode extends TPartialPathNode {
+    /** Gets a textual representation of this element. */
+    string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+    /**
+     * Gets a textual representation of this element, including a textual
+     * representation of the call context.
+     *
+     * This has a result only for forward nodes, because `ppCtx` is only
+     * defined for those.
+     */
+    string toStringWithContext() {
+      result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+    }
+
+    /**
+     * Holds if this element is at the specified location.
+     * The location spans column `startcolumn` of line `startline` to
+     * column `endcolumn` of line `endline` in file `filepath`.
+     * For more information, see
+     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+     */
+    predicate hasLocationInfo(
+      string filepath, int startline, int startcolumn, int endline, int endcolumn
+    ) {
+      this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+    }
+
+    /** Gets the underlying `Node`. */
+    final Node getNode() { this.getNodeEx().projectToNode() = result }
+    /** Gets the underlying `NodeEx` of this forward or reverse node. */
+    private NodeEx getNodeEx() {
+      result = this.(PartialPathNodeFwd).getNodeEx() or
+      result = this.(PartialPathNodeRev).getNodeEx()
+    }
+
+    /** Gets the associated configuration. */
+    Configuration getConfiguration() { none() }
+
+    /** Gets a successor of this node, if any. */
+    PartialPathNode getASuccessor() { none() }
+
+    /**
+     * Gets the approximate distance to the nearest source measured in number
+     * of interprocedural steps.
+     */
+    int getSourceDistance() {
+      result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+    }
+
+    /**
+     * Gets the approximate distance to the nearest sink measured in number
+     * of interprocedural steps.
+     */
+    int getSinkDistance() {
+      result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+    }
+    /**
+     * Gets the access path suffix used in the textual representation: the
+     * access path text preceded by a space, or empty if that text is empty.
+     */
+    private string ppAp() {
+      exists(string s |
+        s = this.(PartialPathNodeFwd).getAp().toString() or
+        s = this.(PartialPathNodeRev).getAp().toString()
+      |
+        if s = "" then result = "" else result = " " + s
+      )
+    }
+    /** Gets the call context of a forward node, enclosed in ` <` and `>`. */
+    private string ppCtx() {
+      result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+    }
+
+    /** Holds if this is a source in a forward-flow path. */
+    predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+    /** Holds if this is a sink in a reverse-flow path. */
+    predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+  }
+
+  /**
+   * Provides the query predicates needed to include a graph in a path-problem query.
+   */
+  module PartialPathGraph {
+    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+    query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b }
+  }
+  /**
+   * A partial path node in a forward-flow path, consisting of a node, a call
+   * context, a two-part summary context, a partial access path, and a
+   * configuration.
+   */
+  private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+    NodeEx node;
+    CallContext cc;
+    TSummaryCtx1 sc1;
+    TSummaryCtx2 sc2;
+    PartialAccessPath ap;
+    Configuration config;
+
+    PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+    NodeEx getNodeEx() { result = node }
+
+    CallContext getCallContext() { result = cc }
+
+    TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+    TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+    PartialAccessPath getAp() { result = ap }
+
+    override Configuration getConfiguration() { result = config }
+
+    override PartialPathNodeFwd getASuccessor() {
+      partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+        result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+    }
+
+    predicate isSource() {
+      sourceNode(node, config) and
+      cc instanceof CallContextAny and
+      sc1 = TSummaryCtx1None() and
+      sc2 = TSummaryCtx2None() and
+      ap instanceof TPartialNil
+    }
+  }
+  /**
+   * A partial path node in a reverse-flow path, consisting of a node, a
+   * two-part summary context, a reverse partial access path, and a
+   * configuration.
+   */
+  private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+    NodeEx node;
+    TRevSummaryCtx1 sc1;
+    TRevSummaryCtx2 sc2;
+    RevPartialAccessPath ap;
+    Configuration config;
+
+    PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+    NodeEx getNodeEx() { result = node }
+
+    TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+    TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+    RevPartialAccessPath getAp() { result = ap }
+
+    override Configuration getConfiguration() { result = config }
+
+    override PartialPathNodeRev getASuccessor() {
+      // the step relation runs backwards, so the successor is the `mid` argument
+      revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+        this.getAp(), this.getConfiguration())
+    }
+
+    predicate isSink() {
+      sinkNode(node, config) and
+      sc1 = TRevSummaryCtx1None() and
+      sc2 = TRevSummaryCtx2None() and
+      ap = TRevPartialNil()
+    }
+  }
+  /**
+   * Holds if there is a forward partial path step from `mid` to `node`, where
+   * `cc`, `sc1`, `sc2`, `ap` and `config` are the call context, summary
+   * context, access path and configuration at `node`. Each disjunct below
+   * covers one kind of step.
+   */
+  private predicate partialPathStep(
+    PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+    PartialAccessPath ap, Configuration config
+  ) {
+    not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and
+    (
+      localFlowStep(mid.getNodeEx(), node, config) and
+      cc = mid.getCallContext() and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      ap = mid.getAp() and
+      config = mid.getConfiguration()
+      or // additional local step: only from an empty access path, to a fresh empty access path
+      additionalLocalFlowStep(mid.getNodeEx(), node, config) and
+      cc = mid.getCallContext() and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      mid.getAp() instanceof PartialAccessPathNil and
+      ap = TPartialNil(node.getDataFlowType()) and
+      config = mid.getConfiguration()
+    )
+    or // jump step: the call context and the summary context are reset
+    jumpStep(mid.getNodeEx(), node, config) and
+    cc instanceof CallContextAny and
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None() and
+    ap = mid.getAp() and
+    config = mid.getConfiguration()
+    or // additional jump step: only from an empty access path, to a fresh empty access path
+    additionalJumpStep(mid.getNodeEx(), node, config) and
+    cc instanceof CallContextAny and
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None() and
+    mid.getAp() instanceof PartialAccessPathNil and
+    ap = TPartialNil(node.getDataFlowType()) and
+    config = mid.getConfiguration()
+    or // store step
+    partialPathStoreStep(mid, _, _, node, ap) and
+    cc = mid.getCallContext() and
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2() and
+    config = mid.getConfiguration()
+    or // read step: `ap` must be an access path from which some store step produced `ap0`
+    exists(PartialAccessPath ap0, TypedContent tc |
+      partialPathReadStep(mid, ap0, tc, node, cc, config) and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      apConsFwd(ap, tc, ap0, config)
+    )
+    or // flow into a callable
+    partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config)
+    or // flow out of a callable: there is no summary context afterwards
+    partialPathOutOfCallable(mid, node, cc, ap, config) and
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None()
+    or // flow through a callable: the summary context of `mid` is kept
+    partialPathThroughCallable(mid, node, cc, ap, config) and
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2()
+  }
+
+
bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } /* holds exactly when `result = i`, but written as two inequalities instead of an equality; presumably so the optimizer cannot use it to bind one side from the other - TODO confirm */ + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c =
pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgNode arg | + arg = mid.getNodeEx().asNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + 
exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, _, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + 
config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and /* together with the two equalities above, this restricts the disjunct to a `mid` whose own summary context is None */ + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c +
) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + DataFlowCall call, RevPartialAccessPath ap, Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p | + mid.getNodeEx() = p and + p.getPosition() = pos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, + Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | + revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and + revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, int pos | + revPartialPathThroughCallable0(call, mid, pos, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and 
+ sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll new file mode 100644 index 00000000000..f43a550af57 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll @@ -0,0 +1,1294 @@ +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +import Cached + +/** + * The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathFront` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision during pruning. + */ +predicate accessPathApproxCostLimits(int apLimit, int tupleLimit) { + apLimit = 10 and + tupleLimit = 10000 +} + +/** + * The cost limits for the `AccessPathApprox` to `AccessPath` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathApprox` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision. + */ +predicate accessPathCostLimits(int apLimit, int tupleLimit) { + apLimit = 5 and + tupleLimit = 1000 +} + +/** + * Provides a simple data-flow analysis for resolving lambda calls. The analysis + * currently excludes read-steps, store-steps, and flow-through. + * + * The analysis uses non-linear recursion: When computing a flow path in or out + * of a call, we use the results of the analysis recursively to resolve lambda + * calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly. 
+ */ +private module LambdaFlow { + private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallable(call), i) + } + + private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallableLambda(call, _), i) + } + + private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParamNonLambda(call, i, p) and + arg.argumentOf(call, i) + ) + } + + private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParamLambda(call, i, p) and + arg.argumentOf(call, i) + ) + } + + private newtype TReturnPositionSimple = + TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) { + exists(ReturnNode ret | + c = getNodeEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + pragma[noinline] + private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) { + result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) { + result = TReturnPositionSimple0(viableCallable(call), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosLambda( + DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind + ) { + result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind) + } + + private predicate viableReturnPosOutNonLambda( + DataFlowCall call, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosNonLambda(call, kind) and + out = getAnOutNode(call, kind) + ) + } + + private predicate viableReturnPosOutLambda( + DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosLambda(call, lastCall, kind) and + out = getAnOutNode(call, kind) + ) + } + + /** + * Holds if data can flow 
(inter-procedurally) from `node` (of type `t`) to + * the lambda call `lambdaCall`. + * + * The parameter `toReturn` indicates whether the path from `node` to + * `lambdaCall` goes through a return, and `toJump` whether the path goes + * through a jump step. + * + * The call context `lastCall` records the last call on the path from `node` + * to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing + * callable of `lambdaCall`. + */ + pragma[nomagic] + predicate revLambdaFlow( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and + if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode + then compatibleTypes(t, getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + predicate revLambdaFlow0( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + lambdaCall(lambdaCall, kind, node) and + t = getNodeDataFlowType(node) and + toReturn = false and + toJump = false and + lastCall = TDataFlowCallNone() + or + // local flow + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall) + | + simpleLocalFlowStep(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) and + getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // jump step + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and + toReturn = false and + toJump = true and + lastCall = TDataFlowCallNone() + | + jumpStepCached(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) 
and + getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // flow into a callable + exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call | + revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and + ( + if lastCall0 = TDataFlowCallNone() and toJump = false + then lastCall = TDataFlowCallSome(call) + else lastCall = lastCall0 + ) and + toReturn = false + | + viableParamArgNonLambda(call, p, node) + or + viableParamArgLambda(call, p, node) // non-linear recursion + ) + or + // flow out of a callable + exists(TReturnPositionSimple pos | + revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and + getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and + toReturn = true + ) + } + + pragma[nomagic] + predicate revLambdaFlowOutLambdaCall( + DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump, + DataFlowCall call, DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + exists(ReturnKindExt rk | + out = rk.getAnOutNode(call) and + lambdaCall(call, _, _) + ) + } + + pragma[nomagic] + predicate revLambdaFlowOut( + DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t, + boolean toJump, DataFlowCallOption lastCall + ) { + exists(DataFlowCall call, OutNode out | + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + viableReturnPosOutNonLambda(call, pos, out) + or + // non-linear recursion + revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and + viableReturnPosOutLambda(call, _, pos, out) + ) + } + + pragma[nomagic] + predicate revLambdaFlowIn( + DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump, + DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall) + } +} + 
+private DataFlowCallable viableCallableExt(DataFlowCall call) { + result = viableCallable(call) + or + result = viableCallableLambda(call, _) +} + +cached +private module Cached { + /** + * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to + * force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby + * collapse the two stages. + */ + cached + predicate forceCachingInSameStage() { any() } + + cached + predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() } + + cached + predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) { + c = call.getEnclosingCallable() + } + + cached + predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) } + + cached + predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) } + + cached + predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) } + + cached + predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) } + + cached + predicate outNodeExt(Node n) { + n instanceof OutNode + or + n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode + } + + cached + predicate hiddenNode(Node n) { nodeIsHidden(n) } + + cached + OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) { + result = getAnOutNode(call, k.(ValueReturnKind).getKind()) + or + exists(ArgNode arg | + result.(PostUpdateNode).getPreUpdateNode() = arg and + arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition()) + ) + } + + cached + predicate returnNodeExt(Node n, ReturnKindExt k) { + k = TValueReturn(n.(ReturnNode).getKind()) + or + exists(ParamNode p, int pos | + parameterValueFlowsToPreUpdate(p, n) and + p.isParameterOf(_, pos) and + k = TParamUpdate(pos) + ) + } + + cached + predicate castNode(Node n) { n instanceof CastNode } + + cached + predicate castingNode(Node n) { + castNode(n) or + n instanceof ParamNode or + n instanceof OutNodeExt or + // For reads, `x.f`, we want to
check that the tracked type after the read (which + // is obtained by popping the head of the access path stack) is compatible with + // the type of `x.f`. + read(_, _, n) + } + + cached + predicate parameterNode(Node n, DataFlowCallable c, int i) { + n.(ParameterNode).isParameterOf(c, i) + } + + cached + predicate argumentNode(Node n, DataFlowCall call, int pos) { + n.(ArgumentNode).argumentOf(call, pos) + } + + /** + * Gets a viable target for the lambda call `call`. + * + * `lastCall` records the call required to reach `call` in order for the result + * to be a viable target, if any. + */ + cached + DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) { + exists(Node creation, LambdaCallKind kind | + LambdaFlow::revLambdaFlow(call, kind, creation, _, _, _, lastCall) and + lambdaCreation(creation, kind, result) + ) + } + + /** + * Holds if `p` is the `i`th parameter of a viable dispatch target of `call`. + * The instance parameter is considered to have index `-1`. + */ + pragma[nomagic] + private predicate viableParam(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallableExt(call), i) + } + + /** + * Holds if `arg` is a possible argument to `p` in `call`, taking virtual + * dispatch into account. + */ + cached + predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParam(call, i, p) and + arg.argumentOf(call, i) and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p)) + ) + } + + pragma[nomagic] + private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) { + viableCallableExt(call) = result.getCallable() and + kind = result.getKind() + } + + /** + * Holds if a value at return position `pos` can be returned to `out` via `call`, + * taking virtual dispatch into account. 
+ */ + cached + predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) { + exists(ReturnKindExt kind | + pos = viableReturnPos(call, kind) and + out = kind.getAnOutNode(call) + ) + } + + /** Provides predicates for calculating flow-through summaries. */ + private module FlowThrough { + /** + * The first flow-through approximation: + * + * - Input access paths are abstracted with a Boolean parameter + * that indicates (non-)emptiness. + */ + private module Cand { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps. + * + * `read` indicates whether it is contents of `p` that can flow to `node`. + */ + pragma[nomagic] + private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) { + p = node and + read = false + or + // local flow + exists(Node mid | + parameterValueFlowCand(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlowCand(p, mid, false) and + read(mid, _, node) and + read = true + ) + or + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, false) and + argumentValueFlowsThroughCand(arg, node, read) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, read) and + argumentValueFlowsThroughCand(arg, node, false) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) { + parameterValueFlowCand(p, arg, read) + } + + pragma[nomagic] + predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) { + parameterValueFlowCand(p, n.getPreUpdateNode(), false) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps, not taking call contexts + * into account. + * + * `read` indicates whether it is contents of `p` that can flow to the return + * node. 
+ */ + predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) { + exists(ReturnNode ret | + parameterValueFlowCand(p, ret, read) and + kind = ret.getKind() + ) + } + + pragma[nomagic] + private predicate argumentValueFlowsThroughCand0( + DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturnCand(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only value-preserving steps, + * not taking call contexts into account. + * + * `read` indicates whether it is contents of `arg` that can flow to `out`. + */ + predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThroughCand0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + ) + } + + predicate cand(ParamNode p, Node n) { + parameterValueFlowCand(p, n, _) and + ( + parameterValueFlowReturnCand(p, _, _) + or + parameterValueFlowsToPreUpdateCand(p, _) + ) + } + } + + /** + * The final flow-through calculation: + * + * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`) + * or summarized as a single read step with before and after types recorded + * in the `ReadStepTypesOption` parameter. + * - Types are checked using the `compatibleTypes()` relation. + */ + private module Final { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. 
+ */ + predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) { + parameterValueFlow0(p, node, read) and + if node instanceof CastingNode + then + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node)) + or + // getter + compatibleTypes(read.getContentType(), getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) { + p = node and + Cand::cand(p, _) and + read = TReadStepTypesNone() + or + // local flow + exists(Node mid | + parameterValueFlow(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlow(p, mid, TReadStepTypesNone()) and + readStepWithTypes(mid, read.getContainerType(), read.getContent(), node, + read.getContentType()) and + Cand::parameterValueFlowReturnCand(p, _, true) and + compatibleTypes(getNodeDataFlowType(p), read.getContainerType()) + ) + or + parameterValueFlow0_0(TReadStepTypesNone(), p, node, read) + } + + pragma[nomagic] + private predicate parameterValueFlow0_0( + ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read + ) { + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArg(p, arg, mustBeNone) and + argumentValueFlowsThrough(arg, read, node) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArg(p, arg, read) and + argumentValueFlowsThrough(arg, mustBeNone, node) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) { + parameterValueFlow(p, arg, read) and + Cand::argumentValueFlowsThroughCand(arg, _, _) + } + + pragma[nomagic] + private predicate argumentValueFlowsThrough0( + DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + 
parameterValueFlowReturn(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + pragma[nomagic] + predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThrough0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + | + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out)) + or + // getter + compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and + compatibleTypes(read.getContentType(), getNodeDataFlowType(out)) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and a single read step, not taking call + * contexts into account, thus representing a getter-step. + */ + predicate getterStep(ArgNode arg, Content c, Node out) { + argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps and possibly a single read + * step. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + private predicate parameterValueFlowReturn( + ParamNode p, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ReturnNode ret | + parameterValueFlow(p, ret, read) and + kind = ret.getKind() + ) + } + } + + import Final + } + + import FlowThrough + + cached + private module DispatchWithCallContext { + /** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. 
+ */ + pragma[nomagic] + private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) { + mayBenefitFromCallContext(call, callable) + or + callEnclosingCallable(call, callable) and + exists(viableCallableLambda(call, TDataFlowCallSome(_))) + } + + /** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ + pragma[nomagic] + private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContext(call, ctx) + or + result = viableCallableLambda(call, TDataFlowCallSome(ctx)) + or + exists(DataFlowCallable enclosing | + mayBenefitFromCallContextExt(call, enclosing) and + enclosing = viableCallableExt(ctx) and + result = viableCallableLambda(call, TDataFlowCallNone()) + ) + } + + /** + * Holds if the call context `ctx` reduces the set of viable run-time + * dispatch targets of call `call` in `c`. + */ + cached + predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContextExt(call, c) and + c = viableCallableExt(ctx) and + ctxtgts = count(viableImplInCallContextExt(call, ctx)) and + tgts = strictcount(viableCallableExt(call)) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls for which a context + * makes a difference. + */ + cached + DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContextExt(call, ctx) and + reducedViableImplInCallContext(call, _, ctx) + } + + /** + * Holds if flow returning from callable `c` to call `call` might return + * further and if this path restricts the set of call sites that can be + * returned to. 
+     */
+    cached
+    predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
+      exists(int tgts, int ctxtgts |
+        mayBenefitFromCallContextExt(call, _) and
+        c = viableCallableExt(call) and
+        // number of contexts `ctx` in which `c` is a viable target of `call`
+        ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
+        // number of calls `ctx` that can target the callable enclosing `call`
+        tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
+        ctxtgts < tgts
+      )
+    }
+
+    /**
+     * Gets a viable run-time dispatch target for the call `call` in the
+     * context `ctx`. This is restricted to those calls and results for which
+     * the return flow from the result to `call` restricts the possible context
+     * `ctx`.
+     */
+    cached
+    DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
+      result = viableImplInCallContextExt(call, ctx) and
+      reducedViableImplInReturn(result, call)
+    }
+  }
+
+  import DispatchWithCallContext
+
+  /**
+   * Holds if `p` can flow to the pre-update node associated with post-update
+   * node `n`, in the same callable, using only value-preserving steps.
+   */
+  private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
+    parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
+  }
+
+  /**
+   * Holds if `node1` is stored into content `c` of `node2`, where `contentType`
+   * is the type of the stored value and `containerType` is the type of the
+   * container.
+   *
+   * This is either a `storeStep`, or a reverse step between two post-update
+   * nodes whose pre-update nodes are connected by a read of `c` (directly, or
+   * through a call acting as a getter).
+   */
+  private predicate store(
+    Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
+  ) {
+    storeStep(node1, c, node2) and
+    contentType = getNodeDataFlowType(node1) and
+    containerType = getNodeDataFlowType(node2)
+    or
+    exists(Node n1, Node n2 |
+      n1 = node1.(PostUpdateNode).getPreUpdateNode() and
+      n2 = node2.(PostUpdateNode).getPreUpdateNode()
+    |
+      // the read goes from `n2` to `n1`, i.e. opposite to the store direction
+      argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
+      or
+      read(n2, c, n1) and
+      contentType = getNodeDataFlowType(n1) and
+      containerType = getNodeDataFlowType(n2)
+    )
+  }
+
+  /** Holds if `readStep(node1, c, node2)` holds. This is a cached wrapper around `readStep`. */
+  cached
+  predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
+
+  /**
+   * Holds if data can flow from `node1` to `node2` via a direct assignment to
+   * the content of `tc`; `contentType` is the type of the stored value.
+   *
+   * This includes reverse steps through reads when the result of the read has
+   * been stored into, in order to handle cases like `x.f1.f2 = y`.
+   */
+  cached
+  predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
+    store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
+  }
+
+  /**
+   * Holds if data can flow from `fromNode` to `toNode` because they are the post-update
+   * nodes of some function output and input respectively, where the output and input
+   * are aliases. A typical example is a function returning `this`, implementing a fluent
+   * interface.
+   */
+  private predicate reverseStepThroughInputOutputAlias(
+    PostUpdateNode fromNode, PostUpdateNode toNode
+  ) {
+    exists(Node fromPre, Node toPre |
+      fromPre = fromNode.getPreUpdateNode() and
+      toPre = toNode.getPreUpdateNode()
+    |
+      exists(DataFlowCall c |
+        // Does the language-specific simpleLocalFlowStep already model flow
+        // from function input to output?
+        fromPre = getAnOutNode(c, _) and
+        toPre.(ArgNode).argumentOf(c, _) and
+        simpleLocalFlowStep(toPre.(ArgNode), fromPre)
+      )
+      or
+      // otherwise the alias is found as value-preserving flow (no read step)
+      // from the argument `toPre` through a call to `fromPre`
+      argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
+    )
+  }
+
+  /**
+   * Holds if there is a `simpleLocalFlowStep` from `node1` to `node2`, or a
+   * reverse step from `node1` to `node2` through an input/output alias.
+   */
+  cached
+  predicate simpleLocalFlowStepExt(Node node1, Node node2) {
+    simpleLocalFlowStep(node1, node2) or
+    reverseStepThroughInputOutputAlias(node1, node2)
+  }
+
+  /**
+   * Holds if the call context `call` improves virtual dispatch in `callable`.
+   */
+  cached
+  predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
+    reducedViableImplInCallContext(_, callable, call)
+  }
+
+  /**
+   * Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
+   */
+  cached
+  predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
+    exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
+  }
+
+  /**
+   * The underlying representation of `CallContext`. Specific calls are only
+   * included when `recordDataFlowCallSite` holds for them, and return contexts
+   * only when `reducedViableImplInReturn` holds.
+   */
+  cached
+  newtype TCallContext =
+    TAnyCallContext() or
+    TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or
+    TSomeCall() or
+    TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) }
+
+  /**
+   * The underlying representation of `ReturnPosition`: a callable `c` paired
+   * with a return kind `kind` for which `c` encloses a return node of that kind.
+   */
+  cached
+  newtype TReturnPosition =
+    TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) {
+      exists(ReturnNodeExt ret |
+        c = returnNodeGetEnclosingCallable(ret) and
+        kind = ret.getKind()
+      )
+    }
+
+  /**
+   * The underlying representation of `LocalCallContext`. Specific calls are only
+   * included when some node is unreachable in that call.
+   */
+  cached
+  newtype TLocalFlowCallContext =
+    TAnyLocalCall() or
+    TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
+
+  /**
+   * The underlying representation of `ReturnKindExt`: either a returned value of
+   * kind `kind`, or an update of the parameter at position `pos`.
+   */
+  cached
+  newtype TReturnKindExt =
+    TValueReturn(ReturnKind kind) or
+    TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
+
+  /** The underlying representation of `BooleanOption`. */
+  cached
+  newtype TBooleanOption =
+    TBooleanNone() or
+    TBooleanSome(boolean b) { b = true or b = false }
+
+  /** The underlying representation of `DataFlowCallOption`. */
+  cached
+  newtype TDataFlowCallOption =
+    TDataFlowCallNone() or
+    TDataFlowCallSome(DataFlowCall call)
+
+  /**
+   * The underlying representation of `TypedContent`: content `c` paired with a
+   * container type `t` for which a store of `c` into a container of type `t` exists.
+   */
+  cached
+  newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
+
+  /** The underlying representation of `AccessPathFront`. */
+  cached
+  newtype TAccessPathFront =
+    TFrontNil(DataFlowType t) or
+    TFrontHead(TypedContent tc)
+
+  /** The underlying representation of `AccessPathFrontOption`. */
+  cached
+  newtype TAccessPathFrontOption =
+    TAccessPathFrontNone() or
+    TAccessPathFrontSome(AccessPathFront apf)
+}
+
+/**
+ * Holds if the call context `call` either improves virtual dispatch in
+ * `callable` or if it allows us to prune unreachable nodes in `callable`.
+ */
+predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+  recordDataFlowCallSiteDispatch(call, callable) or
+  recordDataFlowCallSiteUnreachable(call, callable)
+}
+
+/**
+ * A `Node` at which a cast can occur such that the type should be checked.
+ */
+class CastingNode extends Node {
+  CastingNode() { castingNode(this) }
+}
+
+/**
+ * Holds if there is a read of content `c` from `n1` to `n2`, where `container`
+ * is the type of `n1` and `content` is the type of `n2`.
+ */
+private predicate readStepWithTypes(
+  Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
+) {
+  read(n1, c, n2) and
+  container = getNodeDataFlowType(n1) and
+  content = getNodeDataFlowType(n2)
+}
+
+/**
+ * The underlying representation of `ReadStepTypesOption`. The `Some` branch is
+ * restricted to type/content combinations that occur in an actual read step.
+ */
+private newtype TReadStepTypesOption =
+  TReadStepTypesNone() or
+  TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
+    readStepWithTypes(_, container, c, _, content)
+  }
+
+/**
+ * An optional description of a read step: either nothing, or the container
+ * type, the content, and the content type of a read step.
+ */
+private class ReadStepTypesOption extends TReadStepTypesOption {
+  /** Holds if this option describes a read step. */
+  predicate isSome() { this instanceof TReadStepTypesSome }
+
+  /** Gets the container type of the read step, if any. */
+  DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
+
+  /** Gets the content that is read, if any. */
+  Content getContent() { this = TReadStepTypesSome(_, result, _) }
+
+  /** Gets the type of the content that is read, if any. */
+  DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
+
+  /** Gets a textual representation of this option. */
+  string toString() { if this.isSome() then result = "Some(..)" else result = "None()" }
+}
+
+/**
+ * A call context to restrict the targets of virtual dispatch, prune local flow,
+ * and match the call sites of flow into a method with flow out of a method.
+ *
+ * There are four cases:
+ * - `TAnyCallContext()` : No restrictions on method flow.
+ * - `TSpecificCall(DataFlowCall call)` : Flow entered through the
+ *    given `call`. This call improves the set of viable
+ *    dispatch targets for at least one method call in the current callable
+ *    or helps prune unreachable nodes in the current callable.
+ * - `TSomeCall()` : Flow entered through a parameter. The
+ *    originating call does not improve the set of dispatch targets for any
+ *    method call in the current callable and was therefore not recorded.
+ * - `TReturn(DataFlowCallable c, DataFlowCall call)` : Flow reached `call` from `c` and
+ *    this dispatch target of `call` implies a reduced set of dispatch origins
+ *    to which data may flow if it should reach a `return` statement.
+ */
+abstract class CallContext extends TCallContext {
+  /** Gets a textual representation of this call context. */
+  abstract string toString();
+
+  /** Holds if this call context is relevant for `callable`. */
+  abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+/** A call context that is not a `CallContextCall`: either `CallContextAny` or `CallContextReturn`. */
+abstract class CallContextNoCall extends CallContext { }
+
+/** The call context `TAnyCallContext()`, which is relevant for every callable. */
+class CallContextAny extends CallContextNoCall, TAnyCallContext {
+  override string toString() { result = "CcAny" }
+
+  override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+/** A call context that can be matched against calls: either a specific call or some call. */
+abstract class CallContextCall extends CallContext {
+  /** Holds if this call context may be `call`. */
+  bindingset[call]
+  abstract predicate matchesCall(DataFlowCall call);
+}
+
+/** The call context `TSpecificCall(call)` for a recorded call site `call`. */
+class CallContextSpecificCall extends CallContextCall, TSpecificCall {
+  override string toString() {
+    exists(DataFlowCall call | this = TSpecificCall(call) | result = "CcCall(" + call + ")")
+  }
+
+  override predicate relevantFor(DataFlowCallable callable) {
+    recordDataFlowCallSite(getCall(), callable)
+  }
+
+  override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
+
+  /** Gets the call of this call context. */
+  DataFlowCall getCall() { this = TSpecificCall(result) }
+}
+
+/**
+ * The call context `TSomeCall()`. It matches every call and is relevant for
+ * every callable that has at least one `ParamNode`.
+ */
+class CallContextSomeCall extends CallContextCall, TSomeCall {
+  override string toString() { result = "CcSomeCall" }
+
+  override predicate relevantFor(DataFlowCallable callable) {
+    exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
+  }
+
+  override predicate matchesCall(DataFlowCall call) { any() }
+}
+
+/**
+ * The call context `TReturn(c, call)`. It is relevant for the callable that
+ * encloses `call`.
+ */
+class CallContextReturn extends CallContextNoCall, TReturn {
+  override string toString() {
+    exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")")
+  }
+
+  override predicate relevantFor(DataFlowCallable callable) {
+    exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
+  }
+}
+
+/**
+ * A call context that is relevant for pruning local flow.
+ */
+abstract class LocalCallContext extends TLocalFlowCallContext {
+  /** Gets a textual representation of this local call context. */
+  abstract string toString();
+
+  /** Holds if this call context is relevant for `callable`. */
+  abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+/** The local call context `TAnyLocalCall()`, which is relevant for every callable. */
+class LocalCallContextAny extends LocalCallContext, TAnyLocalCall {
+  override string toString() { result = "LocalCcAny" }
+
+  override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+/**
+ * The local call context `TSpecificLocalCall(call)`. It is relevant for the
+ * callables that contain a node that is unreachable in `call`.
+ */
+class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall {
+  LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) }
+
+  DataFlowCall call;
+
+  /** Gets the call of this local call context. */
+  DataFlowCall getCall() { result = call }
+
+  override string toString() { result = "LocalCcCall(" + call + ")" }
+
+  override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) }
+}
+
+/** Holds if `callable` contains a node that is unreachable in the call context `call`. */
+private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
+  exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
+}
+
+/**
+ * Gets the local call context given the call context and the callable that
+ * the contexts apply to.
+ */
+LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) {
+  ctx.relevantFor(callable) and
+  // only a specific call that prunes nodes in `callable` is kept; every other
+  // call context maps to `LocalCallContextAny`
+  if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable)
+  then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall()
+  else result instanceof LocalCallContextAny
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+class ParamNode extends Node {
+  ParamNode() { parameterNode(this, _, _) }
+
+  /**
+   * Holds if this node is the parameter of callable `c` at the specified
+   * (zero-based) position.
+   */
+  predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
+}
+
+/** A data-flow node that represents a call argument.
*/
+class ArgNode extends Node {
+  ArgNode() { argumentNode(this, _, _) }
+
+  /** Holds if this argument occurs at the given position in the given call. */
+  final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
+}
+
+/**
+ * A node from which flow can return to the caller. This is either a regular
+ * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
+ */
+class ReturnNodeExt extends Node {
+  ReturnNodeExt() { returnNodeExt(this, _) }
+
+  /** Gets the kind of this returned value. */
+  ReturnKindExt getKind() { returnNodeExt(this, result) }
+}
+
+/**
+ * A node to which data can flow from a call. Either an ordinary out node
+ * or a post-update node associated with a call argument.
+ */
+class OutNodeExt extends Node {
+  OutNodeExt() { outNodeExt(this) }
+}
+
+/**
+ * An extended return kind. A return kind describes how data can be returned
+ * from a callable. This can either be through a returned value or an updated
+ * parameter.
+ */
+abstract class ReturnKindExt extends TReturnKindExt {
+  /** Gets a textual representation of this return kind. */
+  abstract string toString();
+
+  /** Gets a node corresponding to data flow out of `call`. */
+  final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
+}
+
+/** An extended return kind that wraps an ordinary `ReturnKind`. */
+class ValueReturnKind extends ReturnKindExt, TValueReturn {
+  private ReturnKind kind;
+
+  ValueReturnKind() { this = TValueReturn(kind) }
+
+  /** Gets the wrapped return kind. */
+  ReturnKind getKind() { result = kind }
+
+  override string toString() { result = kind.toString() }
+}
+
+/** An extended return kind that represents an update of a parameter. */
+class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
+  private int pos;
+
+  ParamUpdateReturnKind() { this = TParamUpdate(pos) }
+
+  /** Gets the position of the updated parameter. */
+  int getPosition() { result = pos }
+
+  override string toString() { result = "param update " + pos }
+}
+
+/** A callable tagged with a relevant return kind.
*/
+class ReturnPosition extends TReturnPosition0 {
+  private DataFlowCallable c;
+  private ReturnKindExt kind;
+
+  ReturnPosition() { this = TReturnPosition0(c, kind) }
+
+  /** Gets the callable. */
+  DataFlowCallable getCallable() { result = c }
+
+  /** Gets the return kind. */
+  ReturnKindExt getKind() { result = kind }
+
+  /** Gets a textual representation of this return position. */
+  string toString() { result = "[" + kind + "] " + c }
+}
+
+/**
+ * Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this
+ * predicate ensures that joins go from `n` to the result instead of the other
+ * way around.
+ */
+pragma[inline]
+DataFlowCallable getNodeEnclosingCallable(Node n) {
+  nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+/** Gets the type of `n` used for type pruning. */
+pragma[inline]
+DataFlowType getNodeDataFlowType(Node n) {
+  nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+/** Gets the enclosing callable of the return node `ret`. This is a non-inlined helper. */
+pragma[noinline]
+private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) {
+  result = getNodeEnclosingCallable(ret)
+}
+
+/** Gets the return position of kind `kind` in the callable that encloses `ret`. */
+pragma[noinline]
+private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) {
+  result.getCallable() = returnNodeGetEnclosingCallable(ret) and
+  kind = result.getKind()
+}
+
+/** Gets the return position of `ret`: its enclosing callable tagged with its return kind. */
+pragma[noinline]
+ReturnPosition getReturnPosition(ReturnNodeExt ret) {
+  result = getReturnPosition0(ret, ret.getKind())
+}
+
+/**
+ * Checks whether `inner` can return to `call` in the call context `innercc`.
+ * Assumes a context of `inner = viableCallableExt(call)`.
+ */
+bindingset[innercc, inner, call]
+predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
+  // the unrestricted context allows every return
+  innercc instanceof CallContextAny
+  or
+  // a return context `TReturn(c0, call0)` with `call0` inside `inner` only
+  // allows returning to calls `call` that remain possible contexts for `c0`
+  exists(DataFlowCallable c0, DataFlowCall call0 |
+    callEnclosingCallable(call0, inner) and
+    innercc = TReturn(c0, call0) and
+    c0 = prunedViableImplInCallContextReverse(call0, call)
+  )
+}
+
+/**
+ * Checks whether `call` can resolve to `calltarget` in the call context `cc`.
+ * Assumes a context of `calltarget = viableCallableExt(call)`.
+ */
+bindingset[cc, call, calltarget]
+predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
+  // a specific call context restricts the target only when it reduces the
+  // set of viable targets of `call`
+  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+    if reducedViableImplInCallContext(call, _, ctx)
+    then calltarget = prunedViableImplInCallContext(call, ctx)
+    else any()
+  )
+  or
+  // all other call contexts impose no restriction on the target
+  cc instanceof CallContextSomeCall
+  or
+  cc instanceof CallContextAny
+  or
+  cc instanceof CallContextReturn
+}
+
+/**
+ * Resolves a return from `callable` in `cc` to `call`. This is equivalent to
+ * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
+ */
+bindingset[cc, callable]
+predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
+  cc instanceof CallContextAny and callable = viableCallableExt(call)
+  or
+  exists(DataFlowCallable c0, DataFlowCall call0 |
+    callEnclosingCallable(call0, callable) and
+    cc = TReturn(c0, call0) and
+    c0 = prunedViableImplInCallContextReverse(call0, call)
+  )
+}
+
+/**
+ * Resolves a call from `call` in `cc` to `result`. This is equivalent to
+ * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
+ */
+bindingset[call, cc]
+DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
+  // a specific call context uses the pruned targets when it reduces the set
+  // of viable targets of `call`, and all viable targets otherwise
+  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+    if reducedViableImplInCallContext(call, _, ctx)
+    then result = prunedViableImplInCallContext(call, ctx)
+    else result = viableCallableExt(call)
+  )
+  or
+  // all other call contexts resolve to every viable target
+  result = viableCallableExt(call) and cc instanceof CallContextSomeCall
+  or
+  result = viableCallableExt(call) and cc instanceof CallContextAny
+  or
+  result = viableCallableExt(call) and cc instanceof CallContextReturn
+}
+
+/** An optional Boolean value. */
+class BooleanOption extends TBooleanOption {
+  /** Gets a textual representation of this option. */
+  string toString() {
+    this = TBooleanNone() and result = ""
+    or
+    this = TBooleanSome(any(boolean b | result = b.toString()))
+  }
+}
+
+/** An optional `DataFlowCall`. */
+class DataFlowCallOption extends TDataFlowCallOption {
+  /** Gets a textual representation of this option. */
+  string toString() {
+    this = TDataFlowCallNone() and
+    result = "(none)"
+    or
+    exists(DataFlowCall call |
+      this = TDataFlowCallSome(call) and
+      result = call.toString()
+    )
+  }
+}
+
+/** Content tagged with the type of a containing object. */
+class TypedContent extends MkTypedContent {
+  private Content c;
+  private DataFlowType t;
+
+  TypedContent() { this = MkTypedContent(c, t) }
+
+  /** Gets the content. */
+  Content getContent() { result = c }
+
+  /** Gets the container type. */
+  DataFlowType getContainerType() { result = t }
+
+  /** Gets a textual representation of this content. */
+  string toString() { result = c.toString() }
+
+  /**
+   * Holds if access paths with this `TypedContent` at their head always should
+   * be tracked at high precision. This disables adaptive access path precision
+   * for such access paths.
+   */
+  predicate forceHighPrecision() { forceHighPrecision(c) }
+}
+
+/**
+ * The front of an access path. This is either a head or a nil.
+ */
+abstract class AccessPathFront extends TAccessPathFront {
+  /** Gets a textual representation of this access path front. */
+  abstract string toString();
+
+  /** Gets the type associated with this access path front. */
+  abstract DataFlowType getType();
+
+  /** Gets `true` if this front is a head and `false` if it is a nil. */
+  abstract boolean toBoolNonEmpty();
+
+  /** Gets the typed content at the head, if this front is a head. */
+  TypedContent getHead() { this = TFrontHead(result) }
+
+  /** Holds if the content at the head of this front is cleared at node `n`. */
+  predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
+}
+
+/** An empty access path front, which only carries a type. */
+class AccessPathFrontNil extends AccessPathFront, TFrontNil {
+  private DataFlowType t;
+
+  AccessPathFrontNil() { this = TFrontNil(t) }
+
+  override string toString() { result = ppReprType(t) }
+
+  override DataFlowType getType() { result = t }
+
+  override boolean toBoolNonEmpty() { result = false }
+}
+
+/** A non-empty access path front, consisting of a `TypedContent` head. */
+class AccessPathFrontHead extends AccessPathFront, TFrontHead {
+  private TypedContent tc;
+
+  AccessPathFrontHead() { this = TFrontHead(tc) }
+
+  override string toString() { result = tc.toString() }
+
+  override DataFlowType getType() { result = tc.getContainerType() }
+
+  override boolean toBoolNonEmpty() { result = true }
+}
+
+/** An optional access path front. */
+class AccessPathFrontOption extends TAccessPathFrontOption {
+  /** Gets a textual representation of this option. */
+  string toString() {
+    this = TAccessPathFrontNone() and result = ""
+    or
+    this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString()))
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
new file mode 100644
index 00000000000..a55e65a81f6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
@@ -0,0 +1,181 @@
+/**
+ * Provides consistency queries for checking invariants in the language-specific
+ * data-flow classes and predicates.
+ */
+
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import tainttracking1.TaintTrackingParameter::Private
+private import tainttracking1.TaintTrackingParameter::Public
+
+/** Provides the consistency query predicates; each reports violations of one invariant. */
+module Consistency {
+  /**
+   * A node that takes part in data flow: an argument, parameter, return or
+   * out node, or an endpoint of a local, jump, store, read, or default
+   * additional taint step.
+   */
+  private class RelevantNode extends Node {
+    RelevantNode() {
+      this instanceof ArgumentNode or
+      this instanceof ParameterNode or
+      this instanceof ReturnNode or
+      this = getAnOutNode(_, _) or
+      simpleLocalFlowStep(this, _) or
+      simpleLocalFlowStep(_, this) or
+      jumpStep(this, _) or
+      jumpStep(_, this) or
+      storeStep(this, _, _) or
+      storeStep(_, _, this) or
+      readStep(this, _, _) or
+      readStep(_, _, this) or
+      defaultAdditionalTaintStep(this, _) or
+      defaultAdditionalTaintStep(_, this)
+    }
+  }
+
+  /** Holds if the relevant node `n` does not have exactly one enclosing callable. */
+  query predicate uniqueEnclosingCallable(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(n.getEnclosingCallable()) and
+      c != 1 and
+      msg = "Node should have one enclosing callable but has " + c + "."
+    )
+  }
+
+  /** Holds if the relevant node `n` does not have exactly one type. */
+  query predicate uniqueType(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(getNodeType(n)) and
+      c != 1 and
+      msg = "Node should have one type but has " + c + "."
+    )
+  }
+
+  /** Holds if node `n` does not have exactly one location. */
+  query predicate uniqueNodeLocation(Node n, string msg) {
+    exists(int c |
+      c =
+        count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+          n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+        ) and
+      c != 1 and
+      msg = "Node should have one location but has " + c + "."
+    )
+  }
+
+  /** Holds if at least one node has no location; `msg` reports how many. */
+  query predicate missingLocation(string msg) {
+    exists(int c |
+      c =
+        strictcount(Node n |
+          not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+            n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+          )
+        ) and
+      msg = "Nodes without location: " + c
+    )
+  }
+
+  /** Holds if node `n` does not have exactly one `toString()` result. */
+  query predicate uniqueNodeToString(Node n, string msg) {
+    exists(int c |
+      c = count(n.toString()) and
+      c != 1 and
+      msg = "Node should have one toString but has " + c + "."
+    )
+  }
+
+  /** Holds if at least one node has no `toString()` result; `msg` reports how many. */
+  query predicate missingToString(string msg) {
+    exists(int c |
+      c = strictcount(Node n | not exists(n.toString())) and
+      msg = "Nodes without toString: " + c
+    )
+  }
+
+  /** Holds if `p` is a parameter of a callable other than its enclosing callable. */
+  query predicate parameterCallable(ParameterNode p, string msg) {
+    exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
+    msg = "Callable mismatch for parameter."
+  }
+
+  /** Holds if the local flow step from `n1` to `n2` crosses a callable boundary. */
+  query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
+    simpleLocalFlowStep(n1, n2) and
+    n1.getEnclosingCallable() != n2.getEnclosingCallable() and
+    msg = "Local flow step does not preserve enclosing callable."
+  }
+
+  /** Gets a type that is the type of some node. */
+  private DataFlowType typeRepr() { result = getNodeType(_) }
+
+  /** Holds if the node type `t` is not compatible with itself. */
+  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
+    t = typeRepr() and
+    not compatibleTypes(t, t) and
+    msg = "Type compatibility predicate is not reflexive."
+  }
+
+  /**
+   * Holds if `n` is reported as unreachable in the call context `call`, but
+   * the enclosing callable of `n` is not a viable target of `call`.
+   */
+  query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
+    isUnreachableInCall(n, call) and
+    exists(DataFlowCallable c |
+      c = n.getEnclosingCallable() and
+      not viableCallable(call) = c
+    ) and
+    msg = "Call context for isUnreachableInCall is inconsistent with call graph."
+  }
+
+  /**
+   * Holds if `n` is an out node or an argument of `call`, but `n` and `call`
+   * have different enclosing callables.
+   */
+  query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
+    (
+      n = getAnOutNode(call, _) and
+      msg = "OutNode and call does not share enclosing callable."
+      or
+      n.(ArgumentNode).argumentOf(call, _) and
+      msg = "ArgumentNode and call does not share enclosing callable."
+    ) and
+    n.getEnclosingCallable() != call.getEnclosingCallable()
+  }
+
+  // This predicate helps the compiler forget that in some languages
+  // it is impossible for a result of `getPreUpdateNode` to be an
+  // instance of `PostUpdateNode`.
+  private Node getPre(PostUpdateNode n) {
+    result = n.getPreUpdateNode()
+    or
+    none()
+  }
+
+  /** Holds if the post-update node `n` is its own pre-update node. */
+  query predicate postIsNotPre(PostUpdateNode n, string msg) {
+    getPre(n) = n and
+    msg = "PostUpdateNode should not equal its pre-update node."
+  }
+
+  /** Holds if the post-update node `n` does not have exactly one pre-update node. */
+  query predicate postHasUniquePre(PostUpdateNode n, string msg) {
+    exists(int c |
+      c = count(n.getPreUpdateNode()) and
+      c != 1 and
+      msg = "PostUpdateNode should have one pre-update node but has " + c + "."
+    )
+  }
+
+  /** Holds if `n` is the pre-update node of more than one post-update node. */
+  query predicate uniquePostUpdate(Node n, string msg) {
+    1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
+    msg = "Node has multiple PostUpdateNodes."
+  }
+
+  /** Holds if the post-update node `n` and its pre-update node have different enclosing callables. */
+  query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
+    n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
+    msg = "PostUpdateNode does not share callable with its pre-update node."
+  }
+
+  /** Holds if `n` is the pre-update node of some post-update node. */
+  private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
+
+  /**
+   * Holds if `n` is the origin of a read step whose target has a post-update
+   * node, while `n` itself has none.
+   */
+  query predicate reverseRead(Node n, string msg) {
+    exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
+    msg = "Origin of readStep is missing a PostUpdateNode."
+  }
+
+  /**
+   * Holds if the argument `n` has no post-update node and is not exempted by
+   * `isImmutableOrUnobservable`.
+   */
+  query predicate argHasPostUpdate(ArgumentNode n, string msg) {
+    not hasPost(n) and
+    not isImmutableOrUnobservable(n) and
+    msg = "ArgumentNode is missing PostUpdateNode."
+  }
+
+  // This predicate helps the compiler forget that in some languages
+  // it is impossible for a `PostUpdateNode` to be the target of
+  // `simpleLocalFlowStep`.
+  private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
+
+  /** Holds if `n` is a post-update node that is the target of a local flow step. */
+  query predicate postWithInFlow(Node n, string msg) {
+    isPostUpdateNode(n) and
+    simpleLocalFlowStep(_, n) and
+    msg = "PostUpdateNode should not be the target of local flow."
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll
new file mode 100644
index 00000000000..e78a0814a14
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll
@@ -0,0 +1,11 @@
+/**
+ * Provides Ruby-specific definitions for use in the data flow library.
+ */
+module Private {
+  import DataFlowPrivate
+  import DataFlowDispatch
+}
+
+module Public {
+  import DataFlowPublic
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
new file mode 100644
index 00000000000..5850939fb50
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
@@ -0,0 +1,799 @@
+private import ruby
+private import codeql.ruby.CFG
+private import codeql.ruby.dataflow.SSA
+private import DataFlowPublic
+private import DataFlowDispatch
+private import SsaImpl as SsaImpl
+private import FlowSummaryImpl as FlowSummaryImpl
+
+/**
+ * A data-flow node together with the implementation hooks that supply its
+ * scope, location, and textual representation.
+ */
+abstract class NodeImpl extends Node {
+  /** Do not call: use `getEnclosingCallable()` instead. */
+  abstract CfgScope getCfgScope();
+
+  /** Do not call: use `getLocation()` instead. */
+  abstract Location getLocationImpl();
+
+  /** Do not call: use `toString()` instead.
*/
+  abstract string toStringImpl();
+}
+
+/** An expression node; scope, location, and text are taken from the underlying CFG node. */
+private class ExprNodeImpl extends ExprNode, NodeImpl {
+  override CfgScope getCfgScope() { result = this.getExprNode().getExpr().getCfgScope() }
+
+  override Location getLocationImpl() { result = this.getExprNode().getLocation() }
+
+  override string toStringImpl() { result = this.getExprNode().toString() }
+}
+
+/** Provides predicates related to local data flow. */
+module LocalFlow {
+  private import codeql.ruby.dataflow.internal.SsaImpl
+
+  /**
+   * Holds if `nodeFrom` is a last node referencing SSA definition `def`, which
+   * can reach `next`.
+   */
+  private predicate localFlowSsaInput(Node nodeFrom, Ssa::Definition def, Ssa::Definition next) {
+    exists(BasicBlock bb, int i | lastRefBeforeRedef(def, bb, i, next) |
+      // the last reference is the definition `def` itself
+      def = nodeFrom.(SsaDefinitionNode).getDefinition() and
+      def.definesAt(_, bb, i)
+      or
+      // the last reference is a variable read at position `i` in `bb`
+      exists(CfgNodes::ExprCfgNode e |
+        e = nodeFrom.asExpr() and
+        e = bb.getNode(i) and
+        e.getExpr() instanceof VariableReadAccess
+      )
+    )
+  }
+
+  /** Gets the SSA definition node corresponding to parameter `p`. */
+  SsaDefinitionNode getParameterDefNode(NamedParameter p) {
+    exists(BasicBlock bb, int i |
+      bb.getNode(i).getNode() = p.getDefiningAccess() and
+      result.getDefinition().definesAt(_, bb, i)
+    )
+  }
+
+  /**
+   * Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving
+   * SSA definition `def`.
+   */
+  predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) {
+    // Flow from assignment into SSA definition
+    def.(Ssa::WriteDefinition).assigns(nodeFrom.asExpr()) and
+    nodeTo.(SsaDefinitionNode).getDefinition() = def
+    or
+    // Flow from SSA definition to first read
+    def = nodeFrom.(SsaDefinitionNode).getDefinition() and
+    nodeTo.asExpr() = def.getAFirstRead()
+    or
+    // Flow from read to next read; the source is either the first read itself
+    // or a post-update node whose pre-update node is the first read
+    exists(
+      CfgNodes::ExprNodes::VariableReadAccessCfgNode read1,
+      CfgNodes::ExprNodes::VariableReadAccessCfgNode read2
+    |
+      def.hasAdjacentReads(read1, read2) and
+      nodeTo.asExpr() = read2
+    |
+      nodeFrom.asExpr() = read1
+      or
+      read1 = nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr()
+    )
+    or
+    // Flow into phi node
+    exists(Ssa::PhiNode phi |
+      localFlowSsaInput(nodeFrom, def, phi) and
+      phi = nodeTo.(SsaDefinitionNode).getDefinition() and
+      def = phi.getAnInput()
+    )
+    // TODO: flow into uncertain SSA definitions is not modeled yet; the
+    // disabled disjunct below is kept as a sketch.
+    // or
+    // // Flow into uncertain SSA definition
+    // exists(LocalFlow::UncertainExplicitSsaDefinition uncertain |
+    //   localFlowSsaInput(nodeFrom, def, uncertain) and
+    //   uncertain = nodeTo.(SsaDefinitionNode).getDefinition() and
+    //   def = uncertain.getPriorDefinition()
+    // )
+  }
+}
+
+/** An argument of a call (including qualifier arguments). */
+private class Argument extends Expr {
+  private Call call;
+  private int arg;
+
+  Argument() { this = call.getArgument(arg) }
+
+  /** Holds if this expression is the `i`th argument of `c`. */
+  predicate isArgumentOf(Expr c, int i) { c = call and i = arg }
+}
+
+/** A collection of cached types and predicates to be evaluated in the same stage.
*/
+cached
+private module Cached {
+  /** The underlying representation of data-flow nodes. */
+  cached
+  newtype TNode =
+    TExprNode(CfgNodes::ExprCfgNode n) or
+    TReturningNode(CfgNodes::ReturningCfgNode n) or
+    // one synthesized return node per scope and return kind for which a
+    // returning node exists
+    TSynthReturnNode(CfgScope scope, ReturnKind kind) {
+      exists(ReturningNode ret |
+        ret.(NodeImpl).getCfgScope() = scope and
+        ret.getKind() = kind
+      )
+    } or
+    TSsaDefinitionNode(Ssa::Definition def) or
+    TNormalParameterNode(Parameter p) { not p instanceof BlockParameter } or
+    TSelfParameterNode(MethodBase m) or
+    TBlockParameterNode(MethodBase m) or
+    // post-update nodes exist for non-block arguments and for call receivers
+    TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
+      exists(AstNode node | node = n.getNode() |
+        node instanceof Argument and
+        not node instanceof BlockArgument
+        or
+        n = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver()
+      )
+    } or
+    TSummaryNode(
+      FlowSummaryImpl::Public::SummarizedCallable c,
+      FlowSummaryImpl::Private::SummaryNodeState state
+    ) {
+      FlowSummaryImpl::Private::summaryNodeRange(c, state)
+    } or
+    TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, int i) {
+      FlowSummaryImpl::Private::summaryParameterNodeRange(c, i)
+    }
+
+  /** The union of all parameter node branches of `TNode`. */
+  class TParameterNode =
+    TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
+
+  /** Holds if `e` is the default value of the optional or keyword parameter `p`. */
+  private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
+    p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
+    or
+    p.(KeywordParameter).getDefaultValue() = e.getExprNode().getExpr()
+  }
+
+  /** Holds if there is a local flow step from `nodeFrom` to `nodeTo` that all local flow relations share. */
+  private predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
+    LocalFlow::localSsaFlowStep(_, nodeFrom, nodeTo)
+    or
+    // from the `self` parameter of a method to a `self` expression enclosed by it
+    nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
+    nodeTo.asExpr().getExpr() instanceof Self
+    or
+    nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs()
+    or
+    nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::BlockArgumentCfgNode).getValue()
+    or
+    nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::StmtSequenceCfgNode).getLastStmt()
+    or
+
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ConditionalExprCfgNode).getBranch(_)
+    or
+    nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::CaseExprCfgNode).getBranch(_)
+    or
+    // from a `break` statement to the loop that it is a `break` predecessor of
+    exists(CfgNodes::ExprCfgNode exprTo, ReturningStatementNode n |
+      nodeFrom = n and
+      exprTo = nodeTo.asExpr() and
+      n.getReturningNode().getNode() instanceof BreakStmt and
+      exprTo.getNode() instanceof Loop and
+      nodeTo.asExpr().getAPredecessor(any(SuccessorTypes::BreakSuccessor s)) = n.getReturningNode()
+    )
+    or
+    // from a returned value to the returning statement node
+    nodeFrom.asExpr() = nodeTo.(ReturningStatementNode).getReturningNode().getReturnedValueNode()
+    or
+    // from the `getValue()` node of a `for` expression to the `for` expression
+    // itself, provided it has a predecessor that is not a `break`
+    nodeTo.asExpr() =
+      any(CfgNodes::ExprNodes::ForExprCfgNode for |
+        exists(SuccessorType s |
+          not s instanceof SuccessorTypes::BreakSuccessor and
+          exists(for.getAPredecessor(s))
+        ) and
+        nodeFrom.asExpr() = for.getValue()
+      )
+  }
+
+  /**
+   * This is the local flow predicate that is used as a building block in global
+   * data flow.
+   */
+  cached
+  predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
+    localFlowStepCommon(nodeFrom, nodeTo)
+    or
+    // from a default value expression to its parameter node
+    defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
+    or
+    // from a parameter node to the SSA definition node of the parameter
+    nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
+    or
+    // from an input of a synthesized return node to that node
+    nodeTo.(SynthReturnNode).getAnInput() = nodeFrom
+    or
+    FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
+  }
+
+  /** This is the local flow predicate that is exposed. */
+  cached
+  predicate localFlowStepImpl(Node nodeFrom, Node nodeTo) {
+    localFlowStepCommon(nodeFrom, nodeTo)
+    or
+    defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
+    or
+    nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
+    or
+    // Simple flow through library code is included in the exposed local
+    // step relation, even though flow is technically inter-procedural
+    FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
+  }
+
+  /** This is the local flow predicate that is used in type tracking.
*/ + cached + predicate localFlowStepTypeTracker(Node nodeFrom, Node nodeTo) { + localFlowStepCommon(nodeFrom, nodeTo) + or + exists(NamedParameter p | + defaultValueFlow(p, nodeFrom) and + nodeTo = LocalFlow::getParameterDefNode(p) + ) + } + + cached + predicate isLocalSourceNode(Node n) { + n instanceof ParameterNode + or + // This case should not be needed once we have proper use-use flow + // for `self`. At that point, the `self`s returned by `trackInstance` + // in `DataFlowDispatch.qll` should refer to the post-update node, + // and we can remove this case. + n instanceof SelfArgumentNode + or + not localFlowStepTypeTracker+(any(Node e | + e instanceof ExprNode + or + e instanceof ParameterNode + ), n) + } + + cached + newtype TContent = TTodoContent() // stub +} + +import Cached + +/** Holds if `n` should be hidden from path explanations. */ +predicate nodeIsHidden(Node n) { + exists(Ssa::Definition def | def = n.(SsaDefinitionNode).getDefinition() | + def instanceof Ssa::PhiNode + ) + or + n instanceof SummaryNode + or + n instanceof SummaryParameterNode + or + n instanceof SynthReturnNode +} + +/** An SSA definition, viewed as a node in a data flow graph. */ +class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode { + Ssa::Definition def; + + SsaDefinitionNode() { this = TSsaDefinitionNode(def) } + + /** Gets the underlying SSA definition. */ + Ssa::Definition getDefinition() { result = def } + + override CfgScope getCfgScope() { result = def.getBasicBlock().getScope() } + + override Location getLocationImpl() { result = def.getLocation() } + + override string toStringImpl() { result = def.toString() } +} + +/** + * A value returning statement, viewed as a node in a data flow graph. + * + * Note that because of control-flow splitting, one `ReturningStmt` may correspond + * to multiple `ReturningStatementNode`s, just like it may correspond to multiple + * `ControlFlow::Node`s. 
+ */ +class ReturningStatementNode extends NodeImpl, TReturningNode { + CfgNodes::ReturningCfgNode n; + + ReturningStatementNode() { this = TReturningNode(n) } + + /** Gets the returning control-flow node corresponding to this node. */ + CfgNodes::ReturningCfgNode getReturningNode() { result = n } + + override CfgScope getCfgScope() { result = n.getScope() } + + override Location getLocationImpl() { result = n.getLocation() } + + override string toStringImpl() { result = n.toString() } +} + +private module ParameterNodes { + abstract class ParameterNodeImpl extends ParameterNode, NodeImpl { + abstract predicate isSourceParameterOf(Callable c, int i); + + override predicate isParameterOf(DataFlowCallable c, int i) { + this.isSourceParameterOf(c.asCallable(), i) + } + } + + /** + * The value of a normal parameter at function entry, viewed as a node in a data + * flow graph. + */ + class NormalParameterNode extends ParameterNodeImpl, TNormalParameterNode { + private Parameter parameter; + + NormalParameterNode() { this = TNormalParameterNode(parameter) } + + override Parameter getParameter() { result = parameter } + + override predicate isSourceParameterOf(Callable c, int i) { c.getParameter(i) = parameter } + + override CfgScope getCfgScope() { result = parameter.getCallable() } + + override Location getLocationImpl() { result = parameter.getLocation() } + + override string toStringImpl() { result = parameter.toString() } + } + + /** + * The value of the `self` parameter at function entry, viewed as a node in a data + * flow graph. 
+ */ + class SelfParameterNode extends ParameterNodeImpl, TSelfParameterNode { + private MethodBase method; + + SelfParameterNode() { this = TSelfParameterNode(method) } + + final MethodBase getMethod() { result = method } + + override predicate isSourceParameterOf(Callable c, int i) { method = c and i = -1 } + + override CfgScope getCfgScope() { result = method } + + override Location getLocationImpl() { result = method.getLocation() } + + override string toStringImpl() { result = "self in " + method.toString() } + } + + /** + * The value of a block parameter at function entry, viewed as a node in a data + * flow graph. + */ + class BlockParameterNode extends ParameterNodeImpl, TBlockParameterNode { + private MethodBase method; + + BlockParameterNode() { this = TBlockParameterNode(method) } + + final MethodBase getMethod() { result = method } + + override Parameter getParameter() { + result = method.getAParameter() and result instanceof BlockParameter + } + + override predicate isSourceParameterOf(Callable c, int i) { c = method and i = -2 } + + override CfgScope getCfgScope() { result = method } + + override Location getLocationImpl() { + result = getParameter().getLocation() + or + not exists(getParameter()) and result = method.getLocation() + } + + override string toStringImpl() { + result = getParameter().toString() + or + not exists(getParameter()) and result = "&block" + } + } + + /** A parameter for a library callable with a flow summary. 
*/ + class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode { + private FlowSummaryImpl::Public::SummarizedCallable sc; + private int pos; + + SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) } + + override predicate isSourceParameterOf(Callable c, int i) { none() } + + override predicate isParameterOf(DataFlowCallable c, int i) { sc = c and i = pos } + + override CfgScope getCfgScope() { none() } + + override DataFlowCallable getEnclosingCallable() { result = sc } + + override Location getLocationImpl() { none() } + + override string toStringImpl() { result = "parameter " + pos + " of " + sc } + } +} + +import ParameterNodes + +/** A data-flow node used to model flow summaries. */ +private class SummaryNode extends NodeImpl, TSummaryNode { + private FlowSummaryImpl::Public::SummarizedCallable c; + private FlowSummaryImpl::Private::SummaryNodeState state; + + SummaryNode() { this = TSummaryNode(c, state) } + + override CfgScope getCfgScope() { none() } + + override DataFlowCallable getEnclosingCallable() { result = c } + + override Location getLocationImpl() { none() } + + override string toStringImpl() { result = "[summary] " + state + " in " + c } +} + +/** A data-flow node that represents a call argument. */ +abstract class ArgumentNode extends Node { + /** Holds if this argument occurs at the given position in the given call. */ + predicate argumentOf(DataFlowCall call, int pos) { this.sourceArgumentOf(call.asCall(), pos) } + + abstract predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos); + + /** Gets the call in which this node is an argument. */ + final DataFlowCall getCall() { this.argumentOf(result, _) } +} + +private module ArgumentNodes { + /** A data-flow node that represents an explicit call argument. 
*/ + class ExplicitArgumentNode extends ArgumentNode { + ExplicitArgumentNode() { + this.asExpr().getExpr() instanceof Argument and + not this.asExpr().getExpr() instanceof BlockArgument + } + + override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { + this.asExpr() = call.getArgument(pos) + } + } + + /** A data-flow node that represents the `self` argument of a call. */ + class SelfArgumentNode extends ArgumentNode { + SelfArgumentNode() { this.asExpr() = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver() } + + override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { + this.asExpr() = call.getReceiver() and + pos = -1 + } + } + + /** A data-flow node that represents a block argument. */ + class BlockArgumentNode extends ArgumentNode { + BlockArgumentNode() { + this.asExpr().getExpr() instanceof BlockArgument or + exists(CfgNodes::ExprNodes::CallCfgNode c | c.getBlock() = this.asExpr()) + } + + override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { + pos = -2 and + ( + this.asExpr() = call.getBlock() + or + exists(CfgNodes::ExprCfgNode arg, int n | + arg = call.getArgument(n) and + this.asExpr() = arg and + arg.getExpr() instanceof BlockArgument + ) + ) + } + } + + private class SummaryArgumentNode extends SummaryNode, ArgumentNode { + SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) } + + override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { none() } + + override predicate argumentOf(DataFlowCall call, int pos) { + FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos) + } + } +} + +import ArgumentNodes + +/** A data-flow node that represents a value syntactically returned by a callable. */ +abstract class ReturningNode extends Node { + /** Gets the kind of this return node. */ + abstract ReturnKind getKind(); +} + +/** A data-flow node that represents a value returned by a callable. 
*/ +abstract class ReturnNode extends Node { + /** Gets the kind of this return node. */ + abstract ReturnKind getKind(); +} + +private module ReturnNodes { + private predicate isValid(CfgNodes::ReturningCfgNode node) { + exists(ReturningStmt stmt, Callable scope | + stmt = node.getNode() and + scope = node.getScope() + | + stmt instanceof ReturnStmt and + (scope instanceof Method or scope instanceof SingletonMethod or scope instanceof Lambda) + or + stmt instanceof NextStmt and + (scope instanceof Block or scope instanceof Lambda) + or + stmt instanceof BreakStmt and + (scope instanceof Block or scope instanceof Lambda) + ) + } + + /** + * A data-flow node that represents an expression returned by a callable, + * either using an explicit `return` statement or as the expression of a method body. + */ + class ExplicitReturnNode extends ReturningNode, ReturningStatementNode { + ExplicitReturnNode() { + isValid(n) and + n.getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and + n.getScope() instanceof Callable + } + + override ReturnKind getKind() { + if n.getNode() instanceof BreakStmt + then result instanceof BreakReturnKind + else result instanceof NormalReturnKind + } + } + + class ExprReturnNode extends ReturningNode, ExprNode { + ExprReturnNode() { + this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and + this.(NodeImpl).getCfgScope() instanceof Callable + } + + override ReturnKind getKind() { result instanceof NormalReturnKind } + } + + /** + * A synthetic data-flow node for joining flow from different syntactic + * returns into a single node. + * + * This node only exists to avoid computing the product of a large fan-in + * with a large fan-out. + */ + class SynthReturnNode extends NodeImpl, ReturnNode, TSynthReturnNode { + private CfgScope scope; + private ReturnKind kind; + + SynthReturnNode() { this = TSynthReturnNode(scope, kind) } + + /** Gets a syntactic return node that flows into this synthetic node. 
*/ + ReturningNode getAnInput() { + result.(NodeImpl).getCfgScope() = scope and + result.getKind() = kind + } + + override ReturnKind getKind() { result = kind } + + override CfgScope getCfgScope() { result = scope } + + override Location getLocationImpl() { result = scope.getLocation() } + + override string toStringImpl() { result = "return " + kind + " in " + scope } + } + + private class SummaryReturnNode extends SummaryNode, ReturnNode { + private ReturnKind rk; + + SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) } + + override ReturnKind getKind() { result = rk } + } +} + +import ReturnNodes + +/** A data-flow node that represents the output of a call. */ +abstract class OutNode extends Node { + /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */ + abstract DataFlowCall getCall(ReturnKind kind); +} + +private module OutNodes { + /** + * A data-flow node that reads a value returned directly by a callable, + * either via a call or a `yield` of a block. 
+ */ + class ExprOutNode extends OutNode, ExprNode { + private DataFlowCall call; + + ExprOutNode() { call.asCall() = this.getExprNode() } + + override DataFlowCall getCall(ReturnKind kind) { + result = call and + kind instanceof NormalReturnKind + } + } + + private class SummaryOutNode extends SummaryNode, OutNode { + SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) } + + override DataFlowCall getCall(ReturnKind kind) { + FlowSummaryImpl::Private::summaryOutNode(result, this, kind) + } + } +} + +import OutNodes + +predicate jumpStep(Node pred, Node succ) { + SsaImpl::captureFlowIn(pred.(SsaDefinitionNode).getDefinition(), + succ.(SsaDefinitionNode).getDefinition()) + or + SsaImpl::captureFlowOut(pred.(SsaDefinitionNode).getDefinition(), + succ.(SsaDefinitionNode).getDefinition()) + or + exists(Self s, Method m | + s = succ.asExpr().getExpr() and + pred.(SelfParameterNode).getMethod() = m and + m = s.getEnclosingMethod() and + m != s.getEnclosingCallable() + ) + or + succ.asExpr().getExpr().(ConstantReadAccess).getValue() = pred.asExpr().getExpr() +} + +predicate storeStep(Node node1, Content c, Node node2) { + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2) +} + +predicate readStep(Node node1, Content c, Node node2) { + FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2) +} + +/** + * Holds if values stored inside content `c` are cleared at node `n`. For example, + * any value stored inside `f` is cleared at the pre-update node associated with `x` + * in `x.f = newValue`. + */ +predicate clearsContent(Node n, Content c) { + storeStep(_, c, n) + or + FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c) +} + +private newtype TDataFlowType = TTodoDataFlowType() + +class DataFlowType extends TDataFlowType { + string toString() { result = "" } +} + +/** Gets the type of `n` used for type pruning. */ +DataFlowType getNodeType(NodeImpl n) { any() } + +/** Gets a string representation of a `DataFlowType`. 
*/ +string ppReprType(DataFlowType t) { result = t.toString() } + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. + */ +pragma[inline] +predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() } + +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update. + */ +abstract class PostUpdateNode extends Node { + /** Gets the node before the state update. */ + abstract Node getPreUpdateNode(); +} + +private module PostUpdateNodes { + class ExprPostUpdateNode extends PostUpdateNode, NodeImpl, TExprPostUpdateNode { + private CfgNodes::ExprCfgNode e; + + ExprPostUpdateNode() { this = TExprPostUpdateNode(e) } + + override ExprNode getPreUpdateNode() { e = result.getExprNode() } + + override CfgScope getCfgScope() { result = e.getExpr().getCfgScope() } + + override Location getLocationImpl() { result = e.getLocation() } + + override string toStringImpl() { result = "[post] " + e.toString() } + } + + private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode { + private Node pre; + + SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) } + + override Node getPreUpdateNode() { result = pre } + } +} + +private import PostUpdateNodes + +/** A node that performs a type cast. */ +class CastNode extends Node { + CastNode() { this instanceof ReturningNode } +} + +class DataFlowExpr = CfgNodes::ExprCfgNode; + +int accessPathLimit() { result = 5 } + +/** + * Holds if access paths with `c` at their head always should be tracked at high + * precision. 
This disables adaptive access path precision for such access paths. + */ +predicate forceHighPrecision(Content c) { none() } + +/** The unit type. */ +private newtype TUnit = TMkUnit() + +/** The trivial type with a single element. */ +class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** + * Holds if `n` does not require a `PostUpdateNode` as it either cannot be + * modified or its modification cannot be observed, for example if it is a + * freshly created object that is not saved in a variable. + * + * This predicate is only used for consistency checks. + */ +predicate isImmutableOrUnobservable(Node n) { n instanceof BlockArgumentNode } + +/** + * Holds if the node `n` is unreachable when the call context is `call`. + */ +predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } + +newtype LambdaCallKind = + TYieldCallKind() or + TLambdaCallKind() + +/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */ +predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { + kind = TYieldCallKind() and + creation.asExpr().getExpr() = c.asCallable().(Block) + or + kind = TLambdaCallKind() and + ( + creation.asExpr().getExpr() = c.asCallable().(Lambda) + or + creation.asExpr() = + any(CfgNodes::ExprNodes::MethodCallCfgNode mc | + c.asCallable() = mc.getBlock().getExpr() and + mc.getExpr().getMethodName() = "lambda" + ) + ) +} + +/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. 
*/ +predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { + kind = TYieldCallKind() and + receiver.(BlockParameterNode).getMethod() = + call.asCall().getExpr().(YieldCall).getEnclosingMethod() + or + kind = TLambdaCallKind() and + call.asCall() = + any(CfgNodes::ExprNodes::MethodCallCfgNode mc | + receiver.asExpr() = mc.getReceiver() and + mc.getExpr().getMethodName() = "call" + ) + or + receiver = call.(SummaryCall).getReceiver() and + if receiver.(ParameterNode).isParameterOf(_, -2) + then kind = TYieldCallKind() + else kind = TLambdaCallKind() +} + +/** Extra data-flow steps needed for lambda flow analysis. */ +predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll new file mode 100644 index 00000000000..c8ad1ca1eaf --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll @@ -0,0 +1,210 @@ +private import ruby +private import DataFlowDispatch +private import DataFlowPrivate +private import codeql.ruby.CFG +private import codeql.ruby.typetracking.TypeTracker +private import codeql.ruby.dataflow.SSA +private import FlowSummaryImpl as FlowSummaryImpl + +/** + * An element, viewed as a node in a data flow graph. Either an expression + * (`ExprNode`) or a parameter (`ParameterNode`). + */ +class Node extends TNode { + /** Gets the expression corresponding to this node, if any. */ + CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() } + + /** Gets the parameter corresponding to this node, if any. */ + Parameter asParameter() { result = this.(ParameterNode).getParameter() } + + /** Gets a textual representation of this node. */ + // TODO: cache + final string toString() { result = this.(NodeImpl).toStringImpl() } + + /** Gets the location of this node. 
*/ + // TODO: cache + final Location getLocation() { result = this.(NodeImpl).getLocationImpl() } + + DataFlowCallable getEnclosingCallable() { result = TCfgScope(this.(NodeImpl).getCfgScope()) } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** + * Gets a local source node from which data may flow to this node in zero or more local data-flow steps. + */ + LocalSourceNode getALocalSource() { result.flowsTo(this) } +} + +/** A data-flow node corresponding to a call in the control-flow graph. */ +class CallNode extends LocalSourceNode { + private CfgNodes::ExprNodes::CallCfgNode node; + + CallNode() { node = this.asExpr() } + + /** Gets the data-flow node corresponding to the receiver of the call corresponding to this data-flow node */ + Node getReceiver() { result.asExpr() = node.getReceiver() } + + /** Gets the data-flow node corresponding to the `n`th argument of the call corresponding to this data-flow node */ + Node getArgument(int n) { result.asExpr() = node.getArgument(n) } + + /** Gets the data-flow node corresponding to the named argument of the call corresponding to this data-flow node */ + Node getKeywordArgument(string name) { result.asExpr() = node.getKeywordArgument(name) } +} + +/** + * An expression, viewed as a node in a data flow graph. + * + * Note that because of control-flow splitting, one `Expr` may correspond + * to multiple `ExprNode`s, just like it may correspond to multiple + * `ControlFlow::Node`s. 
+ */ +class ExprNode extends Node, TExprNode { + private CfgNodes::ExprCfgNode n; + + ExprNode() { this = TExprNode(n) } + + /** Gets the expression corresponding to this node. */ + CfgNodes::ExprCfgNode getExprNode() { result = n } +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. + */ +class ParameterNode extends Node, TParameterNode { + /** Gets the parameter corresponding to this node, if any. */ + Parameter getParameter() { none() } + + /** + * Holds if this node is the parameter of callable `c` at the specified + * (zero-based) position. + */ + predicate isParameterOf(DataFlowCallable c, int i) { none() } +} + +/** + * A data-flow node that is a source of local flow. + */ +class LocalSourceNode extends Node { + LocalSourceNode() { isLocalSourceNode(this) } + + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */ + pragma[inline] + predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) } + + /** + * Gets a node that this node may flow to using one heap and/or interprocedural step. + * + * See `TypeTracker` for more details about how to use this. + */ + pragma[inline] + LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) } +} + +predicate hasLocalSource(Node sink, Node source) { + // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the + // recursive case, so instead we check it explicitly here. + source = sink and + source instanceof LocalSourceNode + or + exists(Node mid | + hasLocalSource(mid, source) and + localFlowStepTypeTracker(mid, sink) + ) +} + +/** Gets a node corresponding to expression `e`. */ +ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e } + +/** + * Gets the node corresponding to the value of parameter `p` at function entry. 
+ */ +ParameterNode parameterNode(Parameter p) { result.getParameter() = p } + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localFlowStep = localFlowStepImpl/2; + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } + +/** + * Holds if data can flow from `e1` to `e2` in zero or more + * local (intra-procedural) steps. + */ +predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) { + localFlow(exprNode(e1), exprNode(e2)) +} + +/** + * A reference contained in an object. This is either a field, a property, + * or an element in a collection. + */ +class Content extends TContent { + /** Gets a textual representation of this content. */ + string toString() { none() } + + /** Gets the location of this content. */ + Location getLocation() { none() } +} + +/** + * A guard that validates some expression. + * + * To use this in a configuration, extend the class and provide a + * characteristic predicate precisely specifying the guard, and override + * `checks` to specify what is being validated and in which branch. + * + * It is important that all extending classes in scope are disjoint. + */ +abstract class BarrierGuard extends CfgNodes::ExprCfgNode { + private ConditionBlock conditionBlock; + + BarrierGuard() { this = conditionBlock.getLastNode() } + + /** Holds if this guard controls block `bb` upon evaluating to `branch`. */ + private predicate controlsBlock(BasicBlock bb, boolean branch) { + exists(SuccessorTypes::BooleanSuccessor s | s.getValue() = branch | + conditionBlock.controls(bb, s) + ) + } + + /** + * Holds if this guard validates `expr` upon evaluating to `branch`. + * For example, the following code validates `foo` when the condition + * `foo == "foo"` is true. 
+ * ```ruby + * if foo == "foo" + * do_something + * else + * do_something_else + * end + * ``` + */ + abstract predicate checks(CfgNode expr, boolean branch); + + final Node getAGuardedNode() { + exists(boolean branch, CfgNodes::ExprCfgNode testedNode, Ssa::Definition def | + def.getARead() = testedNode and + def.getARead() = result.asExpr() and + this.checks(testedNode, branch) and + this.controlsBlock(result.asExpr().getBasicBlock(), branch) + ) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll new file mode 100644 index 00000000000..83076558ec4 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll @@ -0,0 +1,964 @@ +/** + * Provides classes and predicates for defining flow summaries. + * + * The definitions in this file are language-independent, and language-specific + * definitions are passed in via the `DataFlowImplSpecific` and + * `FlowSummaryImplSpecific` modules. + */ + +private import FlowSummaryImplSpecific +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +private import DataFlowImplCommon + +/** Provides classes and predicates for defining flow summaries. */ +module Public { + private import Private + + /** + * A component used in a flow summary. + * + * Either a parameter or an argument at a given position, a specific + * content type, or a return kind. + */ + class SummaryComponent extends TSummaryComponent { + /** Gets a textual representation of this summary component. 
*/ + string toString() { + exists(Content c | this = TContentSummaryComponent(c) and result = c.toString()) + or + exists(int i | this = TParameterSummaryComponent(i) and result = "parameter " + i) + or + exists(int i | this = TArgumentSummaryComponent(i) and result = "argument " + i) + or + exists(ReturnKind rk | this = TReturnSummaryComponent(rk) and result = "return (" + rk + ")") + } + } + + /** Provides predicates for constructing summary components. */ + module SummaryComponent { + /** Gets a summary component for content `c`. */ + SummaryComponent content(Content c) { result = TContentSummaryComponent(c) } + + /** Gets a summary component for parameter `i`. */ + SummaryComponent parameter(int i) { result = TParameterSummaryComponent(i) } + + /** Gets a summary component for argument `i`. */ + SummaryComponent argument(int i) { result = TArgumentSummaryComponent(i) } + + /** Gets a summary component for a return of kind `rk`. */ + SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } + } + + /** + * A (non-empty) stack of summary components. + * + * A stack is used to represent where data is read from (input) or where it + * is written to (output). For example, an input stack `[Field f, Argument 0]` + * means that data is read from field `f` from the `0`th argument, while an + * output stack `[Field g, Return]` means that data is written to the field + * `g` of the returned object. + */ + class SummaryComponentStack extends TSummaryComponentStack { + /** Gets the head of this stack. */ + SummaryComponent head() { + this = TSingletonSummaryComponentStack(result) or + this = TConsSummaryComponentStack(result, _) + } + + /** Gets the tail of this stack, if any. */ + SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } + + /** Gets the length of this stack. 
*/ + int length() { + this = TSingletonSummaryComponentStack(_) and result = 1 + or + result = 1 + this.tail().length() + } + + /** Gets the stack obtained by dropping the first `i` elements, if any. */ + SummaryComponentStack drop(int i) { + i = 0 and result = this + or + result = this.tail().drop(i - 1) + } + + /** Holds if this stack contains summary component `c`. */ + predicate contains(SummaryComponent c) { c = this.drop(_).head() } + + /** Gets a textual representation of this stack. */ + string toString() { + exists(SummaryComponent head, SummaryComponentStack tail | + head = this.head() and + tail = this.tail() and + result = head + " of " + tail + ) + or + exists(SummaryComponent c | + this = TSingletonSummaryComponentStack(c) and + result = c.toString() + ) + } + } + + /** Provides predicates for constructing stacks of summary components. */ + module SummaryComponentStack { + /** Gets a singleton stack containing `c`. */ + SummaryComponentStack singleton(SummaryComponent c) { + result = TSingletonSummaryComponentStack(c) + } + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + * + * Make sure to override `RequiredSummaryComponentStack::required()` in order + * to ensure that the constructed stack exists. + */ + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { + result = TConsSummaryComponentStack(head, tail) + } + + /** Gets a singleton stack for argument `i`. */ + SummaryComponentStack argument(int i) { result = singleton(SummaryComponent::argument(i)) } + + /** Gets a singleton stack representing a return of kind `rk`. */ + SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } + } + + /** + * A class that exists for QL technical reasons only (the IPA type used + * to represent component stacks needs to be bounded). 
+ */ + abstract class RequiredSummaryComponentStack extends SummaryComponentStack { + /** + * Holds if the stack obtained by pushing `c` onto this stack is required. + */ + abstract predicate required(SummaryComponent c); + } + + /** A callable with a flow summary. */ + abstract class SummarizedCallable extends DataFlowCallable { + /** + * Holds if data may flow from `input` to `output` through this callable. + * + * `preservesValue` indicates whether this is a value-preserving step + * or a taint-step. + * + * Input specifications are restricted to stacks that end with + * `SummaryComponent::argument(_)`, preceded by zero or more + * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. + * + * Output specifications are restricted to stacks that end with + * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. + * + * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero + * or more `SummaryComponent::content(_)` components. + * + * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an + * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded + * by zero or more `SummaryComponent::content(_)` components. + */ + pragma[nomagic] + predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + none() + } + + /** + * Holds if values stored inside `content` are cleared on objects passed as + * the `i`th argument to this callable. + */ + pragma[nomagic] + predicate clearsContent(int i, Content content) { none() } + } +} + +/** + * Provides predicates for compiling flow summaries down to atomic local steps, + * read steps, and store steps. 
+ */
+module Private {
+  private import Public
+
+  newtype TSummaryComponent =
+    TContentSummaryComponent(Content c) or
+    TParameterSummaryComponent(int i) { parameterPosition(i) } or
+    TArgumentSummaryComponent(int i) { parameterPosition(i) } or
+    TReturnSummaryComponent(ReturnKind rk)
+
+  private TSummaryComponent thisParam() {
+    result = TParameterSummaryComponent(instanceParameterPosition()) // the instance parameter
+  }
+
+  newtype TSummaryComponentStack =
+    TSingletonSummaryComponentStack(SummaryComponent c) or
+    TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) {
+      tail.(RequiredSummaryComponentStack).required(head) // explicitly requested via `required`
+      or
+      // also allow pushing the instance parameter wherever some parameter is required
+      tail.(RequiredSummaryComponentStack).required(TParameterSummaryComponent(_)) and
+      head = thisParam()
+    }
+
+  pragma[nomagic]
+  private predicate summary(
+    SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output,
+    boolean preservesValue
+  ) {
+    c.propagatesFlow(input, output, preservesValue) // flow declared by `c` itself
+    or
+    // observe side effects of callbacks on input arguments
+    c.propagatesFlow(output, input, preservesValue) and
+    preservesValue = true and
+    isCallbackParameter(input) and
+    isContentOfArgument(output)
+    or
+    // flow from the receiver of a callback into the instance-parameter
+    exists(SummaryComponentStack s, SummaryComponentStack callbackRef |
+      c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _)
+    |
+      callbackRef = s.drop(_) and
+      (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and
+      input = callbackRef.tail() and
+      output = TConsSummaryComponentStack(thisParam(), input) and
+      preservesValue = true
+    )
+  }
+
+  private predicate isCallbackParameter(SummaryComponentStack s) {
+    s.head() = TParameterSummaryComponent(_) and exists(s.tail()) // parameter head with a tail
+  }
+
+  private predicate isContentOfArgument(SummaryComponentStack s) {
+    s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail()) // content on top
+    or
+    s = TSingletonSummaryComponentStack(TArgumentSummaryComponent(_)) // base: a lone argument
+  }
+
+  private predicate outputState(SummarizedCallable c, SummaryComponentStack s) {
+    summary(c, _, s, _) // the output stack of a summary of `c`
+    or
+    exists(SummaryComponentStack out |
+      outputState(c, out) and
+      out.head() = TContentSummaryComponent(_) and // tail of an output state with content head
+      s = out.tail()
+    )
+    or
+    // Add the argument node corresponding to the requested post-update node
+    inputState(c, s) and isCallbackParameter(s)
+  }
+
+  private predicate inputState(SummarizedCallable c, SummaryComponentStack s) {
+    summary(c, s, _, _) // the input stack of a summary of `c`
+    or
+    exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) // any tail thereof
+    or
+    exists(SummaryComponentStack out |
+      outputState(c, out) and
+      out.head() = TParameterSummaryComponent(_) and // tail of an output state with parameter head
+      s = out.tail()
+    )
+  }
+
+  private newtype TSummaryNodeState =
+    TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or
+    TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) }
+
+  /**
+   * A state used to break up (complex) flow summaries into atomic flow steps.
+   * For a flow summary
+   *
+   * ```ql
+   * propagatesFlow(
+   *   SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+   * )
+   * ```
+   *
+   * the following states are used:
+   *
+   * - `TSummaryNodeInputState(SummaryComponentStack s)`:
+   *   this state represents that the components in `s` _have been read_ from the
+   *   input.
+   * - `TSummaryNodeOutputState(SummaryComponentStack s)`:
+   *   this state represents that the components in `s` _remain to be written_ to
+   *   the output.
+   */
+  class SummaryNodeState extends TSummaryNodeState {
+    /** Holds if this state is a valid input state for `c`. */
+    pragma[nomagic]
+    predicate isInputState(SummarizedCallable c, SummaryComponentStack s) {
+      this = TSummaryNodeInputState(s) and
+      inputState(c, s)
+    }
+
+    /** Holds if this state is a valid output state for `c`.
*/
+    pragma[nomagic]
+    predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) {
+      this = TSummaryNodeOutputState(s) and
+      outputState(c, s)
+    }
+
+    /** Gets a textual representation of this state. */
+    string toString() {
+      exists(SummaryComponentStack s |
+        this = TSummaryNodeInputState(s) and
+        result = "read: " + s
+      )
+      or
+      exists(SummaryComponentStack s |
+        this = TSummaryNodeOutputState(s) and
+        result = "to write: " + s
+      )
+    }
+  }
+
+  /**
+   * Holds if `state` represents having read the `i`th argument for `c`. In this case
+   * we are not synthesizing a data-flow node, but instead assume that a relevant
+   * parameter node already exists.
+   */
+  private predicate parameterReadState(SummarizedCallable c, SummaryNodeState state, int i) {
+    state.isInputState(c, SummaryComponentStack::argument(i))
+  }
+
+  /**
+   * Holds if a synthesized summary node is needed for the state `state` in summarized
+   * callable `c`.
+   */
+  predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) {
+    state.isInputState(c, _) and
+    not parameterReadState(c, state, _) // plain argument reads reuse a parameter node instead
+    or
+    state.isOutputState(c, _)
+  }
+
+  pragma[noinline]
+  private Node summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) {
+    exists(SummaryNodeState state | state.isInputState(c, s) |
+      result = summaryNode(c, state) // the synthesized node for this input state
+      or
+      exists(int i |
+        parameterReadState(c, state, i) and
+        result.(ParamNode).isParameterOf(c, i) // or the parameter node for a plain argument read
+      )
+    )
+  }
+
+  pragma[noinline]
+  private Node summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) {
+    exists(SummaryNodeState state |
+      state.isOutputState(c, s) and
+      result = summaryNode(c, state) // the synthesized node for this output state
+    )
+  }
+
+  /**
+   * Holds if a write targets `post`, which is a post-update node for the `i`th
+   * parameter of `c`.
+   */
+  private predicate isParameterPostUpdate(Node post, SummarizedCallable c, int i) {
+    post = summaryNodeOutputState(c, SummaryComponentStack::argument(i))
+  }
+
+  /** Holds if a parameter node is required for the `i`th parameter of `c`. */
+  predicate summaryParameterNodeRange(SummarizedCallable c, int i) {
+    parameterReadState(c, _, i)
+    or
+    isParameterPostUpdate(_, c, i)
+  }
+
+  private predicate callbackOutput(
+    SummarizedCallable c, SummaryComponentStack s, Node receiver, ReturnKind rk
+  ) {
+    any(SummaryNodeState state).isInputState(c, s) and
+    s.head() = TReturnSummaryComponent(rk) and // input state whose head is a return of kind `rk`
+    receiver = summaryNodeInputState(c, s.drop(1)) // node for the stack below that head
+  }
+
+  private predicate callbackInput(
+    SummarizedCallable c, SummaryComponentStack s, Node receiver, int i
+  ) {
+    any(SummaryNodeState state).isOutputState(c, s) and
+    s.head() = TParameterSummaryComponent(i) and // output state whose head is parameter `i`
+    receiver = summaryNodeInputState(c, s.drop(1)) // node for the stack below that head
+  }
+
+  /** Holds if a call targeting `receiver` should be synthesized inside `c`. */
+  predicate summaryCallbackRange(SummarizedCallable c, Node receiver) {
+    callbackOutput(c, _, receiver, _)
+    or
+    callbackInput(c, _, receiver, _)
+  }
+
+  /**
+   * Gets the type of synthesized summary node `n`.
+   *
+   * The type is computed based on the language-specific predicates
+   * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and
+   * `getCallbackReturnType()`.
+   */
+  DataFlowType summaryNodeType(Node n) {
+    exists(Node pre |
+      summaryPostUpdateNode(n, pre) and
+      result = getNodeType(pre) // a post-update node gets the type of its pre-update node
+    )
+    or
+    exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() |
+      n = summaryNodeInputState(c, s) and // input state: typed by the head component
+      (
+        exists(Content cont |
+          head = TContentSummaryComponent(cont) and result = getContentType(cont)
+        )
+        or
+        exists(ReturnKind rk |
+          head = TReturnSummaryComponent(rk) and
+          result =
+            getCallbackReturnType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
+                  s.drop(1))), rk)
+        )
+      )
+      or
+      n = summaryNodeOutputState(c, s) and // output state: typed by the head component
+      (
+        exists(Content cont |
+          head = TContentSummaryComponent(cont) and result = getContentType(cont)
+        )
+        or
+        s.length() = 1 and // only a singleton return stack uses the return type of `c`
+        exists(ReturnKind rk |
+          head = TReturnSummaryComponent(rk) and
+          result = getReturnType(c, rk)
+        )
+        or
+        exists(int i | head = TParameterSummaryComponent(i) |
+          result =
+            getCallbackParameterType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
+                  s.drop(1))), i)
+        )
+      )
+    )
+  }
+
+  /** Holds if summary node `out` contains output of kind `rk` from call `c`. */
+  predicate summaryOutNode(DataFlowCall c, Node out, ReturnKind rk) {
+    exists(SummarizedCallable callable, SummaryComponentStack s, Node receiver |
+      callbackOutput(callable, s, receiver, rk) and
+      out = summaryNodeInputState(callable, s) and
+      c = summaryDataFlowCall(receiver) // `c` is the synthesized call on `receiver`
+    )
+  }
+
+  /** Holds if summary node `arg` is the `i`th argument of call `c`. */
+  predicate summaryArgumentNode(DataFlowCall c, Node arg, int i) {
+    exists(SummarizedCallable callable, SummaryComponentStack s, Node receiver |
+      callbackInput(callable, s, receiver, i) and
+      arg = summaryNodeOutputState(callable, s) and
+      c = summaryDataFlowCall(receiver) // `c` is the synthesized call on `receiver`
+    )
+  }
+
+  /** Holds if summary node `post` is a post-update node with pre-update node `pre`.
*/
+  predicate summaryPostUpdateNode(Node post, Node pre) {
+    exists(SummarizedCallable c, int i |
+      isParameterPostUpdate(post, c, i) and
+      pre.(ParamNode).isParameterOf(c, i) // updates to a parameter of `c`
+    )
+    or
+    exists(SummarizedCallable callable, SummaryComponentStack s |
+      callbackInput(callable, s, _, _) and // updates to an argument of a synthesized callback call
+      pre = summaryNodeOutputState(callable, s) and
+      post = summaryNodeInputState(callable, s)
+    )
+  }
+
+  /** Holds if summary node `ret` is a return node of kind `rk`. */
+  predicate summaryReturnNode(Node ret, ReturnKind rk) {
+    exists(SummarizedCallable callable, SummaryComponentStack s |
+      ret = summaryNodeOutputState(callable, s) and
+      s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk))
+    )
+  }
+
+  /** Provides a compilation of flow summaries to atomic data-flow steps. */
+  module Steps {
+    /**
+     * Holds if there is a local step from `pred` to `succ`, which is synthesized
+     * from a flow summary.
+     */
+    predicate summaryLocalStep(Node pred, Node succ, boolean preservesValue) {
+      exists(
+        SummarizedCallable c, SummaryComponentStack inputContents,
+        SummaryComponentStack outputContents
+      |
+        summary(c, inputContents, outputContents, preservesValue) and
+        pred = summaryNodeInputState(c, inputContents) and
+        succ = summaryNodeOutputState(c, outputContents)
+      |
+        preservesValue = true
+        or
+        // a taint step is only reported when no value step exists for the same stacks
+        preservesValue = false and not summary(c, inputContents, outputContents, true)
+      )
+      or
+      // If flow through a method updates a parameter from some input A, and that
+      // parameter also is returned through B, then we'd like a combined flow from A
+      // to B as well. As an example, this simplifies modeling of fluent methods:
+      // for `StringBuilder.append(x)` with a specified value flow from qualifier to
+      // return value and taint flow from argument 0 to the qualifier, then this
+      // allows us to infer taint flow from argument 0 to the return value.
+      succ instanceof ParamNode and summaryPostUpdateNode(pred, succ) and preservesValue = true
+      or
+      // Similarly we would like to chain together summaries where values get passed
+      // into callbacks along the way.
+      pred instanceof ArgNode and summaryPostUpdateNode(succ, pred) and preservesValue = true
+    }
+
+    /**
+     * Holds if there is a read step of content `c` from `pred` to `succ`, which
+     * is synthesized from a flow summary.
+     */
+    predicate summaryReadStep(Node pred, Content c, Node succ) {
+      exists(SummarizedCallable sc, SummaryComponentStack s |
+        pred = summaryNodeInputState(sc, s.drop(1)) and
+        succ = summaryNodeInputState(sc, s) and
+        SummaryComponent::content(c) = s.head()
+      )
+    }
+
+    /**
+     * Holds if there is a store step of content `c` from `pred` to `succ`, which
+     * is synthesized from a flow summary.
+     */
+    predicate summaryStoreStep(Node pred, Content c, Node succ) {
+      exists(SummarizedCallable sc, SummaryComponentStack s |
+        pred = summaryNodeOutputState(sc, s) and
+        succ = summaryNodeOutputState(sc, s.drop(1)) and
+        SummaryComponent::content(c) = s.head()
+      )
+    }
+
+    /**
+     * Holds if values stored inside content `c` are cleared on `arg`, because `arg` is
+     * passed to a call whose target is a summarized callable clearing `c` at that position.
+     */
+    predicate summaryClearsContent(ArgNode arg, Content c) {
+      exists(DataFlowCall call, int i |
+        viableCallable(call).(SummarizedCallable).clearsContent(i, c) and
+        arg.argumentOf(call, i)
+      )
+    }
+
+    pragma[nomagic]
+    private ParamNode summaryArgParam(ArgNode arg, ReturnKindExt rk, OutNodeExt out) {
+      exists(DataFlowCall call, int pos, SummarizedCallable callable |
+        arg.argumentOf(call, pos) and
+        viableCallable(call) = callable and
+        result.isParameterOf(callable, pos) and // the parameter that `arg` is passed into
+        out = rk.getAnOutNode(call) // an out node of kind `rk` for the same call
+      )
+    }
+
+    /**
+     * Holds if `arg` flows to `out` using a simple flow summary, that is, a flow
+     * summary without reads and stores.
+     *
+     * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+     * be useful to include in the exposed local data-flow/taint-tracking relations.
+     */
+    predicate summaryThroughStep(ArgNode arg, Node out, boolean preservesValue) {
+      exists(ReturnKindExt rk, ReturnNodeExt ret |
+        // a single local step from the matching parameter to a return node of kind `rk`
+        summaryLocalStep(summaryArgParam(arg, rk, out), ret, preservesValue) and
+        ret.getKind() = rk
+      )
+    }
+
+    /**
+     * Holds if there is a read(+taint) of `c` from `arg` to `out` using a
+     * flow summary.
+     *
+     * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+     * be useful to include in the exposed local data-flow/taint-tracking relations.
+     */
+    predicate summaryGetterStep(ArgNode arg, Content c, Node out) {
+      exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret |
+        summaryReadStep(summaryArgParam(arg, rk, out), c, mid) and
+        summaryLocalStep(mid, ret, _) and // value or taint step after the read
+        ret.getKind() = rk
+      )
+    }
+
+    /**
+     * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a
+     * flow summary.
+     *
+     * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+     * be useful to include in the exposed local data-flow/taint-tracking relations.
+     */
+    predicate summarySetterStep(ArgNode arg, Content c, Node out) {
+      exists(ReturnKindExt rk, Node mid, ReturnNodeExt ret |
+        summaryLocalStep(summaryArgParam(arg, rk, out), mid, _) and // value or taint step first
+        summaryStoreStep(mid, c, ret) and
+        ret.getKind() = rk
+      )
+    }
+
+    /**
+     * Holds if data is written into content `c` of argument `arg` using a flow summary.
+     *
+     * Depending on the type of `c`, this predicate may be relevant to include in the
+     * definition of `clearsContent()`.
+     */
+    predicate summaryStoresIntoArg(Content c, Node arg) {
+      exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out |
+        exists(DataFlowCall call, SummarizedCallable callable |
+          getNodeEnclosingCallable(ret) = callable and
+          viableCallable(call) = callable and
+          summaryStoreStep(_, c, ret) and
+          ret.getKind() = pragma[only_bind_into](rk) and
+          out = rk.getAnOutNode(call) and
+          arg = out.getPreUpdateNode() // `arg` is the node whose post-update node is `out`
+        )
+      )
+    }
+  }
+
+  /**
+   * Provides a means of translating externally (e.g., CSV) defined flow
+   * summaries into `SummarizedCallable`s.
+   */
+  module External {
+    /** Holds if `spec` is a relevant external specification. */
+    private predicate relevantSpec(string spec) {
+      summaryElement(_, spec, _, _) or
+      summaryElement(_, _, spec, _) or
+      sourceElement(_, spec, _) or
+      sinkElement(_, spec, _)
+    }
+
+    /** Holds if the `n`th component of specification `s` is `c`; components are split at " of ". */
+    predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
+
+    /** Holds if specification `s` has length `len`, that is, one more than its largest index. */
+    predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
+
+    /** Gets the last component of specification `s`. */
+    string specLast(string s) {
+      exists(int len |
+        specLength(s, len) and
+        specSplit(s, result, len - 1)
+      )
+    }
+
+    /** Holds if specification component `c` parses as parameter `n`. */
+    predicate parseParam(string c, int n) {
+      specSplit(_, c, _) and
+      (
+        c.regexpCapture("Parameter\\[([-0-9]+)\\]", 1).toInt() = n // single position
+        or
+        exists(int n1, int n2 |
+          c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
+          c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
+          n = [n1 .. n2] // range form `Parameter[n1..n2]`: every position in the range
+        )
+      )
+    }
+
+    /** Holds if specification component `c` parses as argument `n`.
*/
+    predicate parseArg(string c, int n) {
+      specSplit(_, c, _) and
+      (
+        c.regexpCapture("Argument\\[([-0-9]+)\\]", 1).toInt() = n // single position
+        or
+        exists(int n1, int n2 |
+          c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
+          c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
+          n = [n1 .. n2] // range form `Argument[n1..n2]`: every position in the range
+        )
+      )
+    }
+
+    private SummaryComponent interpretComponent(string c) {
+      specSplit(_, c, _) and
+      (
+        exists(int pos | parseArg(c, pos) and result = SummaryComponent::argument(pos))
+        or
+        exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos))
+        or
+        c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
+        or
+        result = interpretComponentSpecific(c) // language-specific components
+      )
+    }
+
+    /**
+     * Holds if `spec` specifies summary component stack `stack`.
+     */
+    predicate interpretSpec(string spec, SummaryComponentStack stack) {
+      interpretSpec(spec, 0, stack) // the stack for all components, starting at index 0
+    }
+
+    private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
+      exists(string c |
+        relevantSpec(spec) and
+        specLength(spec, idx + 1) and // `idx` is the index of the last component
+        specSplit(spec, c, idx) and
+        stack = SummaryComponentStack::singleton(interpretComponent(c))
+      )
+      or
+      exists(SummaryComponent head, SummaryComponentStack tail |
+        interpretSpec(spec, idx, head, tail) and
+        stack = SummaryComponentStack::push(head, tail)
+      )
+    }
+
+    private predicate interpretSpec(
+      string output, int idx, SummaryComponent head, SummaryComponentStack tail
+    ) {
+      exists(string c |
+        interpretSpec(output, idx + 1, tail) and // `tail` covers the components after `idx`
+        specSplit(output, c, idx) and
+        head = interpretComponent(c)
+      )
+    }
+
+    private class MkStack extends RequiredSummaryComponentStack {
+      MkStack() { interpretSpec(_, _, _, this) } // a tail that some specification pushes onto
+
+      override predicate required(SummaryComponent c) { interpretSpec(_, _, c, this) }
+    }
+
+    private class SummarizedCallableExternal extends SummarizedCallable {
+      SummarizedCallableExternal() { summaryElement(this, _, _, _) }
+
+      override predicate propagatesFlow(
+        SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+      ) {
+        exists(string inSpec, string outSpec, string kind |
+          summaryElement(this, inSpec, outSpec, kind) and
+          interpretSpec(inSpec, input) and
+          interpretSpec(outSpec, output)
+        |
+          kind = "value" and preservesValue = true
+          or
+          kind = "taint" and preservesValue = false
+        )
+      }
+    }
+
+    /** Holds if component `c` of specification `spec` cannot be parsed. */
+    predicate invalidSpecComponent(string spec, string c) {
+      specSplit(spec, c, _) and
+      not exists(interpretComponent(c))
+    }
+
+    private predicate inputNeedsReference(string c) {
+      c = "Argument" or
+      parseArg(c, _)
+    }
+
+    private predicate outputNeedsReference(string c) {
+      c = "Argument" or
+      parseArg(c, _) or
+      c = "ReturnValue"
+    }
+
+    private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
+      exists(SourceOrSinkElement e |
+        sourceElement(e, output, kind) and
+        if outputNeedsReference(specLast(output))
+        then e = ref.getCallTarget() // `ref` is something whose call target is `e`
+        else e = ref.asElement() // `ref` is the element `e` itself
+      )
+    }
+
+    private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
+      exists(SourceOrSinkElement e |
+        sinkElement(e, input, kind) and
+        if inputNeedsReference(specLast(input))
+        then e = ref.getCallTarget() // `ref` is something whose call target is `e`
+        else e = ref.asElement() // `ref` is the element `e` itself
+      )
+    }
+
+    private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
+      sourceElementRef(ref, output, _) and
+      specLength(output, idx) and // base case: `idx` is one past the last component
+      node = ref
+      or
+      exists(InterpretNode mid, string c |
+        interpretOutput(output, idx + 1, ref, mid) and
+        specSplit(output, c, idx)
+      |
+        exists(int pos |
+          node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos)
+        |
+          c = "Argument" or parseArg(c, pos)
+        )
+        or
+        exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) |
+          c = "Parameter" or parseParam(c, pos)
+        )
+        or
+        c = "ReturnValue" and
+        node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind()))
+        or
+        interpretOutputSpecific(c, mid, node)
+      )
+    }
+
+    private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
+      sinkElementRef(ref, input, _) and
+      specLength(input, idx) and // base case: `idx` is one past the last component
+      node = ref
+      or
+      exists(InterpretNode mid, string c |
+        interpretInput(input, idx + 1, ref, mid) and
+        specSplit(input, c, idx)
+      |
+        exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) |
+          c = "Argument" or parseArg(c, pos)
+        )
+        or
+        exists(ReturnNodeExt ret |
+          c = "ReturnValue" and
+          ret = node.asNode() and
+          ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and
+          mid.asCallable() = getNodeEnclosingCallable(ret)
+        )
+        or
+        interpretInputSpecific(c, mid, node)
+      )
+    }
+
+    /**
+     * Holds if `node` is specified as a source with the given kind in a CSV flow
+     * model.
+     */
+    predicate isSourceNode(InterpretNode node, string kind) {
+      exists(InterpretNode ref, string output |
+        sourceElementRef(ref, output, kind) and
+        interpretOutput(output, 0, ref, node)
+      )
+    }
+
+    /**
+     * Holds if `node` is specified as a sink with the given kind in a CSV flow
+     * model.
+     */
+    predicate isSinkNode(InterpretNode node, string kind) {
+      exists(InterpretNode ref, string input |
+        sinkElementRef(ref, input, kind) and
+        interpretInput(input, 0, ref, node)
+      )
+    }
+  }
+
+  /** Provides a query predicate for outputting a set of relevant flow summaries. */
+  module TestOutput {
+    /** A flow summary to include in the `summary/3` query predicate. */
+    abstract class RelevantSummarizedCallable extends SummarizedCallable {
+      /** Gets the string representation of this callable used by `summary/3`. */
+      string getFullString() { result = this.toString() }
+    }
+
+    /** A query predicate for outputting flow summaries in QL tests.
*/
+    query predicate summary(string callable, string flow, boolean preservesValue) {
+      exists(
+        RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output
+      |
+        callable = c.getFullString() and
+        c.propagatesFlow(input, output, preservesValue) and
+        flow = input + " -> " + output // rendered as "<input> -> <output>"
+      )
+    }
+  }
+
+  /**
+   * Provides query predicates for rendering the generated data flow graph for
+   * a summarized callable.
+   *
+   * Import this module into a `.ql` file of `@kind graph` to render the graph.
+   * The graph is restricted to callables from `RelevantSummarizedCallable`.
+   */
+  module RenderSummarizedCallable {
+    /** A summarized callable to include in the graph. */
+    abstract class RelevantSummarizedCallable extends SummarizedCallable { }
+
+    private newtype TNodeOrCall =
+      MkNode(Node n) {
+        exists(RelevantSummarizedCallable c |
+          n = summaryNode(c, _) // synthesized summary nodes of a relevant callable
+          or
+          n.(ParamNode).isParameterOf(c, _) // and its parameter nodes
+        )
+      } or
+      MkCall(DataFlowCall call) {
+        call = summaryDataFlowCall(_) and
+        call.getEnclosingCallable() instanceof RelevantSummarizedCallable
+      }
+
+    private class NodeOrCall extends TNodeOrCall {
+      Node asNode() { this = MkNode(result) }
+
+      DataFlowCall asCall() { this = MkCall(result) }
+
+      string toString() {
+        result = this.asNode().toString()
+        or
+        result = this.asCall().toString()
+      }
+
+      /**
+       * Holds if this element is at the specified location.
+       * The location spans column `startcolumn` of line `startline` to
+       * column `endcolumn` of line `endline` in file `filepath`.
+       * For more information, see
+       * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+       */
+      predicate hasLocationInfo(
+        string filepath, int startline, int startcolumn, int endline, int endcolumn
+      ) {
+        this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+        or
+        this.asCall().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+      }
+    }
+
+    query predicate nodes(NodeOrCall n, string key, string val) {
+      key = "semmle.label" and val = n.toString() // label every node with its string form
+    }
+
+    private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) {
+      exists(boolean preservesValue |
+        Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and
+        if preservesValue = true then value = "value" else value = "taint"
+      )
+      or
+      exists(Content c |
+        Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and
+        value = "read (" + c + ")"
+        or
+        Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and
+        value = "store (" + c + ")"
+        or
+        Private::Steps::summaryClearsContent(a.asNode(), c) and
+        b = a and // clearing is rendered as a self-edge
+        value = "clear (" + c + ")"
+      )
+      or
+      summaryPostUpdateNode(b.asNode(), a.asNode()) and
+      value = "post-update"
+      or
+      b.asCall() = summaryDataFlowCall(a.asNode()) and
+      value = "receiver"
+      or
+      exists(int i |
+        summaryArgumentNode(b.asCall(), a.asNode(), i) and
+        value = "argument (" + i + ")"
+      )
+    }
+
+    query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) {
+      key = "semmle.label" and
+      value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") // all labels for (a, b)
+    }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll
new file mode 100644
index 00000000000..c373ffc883a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll
@@ -0,0 +1,117 @@
+/**
+ * Provides Ruby specific classes and predicates for defining flow summaries.
+ */
+
+private import ruby
+private import DataFlowDispatch
+private import DataFlowPrivate
+private import DataFlowPublic
+private import DataFlowImplCommon
+private import FlowSummaryImpl::Private
+private import FlowSummaryImpl::Public
+private import codeql.ruby.dataflow.FlowSummary as FlowSummary
+
+/** Holds if `i` is a valid parameter position, that is, an integer from -2 to 10. */
+predicate parameterPosition(int i) { i in [-2 .. 10] }
+
+/** Gets the parameter position of the instance parameter. */
+int instanceParameterPosition() { none() } // disables implicit summary flow to `self` for callbacks
+
+/** Gets the synthesized summary data-flow node for the given values. */
+Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
+
+/** Gets the synthesized data-flow call for `receiver`. */
+SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
+
+/** Gets the type of content `c`. The result is not restricted: any `DataFlowType`. */
+DataFlowType getContentType(Content c) { any() }
+
+/** Gets the return type of kind `rk` for callable `c`. The result is not restricted. */
+bindingset[c, rk]
+DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
+
+/**
+ * Gets the type of the `i`th parameter in a synthesized call that targets a
+ * callback of type `t`. The result is not restricted.
+ */
+bindingset[t, i]
+DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
+
+/**
+ * Gets the return type of kind `rk` in a synthesized call that targets a
+ * callback of type `t`. The result is not restricted.
+ */
+DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
+
+/**
+ * Holds if an external flow summary exists for `c` with input specification
+ * `input`, output specification `output`, and kind `kind`.
+ */
+predicate summaryElement(DataFlowCallable c, string input, string output, string kind) {
+  exists(FlowSummary::SummarizedCallable sc, boolean preservesValue |
+    sc.propagatesFlowExt(input, output, preservesValue) and
+    c.asLibraryCallable() = sc and
+    if preservesValue = true then kind = "value" else kind = "taint"
+  )
+}
+
+/**
+ * Gets the summary component for specification component `c`, if any.
+ *
+ * This covers all the Ruby-specific components of a flow summary, and
+ * is currently restricted to `"BlockArgument"`.
+ */
+SummaryComponent interpretComponentSpecific(string c) {
+  c = "BlockArgument" and
+  result = FlowSummary::SummaryComponent::block()
+}
+
+/** Gets the return kind corresponding to specification `"ReturnValue"`. */
+NormalReturnKind getReturnValueKind() { any() }
+
+/**
+ * All definitions in this module are required by the shared implementation
+ * (for source/sink interpretation), but they are unused for Ruby, where
+ * we rely on API graphs instead.
+ */
+private module UnusedSourceSinkInterpretation {
+  /**
+   * Holds if an external source specification exists for `n` with output specification
+   * `output` and kind `kind`.
+   */
+  predicate sourceElement(AstNode n, string output, string kind) { none() }
+
+  /**
+   * Holds if an external sink specification exists for `n` with input specification
+   * `input` and kind `kind`.
+   */
+  predicate sinkElement(AstNode n, string input, string kind) { none() }
+
+  class SourceOrSinkElement = AstNode;
+
+  /** An entity used to interpret a source/sink specification. */
+  class InterpretNode extends AstNode {
+    /** Gets the element that this node corresponds to, if any. */
+    SourceOrSinkElement asElement() { none() }
+
+    /** Gets the data-flow node that this node corresponds to, if any. */
+    Node asNode() { none() }
+
+    /** Gets the call that this node corresponds to, if any. */
+    DataFlowCall asCall() { none() }
+
+    /** Gets the callable that this node corresponds to, if any. */
+    DataFlowCallable asCallable() { none() }
+
+    /** Gets the target of this call, if any. */
+    Callable getCallTarget() { none() }
+  }
+
+  /** Provides additional source specification logic (output specifications describe sources). */
+  predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
+
+  /** Provides additional sink specification logic (input specifications describe sinks). */
+  predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
+}
+
+import UnusedSourceSinkInterpretation
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll
new file mode 100644
index 00000000000..54269c5cb59
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll
@@ -0,0 +1,289 @@
+private import SsaImplCommon
+private import codeql.ruby.AST
+private import codeql.ruby.CFG
+private import codeql.ruby.ast.Variable
+private import CfgNodes::ExprNodes
+
+/** Holds if `v` is uninitialized at index `i` in entry block `bb`. */
+predicate uninitializedWrite(EntryBasicBlock bb, int i, LocalVariable v) {
+  v.getDeclaringScope() = bb.getScope() and
+  i = -1
+}
+
+/** Holds if `bb` contains a captured read of variable `v`. */
+pragma[noinline]
+private predicate hasCapturedVariableRead(BasicBlock bb, LocalVariable v) {
+  exists(LocalVariableReadAccess read |
+    read = bb.getANode().getNode() and
+    read.isCapturedAccess() and
+    read.getVariable() = v
+  )
+}
+
+/**
+ * Holds if an entry definition is needed for captured variable `v` at index
+ * `i` in entry block `bb`.
+ */
+predicate capturedEntryWrite(EntryBasicBlock bb, int i, LocalVariable v) {
+  hasCapturedVariableRead(bb.getASuccessor*(), v) and // some reachable block reads `v` as captured
+  i = -1
+}
+
+/** Holds if `bb` contains a captured write to variable `v`.
*/
+pragma[noinline]
+private predicate writesCapturedVariable(BasicBlock bb, LocalVariable v) {
+  exists(LocalVariableWriteAccess write |
+    write = bb.getANode().getNode() and
+    write.isCapturedAccess() and
+    write.getVariable() = v
+  )
+}
+
+/**
+ * Holds if a pseudo read of captured variable `v` should be inserted
+ * at index `i` in exit block `bb`.
+ */
+private predicate capturedExitRead(AnnotatedExitBasicBlock bb, int i, LocalVariable v) {
+  bb.isNormal() and
+  writesCapturedVariable(bb.getAPredecessor*(), v) and // some block reaching `bb` writes `v`
+  i = bb.length()
+}
+
+private CfgScope getCaptureOuterCfgScope(CfgScope scope) {
+  result = scope.getOuterCfgScope() and // only step outwards from blocks and lambdas
+  (
+    scope instanceof Block
+    or
+    scope instanceof Lambda
+  )
+}
+
+/** Holds if captured variable `v` is read inside `scope`. */
+pragma[noinline]
+private predicate hasCapturedRead(Variable v, CfgScope scope) {
+  any(LocalVariableReadAccess read |
+    read.getVariable() = v and scope = getCaptureOuterCfgScope*(read.getCfgScope())
+  ).isCapturedAccess()
+}
+
+pragma[noinline]
+private predicate hasVariableWriteWithCapturedRead(BasicBlock bb, LocalVariable v, CfgScope scope) {
+  hasCapturedRead(v, scope) and
+  exists(VariableWriteAccess write |
+    write = bb.getANode().getNode() and
+    write.getVariable() = v and
+    bb.getScope() = scope.getOuterCfgScope() // the write is in the scope directly around `scope`
+  )
+}
+
+/**
+ * Holds if the call at index `i` in basic block `bb` may reach a callable
+ * that reads captured variable `v`.
+ */
+private predicate capturedCallRead(BasicBlock bb, int i, LocalVariable v) {
+  exists(CfgScope scope |
+    hasVariableWriteWithCapturedRead(bb.getAPredecessor*(), v, scope) and
+    bb.getNode(i).getNode() instanceof Call
+  |
+    not scope instanceof Block
+    or
+    // If the read happens inside a block, we restrict to the call that
+    // contains the block
+    scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
+  )
+}
+
+/** Holds if captured variable `v` is written inside `scope`.
*/ +pragma[noinline] +private predicate hasCapturedWrite(Variable v, CfgScope scope) { + any(LocalVariableWriteAccess write | + write.getVariable() = v and scope = getCaptureOuterCfgScope*(write.getCfgScope()) + ).isCapturedAccess() +} + +/** Holds if `v` is read at index `i` in basic block `bb`. */ +private predicate variableReadActual(BasicBlock bb, int i, LocalVariable v) { + exists(VariableReadAccess read | + read.getVariable() = v and + read = bb.getNode(i).getNode() + ) +} + +predicate variableRead(BasicBlock bb, int i, LocalVariable v, boolean certain) { + variableReadActual(bb, i, v) and + certain = true + or + capturedCallRead(bb, i, v) and + certain = false + or + capturedExitRead(bb, i, v) and + certain = false +} + +pragma[noinline] +private predicate hasVariableReadWithCapturedWrite(BasicBlock bb, LocalVariable v, CfgScope scope) { + hasCapturedWrite(v, scope) and + exists(VariableReadAccess read | + read = bb.getANode().getNode() and + read.getVariable() = v and + bb.getScope() = scope.getOuterCfgScope() + ) +} + +cached +private module Cached { + /** + * Holds if the call at index `i` in basic block `bb` may reach a callable + * that writes captured variable `v`. + */ + cached + predicate capturedCallWrite(BasicBlock bb, int i, LocalVariable v) { + exists(CfgScope scope | + hasVariableReadWithCapturedWrite(bb.getASuccessor*(), v, scope) and + bb.getNode(i).getNode() instanceof Call + | + not scope instanceof Block + or + // If the write happens inside a block, we restrict to the call that + // contains the block + scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock() + ) + } + + /** + * Holds if `v` is written at index `i` in basic block `bb`, and the corresponding + * AST write access is `write`. 
+ */ + cached + predicate variableWriteActual(BasicBlock bb, int i, LocalVariable v, VariableWriteAccess write) { + exists(AstNode n | + write.getVariable() = v and + n = bb.getNode(i).getNode() + | + write.isExplicitWrite(n) + or + write.isImplicitWrite() and + n = write + ) + } + + cached + VariableReadAccessCfgNode getARead(Definition def) { + exists(LocalVariable v, BasicBlock bb, int i | + ssaDefReachesRead(v, def, bb, i) and + variableReadActual(bb, i, v) and + result = bb.getNode(i) + ) + } + + /** + * Holds if there is flow for a captured variable from the enclosing scope into a block. + * ```rb + * foo = 0 + * bar { + * puts foo + * } + * ``` + */ + cached + predicate captureFlowIn(Definition def, Definition entry) { + exists(LocalVariable v, BasicBlock bb, int i | + ssaDefReachesRead(v, def, bb, i) and + capturedCallRead(bb, i, v) and + exists(BasicBlock bb2, int i2 | + capturedEntryWrite(bb2, i2, v) and + entry.definesAt(v, bb2, i2) + ) + ) + } + + /** + * Holds if there is outgoing flow for a captured variable that is updated in a block. + * ```rb + * foo = 0 + * bar { + * foo += 10 + * } + * puts foo + * ``` + */ + cached + predicate captureFlowOut(Definition def, Definition exit) { + exists(LocalVariable v, BasicBlock bb, int i | + ssaDefReachesRead(v, def, bb, i) and + capturedExitRead(bb, i, v) and + exists(BasicBlock bb2, int i2 | + capturedCallWrite(bb2, i2, v) and + exit.definesAt(v, bb2, i2) + ) + ) + } + + cached + Definition phiHasInputFromBlock(PhiNode phi, BasicBlock bb) { + phiHasInputFromBlock(phi, result, bb) + } + + /** + * Holds if the value defined at SSA definition `def` can reach a read at `read`, + * without passing through any other non-pseudo read. 
+ */ + cached + predicate firstRead(Definition def, VariableReadAccessCfgNode read) { + exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 | + def.definesAt(_, bb1, i1) and + adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and + read = bb2.getNode(i2) + ) + } + + /** + * Holds if the read at `read2` is a read of the same SSA definition `def` + * as the read at `read1`, and `read2` can be reached from `read1` without + * passing through another non-pseudo read. + */ + cached + predicate adjacentReadPair( + Definition def, VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2 + ) { + exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 | + read1 = bb1.getNode(i1) and + variableReadActual(bb1, i1, _) and + adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and + read2 = bb2.getNode(i2) + ) + } + + /** + * Holds if the read of `def` at `read` may be a last read. That is, `read` + * can either reach another definition of the underlying source variable or + * the end of the CFG scope, without passing through another non-pseudo read. + */ + cached + predicate lastRead(Definition def, VariableReadAccessCfgNode read) { + exists(BasicBlock bb, int i | + lastRefNoUncertainReads(def, bb, i) and + variableReadActual(bb, i, _) and + read = bb.getNode(i) + ) + } + + /** + * Holds if the reference to `def` at index `i` in basic block `bb` can reach + * another definition `next` of the same underlying source variable, without + * passing through another write or non-pseudo read. + * + * The reference is either a read of `def` or `def` itself. 
+ */ + cached + predicate lastRefBeforeRedef(Definition def, BasicBlock bb, int i, Definition next) { + lastRefRedefNoUncertainReads(def, bb, i, next) + } + + cached + Definition uncertainWriteDefinitionInput(UncertainWriteDefinition def) { + uncertainWriteDefinitionInput(def, result) + } +} + +import Cached diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll new file mode 100644 index 00000000000..884f4406d01 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll @@ -0,0 +1,637 @@ +/** + * Provides a language-independent implementation of static single assignment + * (SSA) form. + */ + +private import SsaImplSpecific + +private BasicBlock getABasicBlockPredecessor(BasicBlock bb) { getABasicBlockSuccessor(result) = bb } + +/** + * Liveness analysis (based on source variables) to restrict the size of the + * SSA representation. + */ +private module Liveness { + /** + * A classification of variable references into reads (of a given kind) and + * (certain or uncertain) writes. + */ + private newtype TRefKind = + Read(boolean certain) { certain in [false, true] } or + Write(boolean certain) { certain in [false, true] } + + private class RefKind extends TRefKind { + string toString() { + exists(boolean certain | this = Read(certain) and result = "read (" + certain + ")") + or + exists(boolean certain | this = Write(certain) and result = "write (" + certain + ")") + } + + int getOrder() { + this = Read(_) and + result = 0 + or + this = Write(_) and + result = 1 + } + } + + /** + * Holds if the `i`th node of basic block `bb` is a reference to `v` of kind `k`. 
+ */ + private predicate ref(BasicBlock bb, int i, SourceVariable v, RefKind k) { + exists(boolean certain | variableRead(bb, i, v, certain) | k = Read(certain)) + or + exists(boolean certain | variableWrite(bb, i, v, certain) | k = Write(certain)) + } + + private newtype OrderedRefIndex = + MkOrderedRefIndex(int i, int tag) { + exists(RefKind rk | ref(_, i, _, rk) | tag = rk.getOrder()) + } + + private OrderedRefIndex refOrd(BasicBlock bb, int i, SourceVariable v, RefKind k, int ord) { + ref(bb, i, v, k) and + result = MkOrderedRefIndex(i, ord) and + ord = k.getOrder() + } + + /** + * Gets the (1-based) rank of the reference to `v` at the `i`th node of + * basic block `bb`, which has the given reference kind `k`. + * + * Reads are considered before writes when they happen at the same index. + */ + private int refRank(BasicBlock bb, int i, SourceVariable v, RefKind k) { + refOrd(bb, i, v, k, _) = + rank[result](int j, int ord, OrderedRefIndex res | + res = refOrd(bb, j, v, _, ord) + | + res order by j, ord + ) + } + + private int maxRefRank(BasicBlock bb, SourceVariable v) { + result = refRank(bb, _, v, _) and + not result + 1 = refRank(bb, _, v, _) + } + + /** + * Gets the (1-based) rank of the first reference to `v` inside basic block `bb` + * that is either a read or a certain write. + */ + private int firstReadOrCertainWrite(BasicBlock bb, SourceVariable v) { + result = + min(int r, RefKind k | + r = refRank(bb, _, v, k) and + k != Write(false) + | + r + ) + } + + /** + * Holds if source variable `v` is live at the beginning of basic block `bb`. 
+ */ + predicate liveAtEntry(BasicBlock bb, SourceVariable v) { + // The first read or certain write to `v` inside `bb` is a read + refRank(bb, _, v, Read(_)) = firstReadOrCertainWrite(bb, v) + or + // There is no certain write to `v` inside `bb`, but `v` is live at entry + // to a successor basic block of `bb` + not exists(firstReadOrCertainWrite(bb, v)) and + liveAtExit(bb, v) + } + + /** + * Holds if source variable `v` is live at the end of basic block `bb`. + */ + predicate liveAtExit(BasicBlock bb, SourceVariable v) { + liveAtEntry(getABasicBlockSuccessor(bb), v) + } + + /** + * Holds if variable `v` is live in basic block `bb` at index `i`. + * The rank of `i` is `rnk` as defined by `refRank()`. + */ + private predicate liveAtRank(BasicBlock bb, int i, SourceVariable v, int rnk) { + exists(RefKind kind | rnk = refRank(bb, i, v, kind) | + rnk = maxRefRank(bb, v) and + liveAtExit(bb, v) + or + ref(bb, i, v, kind) and + kind = Read(_) + or + exists(RefKind nextKind | + liveAtRank(bb, _, v, rnk + 1) and + rnk + 1 = refRank(bb, _, v, nextKind) and + nextKind != Write(true) + ) + ) + } + + /** + * Holds if variable `v` is live after the (certain or uncertain) write at + * index `i` inside basic block `bb`. + */ + predicate liveAfterWrite(BasicBlock bb, int i, SourceVariable v) { + exists(int rnk | rnk = refRank(bb, i, v, Write(_)) | liveAtRank(bb, i, v, rnk)) + } +} + +private import Liveness + +/** Holds if `bb1` strictly dominates `bb2`. */ +private predicate strictlyDominates(BasicBlock bb1, BasicBlock bb2) { + bb1 = getImmediateBasicBlockDominator+(bb2) +} + +/** Holds if `bb1` dominates a predecessor of `bb2`. */ +private predicate dominatesPredecessor(BasicBlock bb1, BasicBlock bb2) { + exists(BasicBlock pred | pred = getABasicBlockPredecessor(bb2) | + bb1 = pred + or + strictlyDominates(bb1, pred) + ) +} + +/** Holds if `df` is in the dominance frontier of `bb`. 
*/ +private predicate inDominanceFrontier(BasicBlock bb, BasicBlock df) { + dominatesPredecessor(bb, df) and + not strictlyDominates(bb, df) +} + +/** + * Holds if `bb` is in the dominance frontier of a block containing a + * definition of `v`. + */ +pragma[noinline] +private predicate inDefDominanceFrontier(BasicBlock bb, SourceVariable v) { + exists(BasicBlock defbb, Definition def | + def.definesAt(v, defbb, _) and + inDominanceFrontier(defbb, bb) + ) +} + +cached +newtype TDefinition = + TWriteDef(SourceVariable v, BasicBlock bb, int i) { + variableWrite(bb, i, v, _) and + liveAfterWrite(bb, i, v) + } or + TPhiNode(SourceVariable v, BasicBlock bb) { + inDefDominanceFrontier(bb, v) and + liveAtEntry(bb, v) + } + +private module SsaDefReaches { + newtype TSsaRefKind = + SsaRead() or + SsaDef() + + /** + * A classification of SSA variable references into reads and definitions. + */ + class SsaRefKind extends TSsaRefKind { + string toString() { + this = SsaRead() and + result = "SsaRead" + or + this = SsaDef() and + result = "SsaDef" + } + + int getOrder() { + this = SsaRead() and + result = 0 + or + this = SsaDef() and + result = 1 + } + } + + /** + * Holds if the `i`th node of basic block `bb` is a reference to `v`, + * either a read (when `k` is `SsaRead()`) or an SSA definition (when `k` + * is `SsaDef()`). + * + * Unlike `Liveness::ref`, this includes `phi` nodes. 
*/ + predicate ssaRef(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) { + variableRead(bb, i, v, _) and + k = SsaRead() + or + exists(Definition def | def.definesAt(v, bb, i)) and + k = SsaDef() + } + + private newtype OrderedSsaRefIndex = + MkOrderedSsaRefIndex(int i, SsaRefKind k) { ssaRef(_, i, _, k) } + + private OrderedSsaRefIndex ssaRefOrd(BasicBlock bb, int i, SourceVariable v, SsaRefKind k, int ord) { + ssaRef(bb, i, v, k) and + result = MkOrderedSsaRefIndex(i, k) and + ord = k.getOrder() + } + + /** + * Gets the (1-based) rank of the reference to `v` at the `i`th node of basic + * block `bb`, which has the given reference kind `k`. + * + * For example, if `bb` is a basic block with a phi node for `v` (considered + * to be at index -1), reads `v` at node 2, and defines it at node 5, we have: + * + * ```ql + * ssaRefRank(bb, -1, v, SsaDef()) = 1 // phi node + * ssaRefRank(bb, 2, v, SsaRead()) = 2 // read at node 2 + * ssaRefRank(bb, 5, v, SsaDef()) = 3 // definition at node 5 + * ``` + * + * Reads are considered before writes when they happen at the same index. + */ + int ssaRefRank(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) { + ssaRefOrd(bb, i, v, k, _) = + rank[result](int j, int ord, OrderedSsaRefIndex res | + res = ssaRefOrd(bb, j, v, _, ord) + | + res order by j, ord + ) + } + + int maxSsaRefRank(BasicBlock bb, SourceVariable v) { + result = ssaRefRank(bb, _, v, _) and + not result + 1 = ssaRefRank(bb, _, v, _) + } + + /** + * Holds if the SSA definition `def` reaches rank index `rnk` in its own + * basic block `bb`.
+ */ + predicate ssaDefReachesRank(BasicBlock bb, Definition def, int rnk, SourceVariable v) { + exists(int i | + rnk = ssaRefRank(bb, i, v, SsaDef()) and + def.definesAt(v, bb, i) + ) + or + ssaDefReachesRank(bb, def, rnk - 1, v) and + rnk = ssaRefRank(bb, _, v, SsaRead()) + } + + /** + * Holds if the SSA definition of `v` at `def` reaches index `i` in the same + * basic block `bb`, without crossing another SSA definition of `v`. + */ + predicate ssaDefReachesReadWithinBlock(SourceVariable v, Definition def, BasicBlock bb, int i) { + exists(int rnk | + ssaDefReachesRank(bb, def, rnk, v) and + rnk = ssaRefRank(bb, i, v, SsaRead()) + ) + } + + /** + * Holds if the SSA definition of `v` at `def` reaches uncertain SSA definition + * `redef` in the same basic block, without crossing another SSA definition of `v`. + */ + predicate ssaDefReachesUncertainDefWithinBlock( + SourceVariable v, Definition def, UncertainWriteDefinition redef + ) { + exists(BasicBlock bb, int rnk, int i | + ssaDefReachesRank(bb, def, rnk, v) and + rnk = ssaRefRank(bb, i, v, SsaDef()) - 1 and + redef.definesAt(v, bb, i) + ) + } + + /** + * Same as `ssaRefRank()`, but restricted to a particular SSA definition `def`. + */ + int ssaDefRank(Definition def, SourceVariable v, BasicBlock bb, int i, SsaRefKind k) { + v = def.getSourceVariable() and + result = ssaRefRank(bb, i, v, k) and + ( + ssaDefReachesRead(_, def, bb, i) + or + def.definesAt(_, bb, i) + ) + } + + /** + * Holds if the reference to `def` at index `i` in basic block `bb` is the + * last reference to `v` inside `bb`. 
+ */ + pragma[noinline] + predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) { + ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v) + } + + predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) { + exists(ssaDefRank(def, v, bb, _, _)) + } + + pragma[noinline] + private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) { + ssaDefReachesEndOfBlock(bb, def, _) and + not defOccursInBlock(_, bb, def.getSourceVariable()) + } + + /** + * Holds if `def` is accessed in basic block `bb1` (either a read or a write), + * `bb2` is a transitive successor of `bb1`, `def` is live at the end of `bb1`, + * and the underlying variable for `def` is neither read nor written in any block + * on the path between `bb1` and `bb2`. + */ + predicate varBlockReaches(Definition def, BasicBlock bb1, BasicBlock bb2) { + defOccursInBlock(def, bb1, _) and + bb2 = getABasicBlockSuccessor(bb1) + or + exists(BasicBlock mid | + varBlockReaches(def, bb1, mid) and + ssaDefReachesThroughBlock(def, mid) and + bb2 = getABasicBlockSuccessor(mid) + ) + } + + /** + * Holds if `def` is accessed in basic block `bb1` (either a read or a write), + * `def` is read at index `i2` in basic block `bb2`, `bb2` is in a transitive + * successor block of `bb1`, and `def` is neither read nor written in any block + * on a path between `bb1` and `bb2`. + */ + predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) { + varBlockReaches(def, bb1, bb2) and + ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1 + } +} + +private import SsaDefReaches + +pragma[nomagic] +predicate liveThrough(BasicBlock bb, SourceVariable v) { + liveAtExit(bb, v) and + not ssaRef(bb, _, v, SsaDef()) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if the SSA definition of `v` at `def` reaches the end of basic + * block `bb`, at which point it is still live, without crossing another + * SSA definition of `v`. 
*/ +pragma[nomagic] +predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable v) { + exists(int last | last = maxSsaRefRank(bb, v) | + ssaDefReachesRank(bb, def, last, v) and + liveAtExit(bb, v) + ) + or + // The construction of SSA form ensures that each read of a variable is + // dominated by its definition. An SSA definition therefore reaches a + // control flow node if it is the _closest_ SSA definition that dominates + // the node. If two definitions dominate a node then one must dominate the + // other, so therefore the definition of _closest_ is given by the dominator + // tree. Thus, reaching definitions can be calculated in terms of dominance. + ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and + liveThrough(bb, pragma[only_bind_into](v)) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if `inp` is an input to the phi node `phi` along the edge originating in `bb`. + */ +pragma[nomagic] +predicate phiHasInputFromBlock(PhiNode phi, Definition inp, BasicBlock bb) { + exists(SourceVariable v, BasicBlock bbDef | + phi.definesAt(v, bbDef, _) and + getABasicBlockPredecessor(bbDef) = bb and + ssaDefReachesEndOfBlock(bb, inp, v) + ) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if the SSA definition of `v` at `def` reaches a read at index `i` in + * basic block `bb`, without crossing another SSA definition of `v`. The read + * may be certain or uncertain. + */ +pragma[nomagic] +predicate ssaDefReachesRead(SourceVariable v, Definition def, BasicBlock bb, int i) { + ssaDefReachesReadWithinBlock(v, def, bb, i) + or + variableRead(bb, i, v, _) and + ssaDefReachesEndOfBlock(getABasicBlockPredecessor(bb), def, v) and + not ssaDefReachesReadWithinBlock(v, _, bb, i) +} + +/** + * NB: If this predicate is exposed, it should be cached.
+ * + * Holds if `def` is accessed at index `i1` in basic block `bb1` (either a read + * or a write), `def` is read at index `i2` in basic block `bb2`, and there is a + * path between them without any read of `def`. + */ +pragma[nomagic] +predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) { + exists(int rnk | + rnk = ssaDefRank(def, _, bb1, i1, _) and + rnk + 1 = ssaDefRank(def, _, bb1, i2, SsaRead()) and + variableRead(bb1, i2, _, _) and + bb2 = bb1 + ) + or + lastSsaRef(def, _, bb1, i1) and + defAdjacentRead(def, bb1, bb2, i2) +} + +pragma[noinline] +private predicate adjacentDefRead( + Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v +) { + adjacentDefRead(def, bb1, i1, bb2, i2) and + v = def.getSourceVariable() +} + +private predicate adjacentDefReachesRead( + Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2 +) { + exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) | + ssaRef(bb1, i1, v, SsaDef()) + or + variableRead(bb1, i1, v, true) + ) + or + exists(BasicBlock bb3, int i3 | + adjacentDefReachesRead(def, bb1, i1, bb3, i3) and + variableRead(bb3, i3, _, false) and + adjacentDefRead(def, bb3, i3, bb2, i2) + ) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Same as `adjacentDefRead`, but ignores uncertain reads. + */ +pragma[nomagic] +predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) { + adjacentDefReachesRead(def, bb1, i1, bb2, i2) and + variableRead(bb2, i2, _, true) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if the node at index `i` in `bb` is a last reference to SSA definition + * `def`. The reference is last because it can reach another write `next`, + * without passing through another read or write. 
+ */ +pragma[nomagic] +predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) { + exists(SourceVariable v | + // Next reference to `v` inside `bb` is a write + exists(int rnk, int j | + rnk = ssaDefRank(def, v, bb, i, _) and + next.definesAt(v, bb, j) and + rnk + 1 = ssaRefRank(bb, j, v, SsaDef()) + ) + or + // Can reach a write using one or more steps + lastSsaRef(def, v, bb, i) and + exists(BasicBlock bb2 | + varBlockReaches(def, bb, bb2) and + 1 = ssaDefRank(next, v, bb2, _, SsaDef()) + ) + ) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if `inp` is an immediately preceding definition of uncertain definition + * `def`. Since `def` is uncertain, the value from the preceding definition might + * still be valid. + */ +pragma[nomagic] +predicate uncertainWriteDefinitionInput(UncertainWriteDefinition def, Definition inp) { + lastRefRedef(inp, _, _, def) +} + +private predicate adjacentDefReachesUncertainRead( + Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2 +) { + adjacentDefReachesRead(def, bb1, i1, bb2, i2) and + variableRead(bb2, i2, _, false) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Same as `lastRefRedef`, but ignores uncertain reads. + */ +pragma[nomagic] +predicate lastRefRedefNoUncertainReads(Definition def, BasicBlock bb, int i, Definition next) { + lastRefRedef(def, bb, i, next) and + not variableRead(bb, i, def.getSourceVariable(), false) + or + exists(BasicBlock bb0, int i0 | + lastRefRedef(def, bb0, i0, next) and + adjacentDefReachesUncertainRead(def, bb, i, bb0, i0) + ) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Holds if the node at index `i` in `bb` is a last reference to SSA + * definition `def`. + * + * That is, the node can reach the end of the enclosing callable, or another + * SSA definition for the underlying source variable, without passing through + * another read. 
*/ +pragma[nomagic] +predicate lastRef(Definition def, BasicBlock bb, int i) { + lastRefRedef(def, bb, i, _) + or + lastSsaRef(def, _, bb, i) and + ( + // Can reach exit directly + bb instanceof ExitBasicBlock + or + // Can reach a block using one or more steps, where `def` is no longer live + exists(BasicBlock bb2 | varBlockReaches(def, bb, bb2) | + not defOccursInBlock(def, bb2, _) and + not ssaDefReachesEndOfBlock(bb2, def, _) + ) + ) +} + +/** + * NB: If this predicate is exposed, it should be cached. + * + * Same as `lastRef`, but ignores uncertain reads. + */ +pragma[nomagic] +predicate lastRefNoUncertainReads(Definition def, BasicBlock bb, int i) { + lastRef(def, bb, i) and + not variableRead(bb, i, def.getSourceVariable(), false) + or + exists(BasicBlock bb0, int i0 | + lastRef(def, bb0, i0) and + adjacentDefReachesUncertainRead(def, bb, i, bb0, i0) + ) +} + +/** A static single assignment (SSA) definition. */ +class Definition extends TDefinition { + /** Gets the source variable underlying this SSA definition. */ + SourceVariable getSourceVariable() { this.definesAt(result, _, _) } + + /** + * Holds if this SSA definition defines `v` at index `i` in basic block `bb`. + * Phi nodes are considered to be at index `-1`, while normal variable writes + * are at the index of the control flow node they wrap. + */ + final predicate definesAt(SourceVariable v, BasicBlock bb, int i) { + this = TWriteDef(v, bb, i) + or + this = TPhiNode(v, bb) and i = -1 + } + + /** Gets the basic block to which this SSA definition belongs. */ + final BasicBlock getBasicBlock() { this.definesAt(_, result, _) } + + /** Gets a textual representation of this SSA definition. */ + string toString() { none() } +} + +/** An SSA definition that corresponds to a write.
*/ +class WriteDefinition extends Definition, TWriteDef { + private SourceVariable v; + private BasicBlock bb; + private int i; + + WriteDefinition() { this = TWriteDef(v, bb, i) } + + override string toString() { result = "WriteDef" } +} + +/** A phi node. */ +class PhiNode extends Definition, TPhiNode { + override string toString() { result = "Phi" } +} + +/** + * An SSA definition that represents an uncertain update of the underlying + * source variable. + */ +class UncertainWriteDefinition extends WriteDefinition { + UncertainWriteDefinition() { + exists(SourceVariable v, BasicBlock bb, int i | + this.definesAt(v, bb, i) and + variableWrite(bb, i, v, false) + ) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll new file mode 100644 index 00000000000..76646f17e8d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll @@ -0,0 +1,34 @@ +/** Provides the Ruby specific parameters for `SsaImplCommon.qll`. 
*/ + +private import SsaImpl as SsaImpl +private import codeql.ruby.AST +private import codeql.ruby.ast.Parameter +private import codeql.ruby.ast.Variable +private import codeql.ruby.controlflow.BasicBlocks as BasicBlocks +private import codeql.ruby.controlflow.ControlFlowGraph + +class BasicBlock = BasicBlocks::BasicBlock; + +BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() } + +BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() } + +class ExitBasicBlock = BasicBlocks::ExitBasicBlock; + +class SourceVariable = LocalVariable; + +predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) { + ( + SsaImpl::uninitializedWrite(bb, i, v) + or + SsaImpl::capturedEntryWrite(bb, i, v) + or + SsaImpl::variableWriteActual(bb, i, v, _) + ) and + certain = true + or + SsaImpl::capturedCallWrite(bb, i, v) and + certain = false +} + +predicate variableRead = SsaImpl::variableRead/4; diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll new file mode 100755 index 00000000000..86c8ffb7f50 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll @@ -0,0 +1,41 @@ +private import ruby +private import TaintTrackingPublic +private import codeql.ruby.CFG +private import codeql.ruby.DataFlow +private import FlowSummaryImpl as FlowSummaryImpl + +/** + * Holds if `node` should be a sanitizer in all global taint flow configurations + * but not in local taint. + */ +predicate defaultTaintSanitizer(DataFlow::Node node) { none() } + +/** + * Holds if default `TaintTracking::Configuration`s should allow implicit reads + * of `c` at sinks and inputs to additional taint steps. 
+ */ +bindingset[node] +predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() } + +/** + * Holds if the additional step from `nodeFrom` to `nodeTo` should be included + * in all global taint flow configurations. + */ +cached +predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // operation involving `nodeFrom` + exists(CfgNodes::ExprNodes::OperationCfgNode op | + op = nodeTo.asExpr() and + op.getAnOperand() = nodeFrom.asExpr() and + not op.getExpr() instanceof AssignExpr + ) + or + // string interpolation of `nodeFrom` into `nodeTo` + nodeFrom.asExpr() = + nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent() + or + // element reference from nodeFrom + nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ElementReferenceCfgNode).getReceiver() + or + FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll new file mode 100755 index 00000000000..3fe5659bdc7 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll @@ -0,0 +1,31 @@ +private import ruby +private import TaintTrackingPrivate +private import codeql.ruby.CFG +private import codeql.ruby.DataFlow +private import FlowSummaryImpl as FlowSummaryImpl + +/** + * Holds if taint propagates from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) } + +/** + * Holds if taint can flow from `e1` to `e2` in zero or more + * local (intra-procedural) steps. 
+ */ +predicate localExprTaint(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) { + localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2)) +} + +/** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + defaultAdditionalTaintStep(nodeFrom, nodeTo) + or + // Simple flow through library code is included in the exposed local + // step relation, even though flow is technically inter-procedural + FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll new file mode 100644 index 00000000000..f4f73b8247c --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll @@ -0,0 +1,120 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. 
+ * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** + * Holds if `sink` is a relevant taint sink. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. 
+ */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + + /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */ + predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) } + + /** + * Holds if the additional taint propagation step from `node1` to `node2` + * must be taken into account in the analysis. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) { + (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and + defaultImplicitTaintRead(node, c) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. 
+ */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll new file mode 100644 index 00000000000..ce6f5ed1c48 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll @@ -0,0 +1,6 @@ +import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public + +module Private { + import codeql.ruby.DataFlow::DataFlow as DataFlow + import codeql.ruby.dataflow.internal.TaintTrackingPrivate +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll new file mode 100644 index 00000000000..18d12be3aac --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll @@ -0,0 +1,43 @@ +/** Provides classes for detecting generated code. */ + +private import ruby +private import codeql.ruby.ast.internal.TreeSitter + +/** A source file that contains generated code. */ +abstract class GeneratedCodeFile extends RubyFile { } + +/** A file containing comments suggesting it contains generated code. */ +class GeneratedCommentFile extends GeneratedCodeFile { + GeneratedCommentFile() { this = any(GeneratedCodeComment c).getLocation().getFile() } +} + +/** A comment line that indicates generated code. */ +abstract class GeneratedCodeComment extends Ruby::Comment { } + +/** + * A generic comment line that suggests that the file is generated.
+ */ +class GenericGeneratedCodeComment extends GeneratedCodeComment { + GenericGeneratedCodeComment() { + exists(string line, string entity, string was, string automatically | line = getValue() | + entity = "file|class|art[ei]fact|module|script" and + was = "was|is|has been" and + automatically = "automatically |mechanically |auto[- ]?" and + line.regexpMatch("(?i).*\\bThis (" + entity + ") (" + was + ") (" + automatically + + ")?generated\\b.*") + ) + } +} + +/** A comment warning against modifications. */ +class DontModifyMarkerComment extends GeneratedCodeComment { + DontModifyMarkerComment() { + exists(string line | line = getValue() | + line.regexpMatch("(?i).*\\bGenerated by\\b.*\\bDo not edit\\b.*") or + line.regexpMatch("(?i).*\\bAny modifications to this file will be lost\\b.*") + ) + } +} + +/** Holds if `file` looks like it contains generated code. */ +predicate isGeneratedCode(GeneratedCodeFile file) { any() } diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll new file mode 100644 index 00000000000..0eec1e15f58 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll @@ -0,0 +1,259 @@ +private import codeql.ruby.AST +private import codeql.ruby.Concepts +private import codeql.ruby.controlflow.CfgNodes +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.ast.internal.Module +private import ActionView + +private class ActionControllerBaseAccess extends ConstantReadAccess { + ActionControllerBaseAccess() { + this.getName() = "Base" and + this.getScopeExpr().(ConstantAccess).getName() = "ActionController" + } +} + +// ApplicationController extends ActionController::Base, but we +// treat it separately in case the ApplicationController definition +// is not in the database +private class ApplicationControllerAccess extends ConstantReadAccess { + 
ApplicationControllerAccess() { this.getName() = "ApplicationController" } +} + +/** + * A `ClassDeclaration` for a class that extends `ActionController::Base`. + * For example, + * + * ```rb + * class FooController < ActionController::Base + * def delete_handler + * uid = params[:id] + * User.delete_by("id = ?", uid) + * end + * end + * ``` + */ +class ActionControllerControllerClass extends ClassDeclaration { + ActionControllerControllerClass() { + // class FooController < ActionController::Base + this.getSuperclassExpr() instanceof ActionControllerBaseAccess + or + // class FooController < ApplicationController + this.getSuperclassExpr() instanceof ApplicationControllerAccess + or + // class BarController < FooController + exists(ActionControllerControllerClass other | + other.getModule() = resolveScopeExpr(this.getSuperclassExpr()) + ) + } + + /** + * Gets an `ActionControllerActionMethod` defined in this class. + */ + ActionControllerActionMethod getAnAction() { result = this.getAMethod() } +} + +/** + * An instance method defined within an `ActionController` controller class. + * This may be the target of a route handler, if such a route is defined. + */ +class ActionControllerActionMethod extends Method, HTTP::Server::RequestHandler::Range { + private ActionControllerControllerClass controllerClass; + + ActionControllerActionMethod() { this = controllerClass.getAMethod() } + + /** + * Establishes a mapping between a method within the file + * `app/controllers/<subpath>_controller.rb` and the + * corresponding template file at + * `app/views/<subpath>/<method>.html.erb`. + */ + ErbFile getDefaultTemplateFile() { + controllerTemplateFile(this.getControllerClass(), result) and + result.getBaseName() = this.getName() + ".html.erb" + } + + // params come from `params` method rather than a method parameter + override Parameter getARoutedParameter() { none() } + + override string getFramework() { result = "ActionController" } + + /** Gets a call to render from within this method.
*/ + RenderCall getARenderCall() { result.getParent+() = this } + + // TODO: model the implicit render call when a path through the method does + // not end at an explicit render or redirect + /** Gets the controller class containing this method. */ + ActionControllerControllerClass getControllerClass() { result = controllerClass } +} + +// A method call with a `self` receiver from within a controller class +private class ActionControllerContextCall extends MethodCall { + private ActionControllerControllerClass controllerClass; + + ActionControllerContextCall() { + this.getReceiver() instanceof Self and + this.getEnclosingModule() = controllerClass + } + + ActionControllerControllerClass getControllerClass() { result = controllerClass } +} + +/** + * A call to the `params` method to fetch the request parameters. + */ +abstract class ParamsCall extends MethodCall { + ParamsCall() { this.getMethodName() = "params" } +} + +/** + * A `RemoteFlowSource::Range` to represent accessing the + * ActionController parameters available via the `params` method. + */ +class ParamsSource extends RemoteFlowSource::Range { + ParamsCall call; + + ParamsSource() { this.asExpr().getExpr() = call } + + override string getSourceType() { result = "ActionController::Metal#params" } +} + +// A call to `params` from within a controller. +private class ActionControllerParamsCall extends ActionControllerContextCall, ParamsCall { } + +// A call to `render` from within a controller. +private class ActionControllerRenderCall extends ActionControllerContextCall, RenderCall { } + +// A call to `render_to` from within a controller. +private class ActionControllerRenderToCall extends ActionControllerContextCall, RenderToCall { } + +// A call to `html_safe` from within a controller. 
+private class ActionControllerHtmlSafeCall extends HtmlSafeCall { + ActionControllerHtmlSafeCall() { + this.getEnclosingModule() instanceof ActionControllerControllerClass + } +} + +// A call to `html_escape` from within a controller. +private class ActionControllerHtmlEscapeCall extends HtmlEscapeCall { + ActionControllerHtmlEscapeCall() { + this.getEnclosingModule() instanceof ActionControllerControllerClass + } +} + +/** + * A call to the `redirect_to` method, used in an action to redirect to a + * specific URL/path or to a different action in this controller. + */ +class RedirectToCall extends ActionControllerContextCall { + RedirectToCall() { this.getMethodName() = "redirect_to" } + + /** Gets the `Expr` representing the URL to redirect to, if any */ + Expr getRedirectUrl() { result = this.getArgument(0) } + + /** Gets the `ActionControllerActionMethod` to redirect to, if any */ + ActionControllerActionMethod getRedirectActionMethod() { + exists(string methodName | + methodName = this.getKeywordArgument("action").(StringlikeLiteral).getValueText() and + methodName = result.getName() and + result.getEnclosingModule() = this.getControllerClass() + ) + } +} + +/** + * A call to the `redirect_to` method, as an `HttpRedirectResponse`. + */ +class ActionControllerRedirectResponse extends HTTP::Server::HttpRedirectResponse::Range { + RedirectToCall redirectToCall; + + ActionControllerRedirectResponse() { this.asExpr().getExpr() = redirectToCall } + + override DataFlow::Node getBody() { none() } + + override DataFlow::Node getMimetypeOrContentTypeArg() { none() } + + override string getMimetypeDefault() { none() } + + override DataFlow::Node getRedirectLocation() { + result.asExpr().getExpr() = redirectToCall.getRedirectUrl() + } +} + +/** + * A method in an `ActionController` class that is accessible from within a + * Rails view as a helper method. For instance, in: + * + * ```rb + * class FooController < ActionController::Base + * helper_method :logged_in? 
+ * def logged_in? + * @current_user != nil + * end + * end + * ``` + * + * the `logged_in?` method is a helper method. + * See also https://api.rubyonrails.org/classes/AbstractController/Helpers/ClassMethods.html#method-i-helper_method + */ +class ActionControllerHelperMethod extends Method { + private ActionControllerControllerClass controllerClass; + + ActionControllerHelperMethod() { + this.getEnclosingModule() = controllerClass and + exists(MethodCall helperMethodMarker | + helperMethodMarker.getMethodName() = "helper_method" and + helperMethodMarker.getAnArgument().(StringlikeLiteral).getValueText() = this.getName() and + helperMethodMarker.getEnclosingModule() = controllerClass + ) + } + + /** Gets the class containing this helper method. */ + ActionControllerControllerClass getControllerClass() { result = controllerClass } +} + +/** + * Gets an `ActionControllerControllerClass` associated with the given `ErbFile` + * according to Rails path conventions. + * For instance, a template file at `app/views/foo/bar/baz.html.erb` will be + * mapped to a controller class in `app/controllers/foo/bar/baz_controller.rb`, + * if such a controller class exists. + */ +ActionControllerControllerClass getAssociatedControllerClass(ErbFile f) { + // There is a direct mapping from template file to controller class + controllerTemplateFile(result, f) + or + // The template `f` is a partial, and it is rendered from within another + // template file, `fp`. In this case, `f` inherits the associated + // controller classes from `fp`. + f.isPartial() and + exists(RenderCall r, ErbFile fp | + r.getLocation().getFile() = fp and + r.getTemplateFile() = f and + result = getAssociatedControllerClass(fp) + ) +} + +// TODO: improve layout support, e.g. for `layout` method +// https://guides.rubyonrails.org/layouts_and_rendering.html +/** + * Holds if `templateFile` is a viable file "belonging" to the given + * `ActionControllerControllerClass`, according to Rails conventions.
+ * + * This handles mappings between controllers in `app/controllers/`, and + * templates in `app/views/` and `app/views/layouts/`. + */ +predicate controllerTemplateFile(ActionControllerControllerClass cls, ErbFile templateFile) { + exists(string templatesPath, string sourcePrefix, string subPath, string controllerPath | + controllerPath = cls.getLocation().getFile().getRelativePath() and + templatesPath = templateFile.getParentContainer().getRelativePath() and + // `sourcePrefix` is either a prefix path ending in a slash, or empty if + // the rails app is at the source root + sourcePrefix = [controllerPath.regexpCapture("^(.*/)app/controllers/(?:.*?)/(?:[^/]*)$", 1), ""] and + controllerPath = sourcePrefix + "app/controllers/" + subPath + "_controller.rb" and + ( + templatesPath = sourcePrefix + "app/views/" + subPath or + templateFile.getRelativePath().matches(sourcePrefix + "app/views/layouts/" + subPath + "%") + ) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll new file mode 100644 index 00000000000..55638ab6584 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll @@ -0,0 +1,138 @@ +private import codeql.ruby.AST +private import codeql.ruby.Concepts +private import codeql.ruby.controlflow.CfgNodes +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.ast.internal.Module +private import ActionController + +predicate inActionViewContext(AstNode n) { + // Within a template + n.getLocation().getFile() instanceof ErbFile +} + +/** + * A method call on a string to mark it as HTML safe for Rails. + * Strings marked as such will not be automatically escaped when inserted into + * HTML. + */ +abstract class HtmlSafeCall extends MethodCall { + HtmlSafeCall() { this.getMethodName() = "html_safe" } +} + +// A call to `html_safe` from within a template. 
+private class ActionViewHtmlSafeCall extends HtmlSafeCall { + ActionViewHtmlSafeCall() { inActionViewContext(this) } +} + +/** + * A call to a method named "html_escape", "html_escape_once", or "h". + */ +abstract class HtmlEscapeCall extends MethodCall { + // "h" is aliased to "html_escape" in ActiveSupport + HtmlEscapeCall() { this.getMethodName() = ["html_escape", "html_escape_once", "h"] } +} + +class RailsHtmlEscaping extends Escaping::Range, DataFlow::CallNode { + RailsHtmlEscaping() { this.asExpr().getExpr() instanceof HtmlEscapeCall } + + override DataFlow::Node getAnInput() { result = this.getArgument(0) } + + override DataFlow::Node getOutput() { result = this } + + override string getKind() { result = Escaping::getHtmlKind() } +} + +// A call to `html_escape` from within a template. +private class ActionViewHtmlEscapeCall extends HtmlEscapeCall { + ActionViewHtmlEscapeCall() { inActionViewContext(this) } +} + +// A call in a context where some commonly used `ActionView` methods are available. +private class ActionViewContextCall extends MethodCall { + ActionViewContextCall() { + this.getReceiver() instanceof Self and + inActionViewContext(this) + } + + predicate isInErbFile() { this.getLocation().getFile() instanceof ErbFile } +} + +/** A call to the `raw` method to output a value without HTML escaping. */ +class RawCall extends ActionViewContextCall { + RawCall() { this.getMethodName() = "raw" } +} + +// A call to the `params` method within the context of a template. +private class ActionViewParamsCall extends ActionViewContextCall, ParamsCall { } + +/** + * A call to a `render` method that will populate the response body with the + * rendered content. + */ +abstract class RenderCall extends MethodCall { + RenderCall() { this.getMethodName() = "render" } + + private Expr getTemplatePathArgument() { + // TODO: support other ways of specifying paths (e.g. 
`file`) + result = [this.getKeywordArgument(["partial", "template", "action"]), this.getArgument(0)] + } + + private string getTemplatePathValue() { result = this.getTemplatePathArgument().getValueText() } + + // everything up to and including the final slash, but ignoring any leading slash + private string getSubPath() { + result = this.getTemplatePathValue().regexpCapture("^/?(.*/)?(?:[^/]*?)$", 1) + } + + // everything after the final slash, or the whole string if there is no slash + private string getBaseName() { + result = this.getTemplatePathValue().regexpCapture("^/?(?:.*/)?([^/]*?)$", 1) + } + + /** + * Gets the template file to be rendered by this call, if any. + */ + ErbFile getTemplateFile() { + result.getTemplateName() = this.getBaseName() and + result.getRelativePath().matches("%app/views/" + this.getSubPath() + "%") + } + + /** + * Get the local variables passed as context to the renderer + */ + HashLiteral getLocals() { result = this.getKeywordArgument("locals") } + // TODO: implicit renders in controller actions +} + +// A call to the `render` method within the context of a template. +private class ActionViewRenderCall extends RenderCall, ActionViewContextCall { } + +/** + * A render call that does not automatically set the HTTP response body. + */ +abstract class RenderToCall extends MethodCall { + RenderToCall() { this.getMethodName() = ["render_to_body", "render_to_string"] } +} + +// A call to `render_to` from within a template. +private class ActionViewRenderToCall extends ActionViewContextCall, RenderToCall { } + +/** + * A call to the ActionView `link_to` helper method. + * + * This generates an HTML anchor tag. The method is not designed to expect + * user-input, so provided paths are not automatically HTML escaped. 
+ */ +class LinkToCall extends ActionViewContextCall { + LinkToCall() { this.getMethodName() = "link_to" } + + Expr getPathArgument() { + // When `link_to` is called with a block, it uses the first argument as the + // path, and otherwise the second argument. + exists(this.getBlock()) and result = this.getArgument(0) + or + not exists(this.getBlock()) and result = this.getArgument(1) + } +} +// TODO: model flow in/out of template files properly, diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll new file mode 100644 index 00000000000..2a13b51acfb --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -0,0 +1,319 @@ +private import codeql.ruby.AST +private import codeql.ruby.Concepts +private import codeql.ruby.controlflow.CfgNodes +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.internal.DataFlowDispatch +private import codeql.ruby.ast.internal.Module +private import codeql.ruby.ApiGraphs +private import codeql.ruby.frameworks.StandardLibrary + +private class ActiveRecordBaseAccess extends ConstantReadAccess { + ActiveRecordBaseAccess() { + this.getName() = "Base" and + this.getScopeExpr().(ConstantAccess).getName() = "ActiveRecord" + } +} + +// ApplicationRecord extends ActiveRecord::Base, but we +// treat it separately in case the ApplicationRecord definition +// is not in the database +private class ApplicationRecordAccess extends ConstantReadAccess { + ApplicationRecordAccess() { this.getName() = "ApplicationRecord" } +} + +/// See https://api.rubyonrails.org/classes/ActiveRecord/Persistence.html +private string activeRecordPersistenceInstanceMethodName() { + result = + [ + "becomes", "becomes!", "decrement", "decrement!", "delete", "delete!", "destroy", "destroy!", + "destroyed?", "increment", "increment!", "new_record?", "persisted?", + "previously_new_record?", "reload", "save", "save!", 
"toggle", "toggle!", "touch", "update", + "update!", "update_attribute", "update_column", "update_columns" + ] +} + +// Methods with these names are defined for all active record model instances, +// so they are unlikely to refer to a database field. +private predicate isBuiltInMethodForActiveRecordModelInstance(string methodName) { + methodName = activeRecordPersistenceInstanceMethodName() or + methodName = basicObjectInstanceMethodName() or + methodName = objectInstanceMethodName() +} + +/** + * A `ClassDeclaration` for a class that extends `ActiveRecord::Base`. For example, + * + * ```rb + * class UserGroup < ActiveRecord::Base + * has_many :users + * end + * ``` + */ +class ActiveRecordModelClass extends ClassDeclaration { + ActiveRecordModelClass() { + // class Foo < ActiveRecord::Base + this.getSuperclassExpr() instanceof ActiveRecordBaseAccess + or + // class Foo < ApplicationRecord + this.getSuperclassExpr() instanceof ApplicationRecordAccess + or + // class Bar < Foo + exists(ActiveRecordModelClass other | + other.getModule() = resolveScopeExpr(this.getSuperclassExpr()) + ) + } + + // Gets the class declaration for this class and all of its super classes + private ModuleBase getAllClassDeclarations() { + result = this.getModule().getSuperClass*().getADeclaration() + } + + /** + * Gets methods defined in this class that may access a field from the database. + */ + Method getAPotentialFieldAccessMethod() { + // It's a method on this class or one of its super classes + result = this.getAllClassDeclarations().getAMethod() and + // There is a value that can be returned by this method which may include field data + exists(DataFlow::Node returned, ActiveRecordInstanceMethodCall cNode, MethodCall c | + exprNodeReturnedFrom(returned, result) and + cNode.flowsTo(returned) and + c = cNode.asExpr().getExpr() + | + // The referenced method is not built-in, and... 
+ not isBuiltInMethodForActiveRecordModelInstance(c.getMethodName()) and + ( + // ...The receiver does not have a matching method definition, or... + not exists( + cNode.getInstance().getClass().getAllClassDeclarations().getMethod(c.getMethodName()) + ) + or + // ...the called method can access a field + c.getATarget() = cNode.getInstance().getClass().getAPotentialFieldAccessMethod() + ) + ) + } +} + +/** A class method call whose receiver is an `ActiveRecordModelClass`. */ +class ActiveRecordModelClassMethodCall extends MethodCall { + private ActiveRecordModelClass recvCls; + + ActiveRecordModelClassMethodCall() { + // e.g. Foo.where(...) + recvCls.getModule() = resolveScopeExpr(this.getReceiver()) + or + // e.g. Foo.joins(:bars).where(...) + recvCls = this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass() + or + // e.g. self.where(...) within an ActiveRecordModelClass + this.getReceiver() instanceof Self and + this.getEnclosingModule() = recvCls + } + + /** The `ActiveRecordModelClass` of the receiver of this method. 
*/ + ActiveRecordModelClass getReceiverClass() { result = recvCls } +} + +private Expr sqlFragmentArgument(MethodCall call) { + exists(string methodName | + methodName = call.getMethodName() and + ( + methodName = + [ + "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!", + "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from", + "group", "having", "joins", "lock", "not", "order", "pluck", "where", "rewhere", "select", + "reselect", "update_all" + ] and + result = call.getArgument(0) + or + methodName = "calculate" and result = call.getArgument(1) + or + methodName in ["average", "count", "maximum", "minimum", "sum"] and + result = call.getArgument(0) + or + // This format was supported until Rails 2.3.8 + methodName = ["all", "find", "first", "last"] and + result = call.getKeywordArgument("conditions") + or + methodName = "reload" and + result = call.getKeywordArgument("lock") + ) + ) +} + +// An expression that, if tainted by unsanitized input, should not be used as +// part of an argument to an SQL executing method +private predicate unsafeSqlExpr(Expr sqlFragmentExpr) { + // Literals containing an interpolated value + exists(StringInterpolationComponent interpolated | + interpolated = sqlFragmentExpr.(StringlikeLiteral).getComponent(_) + ) + or + // String concatenations + sqlFragmentExpr instanceof AddExpr + or + // Variable reads + sqlFragmentExpr instanceof VariableReadAccess + or + // Method call + sqlFragmentExpr instanceof MethodCall +} + +/** + * A method call that may result in executing unintended user-controlled SQL + * queries if the `getSqlFragmentSinkArgument()` expression is tainted by + * unsanitized user-controlled input. 
For example, supposing that `User` is an + * `ActiveRecord` model class, then + * + * ```rb + * User.where("name = '#{user_name}'") + * ``` + * + * may be unsafe if `user_name` is from unsanitized user input, as a value such + * as `"') OR 1=1 --"` could result in the application looking up all users + * rather than just one with a matching name. + */ +class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall { + // The SQL fragment argument itself + private Expr sqlFragmentExpr; + + PotentiallyUnsafeSqlExecutingMethodCall() { + exists(Expr arg | + arg = sqlFragmentArgument(this) and + unsafeSqlExpr(sqlFragmentExpr) and + ( + sqlFragmentExpr = arg + or + sqlFragmentExpr = arg.(ArrayLiteral).getElement(0) + ) and + // Check that method has not been overridden + not exists(SingletonMethod m | + m.getName() = this.getMethodName() and + m.getOuterScope() = this.getReceiverClass() + ) + ) + } + + Expr getSqlFragmentSinkArgument() { result = sqlFragmentExpr } +} + +/** + * An `SqlExecution::Range` for an argument to a + * `PotentiallyUnsafeSqlExecutingMethodCall` that may be vulnerable to being + * controlled by user input. + */ +class ActiveRecordSqlExecutionRange extends SqlExecution::Range { + ActiveRecordSqlExecutionRange() { + exists(PotentiallyUnsafeSqlExecutingMethodCall mc | + this.asExpr().getNode() = mc.getSqlFragmentSinkArgument() + ) + } + + override DataFlow::Node getSql() { result = this } +} + +// TODO: model `ActiveRecord` sanitizers +// https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html +/** + * A node that may evaluate to one or more `ActiveRecordModelClass` instances. + */ +abstract class ActiveRecordModelInstantiation extends OrmInstantiation::Range, + DataFlow::LocalSourceNode { + abstract ActiveRecordModelClass getClass(); + + bindingset[methodName] + override predicate methodCallMayAccessField(string methodName) { + // The method is not a built-in, and... 
+ not isBuiltInMethodForActiveRecordModelInstance(methodName) and + ( + // ...There is no matching method definition in the class, or... + not exists(this.getClass().getMethod(methodName)) + or + // ...the called method can access a field. + exists(Method m | m = this.getClass().getAPotentialFieldAccessMethod() | + m.getName() = methodName + ) + ) + } +} + +// Names of class methods on ActiveRecord models that may return one or more +// instances of that model. This also includes the `initialize` method. +// See https://api.rubyonrails.org/classes/ActiveRecord/FinderMethods.html +private string finderMethodName() { + exists(string baseName | + baseName = + [ + "fifth", "find", "find_by", "find_or_initialize_by", "find_or_create_by", "first", + "forty_two", "fourth", "last", "second", "second_to_last", "take", "third", "third_to_last" + ] and + result = baseName + ["", "!"] + ) + or + result = "new" +} + +// Gets the "final" receiver in a chain of method calls. +// For example, in `Foo.bar`, this would give the `Foo` access, and in +// `foo.bar.baz("arg")` it would give the `foo` variable access +private Expr getUltimateReceiver(MethodCall call) { + exists(Expr recv | + recv = call.getReceiver() and + ( + result = getUltimateReceiver(recv) + or + not recv instanceof MethodCall and result = recv + ) + ) +} + +// A call to `find`, `where`, etc. 
that may return active record model object(s) +private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode { + private MethodCall call; + private ActiveRecordModelClass cls; + private Expr recv; + + ActiveRecordModelFinderCall() { + call = this.asExpr().getExpr() and + recv = getUltimateReceiver(call) and + resolveConstant(recv) = cls.getQualifiedName() and + call.getMethodName() = finderMethodName() + } + + final override ActiveRecordModelClass getClass() { result = cls } +} + +// A `self` reference that may resolve to an active record model object +private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation { + private ActiveRecordModelClass cls; + + ActiveRecordModelClassSelfReference() { + exists(Self s | + s.getEnclosingModule() = cls and + s.getEnclosingMethod() = cls.getAMethod() and + s = this.asExpr().getExpr() + ) + } + + final override ActiveRecordModelClass getClass() { result = cls } +} + +// A (locally tracked) active record model object +private class ActiveRecordInstance extends DataFlow::Node { + private ActiveRecordModelInstantiation instantiation; + + ActiveRecordInstance() { this = instantiation or instantiation.flowsTo(this) } + + ActiveRecordModelClass getClass() { result = instantiation.getClass() } +} + +// A call whose receiver may be an active record model object +private class ActiveRecordInstanceMethodCall extends DataFlow::CallNode { + private ActiveRecordInstance instance; + + ActiveRecordInstanceMethodCall() { this.getReceiver() = instance } + + ActiveRecordInstance getInstance() { result = instance } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll new file mode 100644 index 00000000000..a7a963eb8a9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll @@ -0,0 +1,299 @@ +/** + * Provides classes for working with file system libraries. 
+ */ + +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs +private import codeql.ruby.DataFlow +private import codeql.ruby.frameworks.StandardLibrary + +private DataFlow::Node ioInstanceInstantiation() { + result = API::getTopLevelMember("IO").getAnInstantiation() or + result = API::getTopLevelMember("IO").getAMethodCall(["for_fd", "open", "try_convert"]) +} + +private DataFlow::Node ioInstance() { + result = ioInstanceInstantiation() + or + exists(DataFlow::Node inst | + inst = ioInstance() and + inst.(DataFlow::LocalSourceNode).flowsTo(result) + ) +} + +// Match some simple cases where a path argument specifies a shell command to +// be executed. For example, the `"|date"` argument in `IO.read("|date")`, which +// will execute a shell command and read its output rather than reading from the +// filesystem. +private predicate pathArgSpawnsSubprocess(Expr arg) { + arg.(StringlikeLiteral).getValueText().charAt(0) = "|" +} + +private DataFlow::Node fileInstanceInstantiation() { + result = API::getTopLevelMember("File").getAnInstantiation() + or + result = API::getTopLevelMember("File").getAMethodCall("open") + or + // Calls to `Kernel.open` can yield `File` instances + result.(KernelMethodCall).getMethodName() = "open" and + // Assume that calls that don't invoke shell commands will instead open + // a file. 
+ not pathArgSpawnsSubprocess(result.(KernelMethodCall).getArgument(0).asExpr().getExpr()) +} + +private DataFlow::Node fileInstance() { + result = fileInstanceInstantiation() + or + exists(DataFlow::Node inst | + inst = fileInstance() and + inst.(DataFlow::LocalSourceNode).flowsTo(result) + ) +} + +private string ioFileReaderClassMethodName() { + result = ["binread", "foreach", "read", "readlines", "try_convert"] +} + +private string ioFileReaderInstanceMethodName() { + result = + [ + "getbyte", "getc", "gets", "pread", "read", "read_nonblock", "readbyte", "readchar", + "readline", "readlines", "readpartial", "sysread" + ] +} + +private string ioFileReaderMethodName(boolean classMethodCall) { + classMethodCall = true and result = ioFileReaderClassMethodName() + or + classMethodCall = false and result = ioFileReaderInstanceMethodName() +} + +/** + * Classes and predicates for modeling the core `IO` module. + */ +module IO { + /** + * An instance of the `IO` class, for example in + * + * ```rb + * rand = IO.new(IO.sysopen("/dev/random", "r"), "r") + * rand_data = rand.read(32) + * ``` + * + * there are 3 `IOInstance`s - the call to `IO.new`, the assignment + * `rand = ...`, and the read access to `rand` on the second line. + */ + class IOInstance extends DataFlow::Node { + IOInstance() { + this = ioInstance() or + this = fileInstance() + } + } + + // "Direct" `IO` instances, i.e. cases where there is no more specific + // subtype such as `File` + private class IOInstanceStrict extends IOInstance { + IOInstanceStrict() { this = ioInstance() } + } + + /** + * A `DataFlow::CallNode` that reads data using the `IO` class. For example, + * the `IO.read` call in: + * + * ```rb + * IO.read("|date") + * ``` + * + * returns the output of the `date` shell command, invoked as a subprocess. + * + * This class includes reads both from shell commands and reads from the + * filesystem.
For working with filesystem accesses specifically, see + * `IOFileReader` or the `FileSystemReadAccess` concept. + */ + class IOReader extends DataFlow::CallNode { + private boolean classMethodCall; + private string api; + + IOReader() { + // Class methods + api = ["File", "IO"] and + classMethodCall = true and + this = API::getTopLevelMember(api).getAMethodCall(ioFileReaderMethodName(classMethodCall)) + or + // IO instance methods + classMethodCall = false and + api = "IO" and + exists(IOInstanceStrict ii | + this.getReceiver() = ii and + this.asExpr().getExpr().(MethodCall).getMethodName() = + ioFileReaderMethodName(classMethodCall) + ) + or + // File instance methods + classMethodCall = false and + api = "File" and + exists(File::FileInstance fi | + this.getReceiver() = fi and + this.asExpr().getExpr().(MethodCall).getMethodName() = + ioFileReaderMethodName(classMethodCall) + ) + // TODO: enumeration style methods such as `each`, `foreach`, etc. + } + + /** + * Returns the most specific core class used for this read, `IO` or `File` + */ + string getAPI() { result = api } + + predicate isClassMethodCall() { classMethodCall = true } + } + + /** + * A `DataFlow::CallNode` that reads data from the filesystem using the `IO` + * class. For example, the `IO.read` call in: + * + * ```rb + * IO.read("foo.txt") + * ``` + * + * reads the file `foo.txt` and returns its contents as a string. + */ + class IOFileReader extends IOReader, FileSystemReadAccess::Range { + IOFileReader() { + this.getAPI() = "File" + or + this.isClassMethodCall() and + // Assume that calls that don't invoke shell commands will instead + // read from a file. + not pathArgSpawnsSubprocess(this.getArgument(0).asExpr().getExpr()) + } + + // TODO: can we infer a path argument for instance method calls? + // e.g.
by tracing back to the instantiation of that instance + override DataFlow::Node getAPathArgument() { + result = this.getArgument(0) and this.isClassMethodCall() + } + + // This class represents calls that return data + override DataFlow::Node getADataNode() { result = this } + } +} + +/** + * Classes and predicates for modeling the core `File` module. + * + * Because `File` is a subclass of `IO`, all `FileInstance`s and + * `FileModuleReader`s are also `IOInstance`s and `IOFileReader`s + * respectively. + */ +module File { + /** + * An instance of the `File` class, for example in + * + * ```rb + * f = File.new("foo.txt") + * puts f.read() + * ``` + * + * there are 3 `FileInstance`s - the call to `File.new`, the assignment + * `f = ...`, and the read access to `f` on the second line. + */ + class FileInstance extends IO::IOInstance { + FileInstance() { this = fileInstance() } + } + + /** + * A read using the `File` module, e.g. the `f.read` call in + * + * ```rb + * f = File.new("foo.txt") + * puts f.read() + * ``` + */ + class FileModuleReader extends IO::IOFileReader { + FileModuleReader() { this.getAPI() = "File" } + } + + /** + * A call to a `File` method that may return one or more filenames.
+ */ + class FileModuleFilenameSource extends FileNameSource, DataFlow::CallNode { + FileModuleFilenameSource() { + // Class methods + this = + API::getTopLevelMember("File") + .getAMethodCall([ + "absolute_path", "basename", "expand_path", "join", "path", "readlink", + "realdirpath", "realpath" + ]) + or + // Instance methods + exists(FileInstance fi | + this.getReceiver() = fi and + this.asExpr().getExpr().(MethodCall).getMethodName() = ["path", "to_path"] + ) + } + } + + private class FileModulePermissionModification extends FileSystemPermissionModification::Range, + DataFlow::CallNode { + private DataFlow::Node permissionArg; + + FileModulePermissionModification() { + exists(string methodName | this = API::getTopLevelMember("File").getAMethodCall(methodName) | + methodName in ["chmod", "lchmod"] and permissionArg = this.getArgument(0) + or + methodName = "mkfifo" and permissionArg = this.getArgument(1) + or + methodName in ["new", "open"] and permissionArg = this.getArgument(2) + // TODO: defaults for optional args? This may depend on the umask + ) + } + + override DataFlow::Node getAPermissionNode() { result = permissionArg } + } +} + +/** + * Classes and predicates for modeling the `FileUtils` module from the standard + * library. + */ +module FileUtils { + /** + * A call to a FileUtils method that may return one or more filenames. + */ + class FileUtilsFilenameSource extends FileNameSource { + FileUtilsFilenameSource() { + // Note that many methods in FileUtils accept a `noop` option that will + // perform a dry run of the command. This means that, for instance, `rm` + // and similar methods may not actually delete/unlink a file when called. 
+ this = + API::getTopLevelMember("FileUtils") + .getAMethodCall([ + "chmod", "chmod_R", "chown", "chown_R", "getwd", "makedirs", "mkdir", "mkdir_p", + "mkpath", "remove", "remove_dir", "remove_entry", "rm", "rm_f", "rm_r", "rm_rf", + "rmdir", "rmtree", "safe_unlink", "touch" + ]) + } + } + + private class FileUtilsPermissionModification extends FileSystemPermissionModification::Range, + DataFlow::CallNode { + private DataFlow::Node permissionArg; + + FileUtilsPermissionModification() { + exists(string methodName | + this = API::getTopLevelMember("FileUtils").getAMethodCall(methodName) + | + methodName in ["chmod", "chmod_R"] and permissionArg = this.getArgument(0) + or + methodName in ["install", "makedirs", "mkdir", "mkdir_p", "mkpath"] and + permissionArg = this.getKeywordArgument("mode") + // TODO: defaults for optional args? This may depend on the umask + ) + } + + override DataFlow::Node getAPermissionNode() { result = permissionArg } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll new file mode 100644 index 00000000000..acb902694fe --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll @@ -0,0 +1,12 @@ +/** + * Helper file that imports all HTTP clients. 
+ */ + +private import codeql.ruby.frameworks.http_clients.NetHttp +private import codeql.ruby.frameworks.http_clients.Excon +private import codeql.ruby.frameworks.http_clients.Faraday +private import codeql.ruby.frameworks.http_clients.RestClient +private import codeql.ruby.frameworks.http_clients.Httparty +private import codeql.ruby.frameworks.http_clients.HttpClient +private import codeql.ruby.frameworks.http_clients.OpenURI +private import codeql.ruby.frameworks.http_clients.Typhoeus diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll new file mode 100644 index 00000000000..f6d883cd6b7 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll @@ -0,0 +1,337 @@ +private import codeql.ruby.AST +private import codeql.ruby.Concepts +private import codeql.ruby.DataFlow +private import codeql.ruby.ApiGraphs + +/** + * The `Kernel` module is included by the `Object` class, so its methods are available + * in every Ruby object. In addition, its module methods can be called by + * providing a specific receiver as in `Kernel.exit`. + */ +class KernelMethodCall extends DataFlow::CallNode { + private MethodCall methodCall; + + KernelMethodCall() { + methodCall = this.asExpr().getExpr() and + ( + this = API::getTopLevelMember("Kernel").getAMethodCall(_) + or + methodCall instanceof UnknownMethodCall and + ( + this.getReceiver().asExpr().getExpr() instanceof Self and + isPrivateKernelMethod(methodCall.getMethodName()) + or + isPublicKernelMethod(methodCall.getMethodName()) + ) + ) + } + + string getMethodName() { result = methodCall.getMethodName() } + + int getNumberOfArguments() { result = methodCall.getNumberOfArguments() } +} + +/** + * Public methods in the `Kernel` module. These can be invoked on any object via the usual dot syntax. 
+ * ```ruby + * arr = [] + * arr.send("push", 5) # => [5] + * ``` + */ +private predicate isPublicKernelMethod(string method) { + method in ["class", "clone", "frozen?", "tap", "then", "yield_self", "send"] +} + +/** + * Private methods in the `Kernel` module. + * These can be invoked on `self`, on `Kernel`, or using a low-level primitive like `send` or `instance_eval`. + * ```ruby + * puts "hello world" + * Kernel.puts "hello world" + * 5.instance_eval { puts "hello world" } + * 5.send("puts", "hello world") + * ``` + */ +private predicate isPrivateKernelMethod(string method) { + method in [ + "Array", "Complex", "Float", "Hash", "Integer", "Rational", "String", "__callee__", "__dir__", + "__method__", "`", "abort", "at_exit", "autoload", "autoload?", "binding", "block_given?", + "callcc", "caller", "caller_locations", "catch", "chomp", "chop", "eval", "exec", "exit", + "exit!", "fail", "fork", "format", "gets", "global_variables", "gsub", "iterator?", "lambda", + "load", "local_variables", "loop", "open", "p", "pp", "print", "printf", "proc", "putc", + "puts", "raise", "rand", "readline", "readlines", "require", "require_relative", "select", + "set_trace_func", "sleep", "spawn", "sprintf", "srand", "sub", "syscall", "system", "test", + "throw", "trace_var", "trap", "untrace_var", "warn" + ] +} + +string basicObjectInstanceMethodName() { + result in [ + "equal?", "instance_eval", "instance_exec", "method_missing", "singleton_method_added", + "singleton_method_removed", "singleton_method_undefined" + ] +} + +/** + * Instance methods on `BasicObject`, which are available to all classes.
+ */ +class BasicObjectInstanceMethodCall extends UnknownMethodCall { + BasicObjectInstanceMethodCall() { this.getMethodName() = basicObjectInstanceMethodName() } +} + +string objectInstanceMethodName() { + result in [ + "!~", "<=>", "===", "=~", "callable_methods", "define_singleton_method", "display", + "do_until", "do_while", "dup", "enum_for", "eql?", "extend", "f", "freeze", "h", "hash", + "inspect", "instance_of?", "instance_variable_defined?", "instance_variable_get", + "instance_variable_set", "instance_variables", "is_a?", "itself", "kind_of?", + "matching_methods", "method", "method_missing", "methods", "nil?", "object_id", + "private_methods", "protected_methods", "public_method", "public_methods", "public_send", + "remove_instance_variable", "respond_to?", "respond_to_missing?", "send", + "shortest_abbreviation", "singleton_class", "singleton_method", "singleton_methods", "taint", + "tainted?", "to_enum", "to_s", "trust", "untaint", "untrust", "untrusted?" + ] +} + +/** + * Instance methods on `Object`, which are available to all classes except `BasicObject`. + */ +class ObjectInstanceMethodCall extends UnknownMethodCall { + ObjectInstanceMethodCall() { this.getMethodName() = objectInstanceMethodName() } +} + +/** + * Method calls which have no known target. + * These will typically be calls to methods inherited from a superclass. + */ +class UnknownMethodCall extends MethodCall { + UnknownMethodCall() { not exists(this.(Call).getATarget()) } +} + +/** + * A system command executed via subshell literal syntax. + * E.g. 
+ * ```ruby + * `cat foo.txt` + * %x(cat foo.txt) + * %x[cat foo.txt] + * %x{cat foo.txt} + * %x/cat foo.txt/ + * ``` + */ +class SubshellLiteralExecution extends SystemCommandExecution::Range { + SubshellLiteral literal; + + SubshellLiteralExecution() { this.asExpr().getExpr() = literal } + + override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = literal.getComponent(_) } + + override predicate isShellInterpreted(DataFlow::Node arg) { arg = getAnArgument() } +} + +/** + * A system command executed via shell heredoc syntax. + * E.g. + * ```ruby + * <<`EOF` + * cat foo.text + * EOF + * ``` + */ +class SubshellHeredocExecution extends SystemCommandExecution::Range { + HereDoc heredoc; + + SubshellHeredocExecution() { this.asExpr().getExpr() = heredoc and heredoc.isSubShell() } + + override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = heredoc.getComponent(_) } + + override predicate isShellInterpreted(DataFlow::Node arg) { arg = getAnArgument() } +} + +/** + * A system command executed via the `Kernel.system` method. + * `Kernel.system` accepts three argument forms: + * - A single string. If it contains no shell meta characters, keywords or + * builtins, it is executed directly in a subprocess. + * Otherwise, it is executed in a subshell. + * ```ruby + * system("cat foo.txt | tail") + * ``` + * - A command and one or more arguments. + * The command is executed in a subprocess. + * ```ruby + * system("cat", "foo.txt") + * ``` + * - An array containing the command name and argv[0], followed by zero or more arguments. + * The command is executed in a subprocess. + * ```ruby + * system(["cat", "cat"], "foo.txt") + * ``` + * In addition, `Kernel.system` accepts an optional environment hash as the + * first argument and an optional options hash as the last argument. + * We don't yet distinguish between these arguments and the command arguments. 
+ * ```ruby + * system({"FOO" => "BAR"}, "cat foo.txt | tail", {unsetenv_others: true}) + * ``` + * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-system + */ +class KernelSystemCall extends SystemCommandExecution::Range, KernelMethodCall { + KernelSystemCall() { this.getMethodName() = "system" } + + override DataFlow::Node getAnArgument() { result = this.getArgument(_) } + + override predicate isShellInterpreted(DataFlow::Node arg) { + // Kernel.system invokes a subshell if you provide a single string as argument + this.getNumberOfArguments() = 1 and arg = getAnArgument() + } +} + +/** + * A system command executed via the `Kernel.exec` method. + * `Kernel.exec` takes the same argument forms as `Kernel.system`. See `KernelSystemCall` for details. + * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-exec + */ +class KernelExecCall extends SystemCommandExecution::Range, KernelMethodCall { + KernelExecCall() { this.getMethodName() = "exec" } + + override DataFlow::Node getAnArgument() { result = this.getArgument(_) } + + override predicate isShellInterpreted(DataFlow::Node arg) { + // Kernel.exec invokes a subshell if you provide a single string as argument + this.getNumberOfArguments() = 1 and arg = getAnArgument() + } +} + +/** + * A system command executed via the `Kernel.spawn` method. + * `Kernel.spawn` takes the same argument forms as `Kernel.system`. + * See `KernelSystemCall` for details. + * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-spawn + * TODO: document and handle the env and option arguments. + * ``` + * spawn([env,] command... 
[,options]) -> pid + * ``` + */ +class KernelSpawnCall extends SystemCommandExecution::Range, KernelMethodCall { + KernelSpawnCall() { this.getMethodName() = "spawn" } + + override DataFlow::Node getAnArgument() { result = this.getArgument(_) } + + override predicate isShellInterpreted(DataFlow::Node arg) { + // Kernel.spawn invokes a subshell if you provide a single string as argument + this.getNumberOfArguments() = 1 and arg = getAnArgument() + } +} + +/** + * A system command executed via one of the `Open3` methods. + * These methods take the same argument forms as `Kernel.system`. + * See `KernelSystemCall` for details. + */ +class Open3Call extends SystemCommandExecution::Range { + MethodCall methodCall; + + Open3Call() { + this.asExpr().getExpr() = methodCall and + this = + API::getTopLevelMember("Open3") + .getAMethodCall(["popen3", "popen2", "popen2e", "capture3", "capture2", "capture2e"]) + } + + override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() } + + override predicate isShellInterpreted(DataFlow::Node arg) { + // These Open3 methods invoke a subshell if you provide a single string as argument + methodCall.getNumberOfArguments() = 1 and arg.asExpr().getExpr() = methodCall.getAnArgument() + } +} + +/** + * A pipeline of system commands constructed via one of the `Open3` methods. + * These methods accept a variable argument list of commands. + * Commands can be in any form supported by `Kernel.system`. See `KernelSystemCall` for details. 
+ * ```ruby + * Open3.pipeline("cat foo.txt", "tail") + * Open3.pipeline(["cat", "foo.txt"], "tail") + * Open3.pipeline([{}, "cat", "foo.txt"], "tail") + * Open3.pipeline([["cat", "cat"], "foo.txt"], "tail") + */ +class Open3PipelineCall extends SystemCommandExecution::Range { + MethodCall methodCall; + + Open3PipelineCall() { + this.asExpr().getExpr() = methodCall and + this = + API::getTopLevelMember("Open3") + .getAMethodCall(["pipeline_rw", "pipeline_r", "pipeline_w", "pipeline_start", "pipeline"]) + } + + override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() } + + override predicate isShellInterpreted(DataFlow::Node arg) { + // A command in the pipeline is executed in a subshell if it is given as a single string argument. + arg.asExpr().getExpr() instanceof StringlikeLiteral and + arg.asExpr().getExpr() = methodCall.getAnArgument() + } +} + +/** + * A call to `Kernel.eval`, which executes its first argument as Ruby code. + * ```ruby + * a = 1 + * Kernel.eval("a = 2") + * a # => 2 + * ``` + */ +class EvalCallCodeExecution extends CodeExecution::Range, KernelMethodCall { + EvalCallCodeExecution() { this.getMethodName() = "eval" } + + override DataFlow::Node getCode() { result = this.getArgument(0) } +} + +/** + * A call to `Kernel#send`, which executes its first argument as a Ruby method call. + * ```ruby + * arr = [] + * arr.send("push", 1) + * arr # => [1] + * ``` + */ +class SendCallCodeExecution extends CodeExecution::Range, KernelMethodCall { + SendCallCodeExecution() { this.getMethodName() = "send" } + + override DataFlow::Node getCode() { result = this.getArgument(0) } +} + +/** + * A call to `BasicObject#instance_eval`, which executes its first argument as Ruby code. 
+ */ +class InstanceEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode { + InstanceEvalCallCodeExecution() { + this.asExpr().getExpr().(UnknownMethodCall).getMethodName() = "instance_eval" + } + + override DataFlow::Node getCode() { result = this.getArgument(0) } +} + +/** + * A call to `Module#class_eval`, which executes its first argument as Ruby code. + */ +class ClassEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode { + ClassEvalCallCodeExecution() { + this.asExpr().getExpr().(UnknownMethodCall).getMethodName() = "class_eval" + } + + override DataFlow::Node getCode() { result = this.getArgument(0) } +} + +/** + * A call to `Module#module_eval`, which executes its first argument as Ruby code. + */ +class ModuleEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode { + ModuleEvalCallCodeExecution() { + this.asExpr().getExpr().(UnknownMethodCall).getMethodName() = "module_eval" + } + + override DataFlow::Node getCode() { result = this.getArgument(0) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll new file mode 100644 index 00000000000..3e37ec6a514 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll @@ -0,0 +1,182 @@ +private import codeql.ruby.Concepts +private import codeql.ruby.AST +private import codeql.ruby.DataFlow +private import codeql.ruby.typetracking.TypeTracker +private import codeql.ruby.ApiGraphs +private import codeql.ruby.controlflow.CfgNodes as CfgNodes + +private class NokogiriXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode { + NokogiriXmlParserCall() { + this = + [ + API::getTopLevelMember("Nokogiri").getMember("XML"), + API::getTopLevelMember("Nokogiri").getMember("XML").getMember("Document"), + API::getTopLevelMember("Nokogiri") + .getMember("XML") + .getMember("SAX") + .getMember("Parser") + .getInstance() + 
].getAMethodCall("parse") + } + + override DataFlow::Node getInput() { result = this.getArgument(0) } + + override predicate externalEntitiesEnabled() { + this.getArgument(3) = + [trackEnableFeature(TNOENT()), trackEnableFeature(TDTDLOAD()), trackDisableFeature(TNONET())] + or + // calls to methods that enable/disable features in a block argument passed to this parser call. + // For example: + // ```ruby + // doc.parse(...) { |options| options.nononet; options.noent } + // ``` + this.asExpr() + .getExpr() + .(MethodCall) + .getBlock() + .getAStmt() + .getAChild*() + .(MethodCall) + .getMethodName() = ["noent", "dtdload", "nononet"] + } +} + +private class LibXmlRubyXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode { + LibXmlRubyXmlParserCall() { + this = + [API::getTopLevelMember("LibXML").getMember("XML"), API::getTopLevelMember("XML")] + .getMember(["Document", "Parser"]) + .getAMethodCall(["file", "io", "string"]) + } + + override DataFlow::Node getInput() { result = this.getArgument(0) } + + override predicate externalEntitiesEnabled() { + exists(Pair pair | + pair = this.getArgument(1).asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + pair.getKey().(Literal).getValueText() = "options" and + pair.getValue() = + [ + trackEnableFeature(TNOENT()), trackEnableFeature(TDTDLOAD()), + trackDisableFeature(TNONET()) + ].asExpr().getExpr() + ) + } +} + +private newtype TFeature = + TNOENT() or + TNONET() or + TDTDLOAD() + +class Feature extends TFeature { + abstract int getValue(); + + string toString() { result = getConstantName() } + + abstract string getConstantName(); +} + +private class FeatureNOENT extends Feature, TNOENT { + override int getValue() { result = 2 } + + override string getConstantName() { result = "NOENT" } +} + +private class FeatureNONET extends Feature, TNONET { + override int getValue() { result = 2048 } + + override string getConstantName() { result = "NONET" } +} + +private class FeatureDTDLOAD extends Feature, TDTDLOAD { + 
override int getValue() { result = 4 } + + override string getConstantName() { result = "DTDLOAD" } +} + +private API::Node parseOptionsModule() { + result = API::getTopLevelMember("Nokogiri").getMember("XML").getMember("ParseOptions") + or + result = + API::getTopLevelMember("LibXML").getMember("XML").getMember("Parser").getMember("Options") + or + result = API::getTopLevelMember("XML").getMember("Parser").getMember("Options") +} + +private predicate bitWiseAndOr(CfgNodes::ExprNodes::OperationCfgNode operation) { + operation.getExpr() instanceof BitwiseAndExpr or + operation.getExpr() instanceof AssignBitwiseAndExpr or + operation.getExpr() instanceof BitwiseOrExpr or + operation.getExpr() instanceof AssignBitwiseOrExpr +} + +private DataFlow::LocalSourceNode trackFeature(Feature f, boolean enable, TypeTracker t) { + t.start() and + ( + // An integer literal with the feature-bit enabled/disabled + exists(int bitValue | + bitValue = result.asExpr().getExpr().(IntegerLiteral).getValue().bitAnd(f.getValue()) + | + if bitValue = 0 then enable = false else enable = true + ) + or + // Use of a constant f + enable = true and + result = parseOptionsModule().getMember(f.getConstantName()).getAUse() + or + // Treat `&`, `&=`, `|` and `|=` operators as if they preserve the on/off states + // of their operands. This is an overapproximation but likely to work well in practice + // because it makes little sense to explicitly set a feature to both `on` and `off` in the + // same code. 
+ exists(CfgNodes::ExprNodes::OperationCfgNode operation | + bitWiseAndOr(operation) and + operation = result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode) and + operation.getAnOperand() = trackFeature(f, enable).asExpr() + ) + or + // The complement operator toggles a feature from enabled to disabled and vice-versa + result.asExpr().getExpr() instanceof ComplementExpr and + result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode).getAnOperand() = + trackFeature(f, enable.booleanNot()).asExpr() + or + // Nokogiri has a ParseOptions class that is a wrapper around the bit-fields and + // provides methods for querying and updating the fields. + result = + API::getTopLevelMember("Nokogiri") + .getMember("XML") + .getMember("ParseOptions") + .getAnInstantiation() and + result.asExpr().(CfgNodes::ExprNodes::CallCfgNode).getArgument(0) = + trackFeature(f, enable).asExpr() + or + // The Nokogiri ParseOptions class has methods for setting/unsetting features. + // The method names are the lowercase variants of the constant names, with a "no" + // prefix for unsetting a feature. + exists(CfgNodes::ExprNodes::CallCfgNode call | + enable = true and + call.getExpr().(MethodCall).getMethodName() = f.getConstantName().toLowerCase() + or + enable = false and + call.getExpr().(MethodCall).getMethodName() = "no" + f.getConstantName().toLowerCase() + | + ( + // these methods update the receiver + result.flowsTo(any(DataFlow::Node n | n.asExpr() = call.getReceiver())) + or + // in addition they return the (updated) receiver to allow chaining calls. 
+ result.asExpr() = call + ) + ) + ) + or + exists(TypeTracker t2 | result = trackFeature(f, enable, t2).track(t2, t)) +} + +private DataFlow::Node trackFeature(Feature f, boolean enable) { + trackFeature(f, enable, TypeTracker::end()).flowsTo(result) +} + +private DataFlow::Node trackEnableFeature(Feature f) { result = trackFeature(f, true) } + +private DataFlow::Node trackDisableFeature(Feature f) { result = trackFeature(f, false) } diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll new file mode 100644 index 00000000000..efb9d7be66c --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll @@ -0,0 +1,130 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `Excon`. + * ```ruby + * # one-off request + * Excon.get("http://example.com").body + * + * # connection re-use + * connection = Excon.new("http://example.com") + * connection.get(path: "/").body + * connection.request(method: :get, path: "/") + * ``` + * + * TODO: pipelining, streaming responses + * https://github.com/excon/excon/blob/master/README.md + */ +class ExconHttpRequest extends HTTP::Client::Request::Range { + DataFlow::Node requestUse; + API::Node requestNode; + API::Node connectionNode; + + ExconHttpRequest() { + requestUse = requestNode.getAnImmediateUse() and + connectionNode = + [ + // one-off requests + API::getTopLevelMember("Excon"), + // connection re-use + API::getTopLevelMember("Excon").getInstance(), + API::getTopLevelMember("Excon").getMember("Connection").getInstance() + ] and + requestNode = + connectionNode + .getReturn([ + // Excon#request exists but Excon.request doesn't. + // This shouldn't be a problem - in real code the latter would raise NoMethodError anyway. 
+ "get", "head", "delete", "options", "post", "put", "patch", "trace", "request" + ]) and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // Check for `ssl_verify_peer: false` in the options hash. + exists(DataFlow::Node arg, int i | + i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + | + argSetsVerifyPeer(arg, false, disablingNode) + ) + or + // Or we see a call to `Excon.defaults[:ssl_verify_peer] = false` before the + // request, and no `ssl_verify_peer: true` in the explicit options hash for + // the request call. + exists(DataFlow::CallNode disableCall | + setsDefaultVerification(disableCall, false) and + disableCall.asExpr().getASuccessor+() = requestUse.asExpr() and + disablingNode = disableCall and + not exists(DataFlow::Node arg, int i | + i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + | + argSetsVerifyPeer(arg, true, _) + ) + ) + } + + override string getFramework() { result = "Excon" } +} + +/** + * Holds if `arg` represents an options hash that contains the key + * `:ssl_verify_peer` with `value`, where `kvNode` is the data-flow node for + * this key-value pair. + */ +predicate argSetsVerifyPeer(DataFlow::Node arg, boolean value, DataFlow::Node kvNode) { + // Either passed as an individual key:value argument, e.g.: + // Excon.get(..., ssl_verify_peer: false) + isSslVerifyPeerPair(arg.asExpr().getExpr(), value) and + kvNode = arg + or + // Or as a single hash argument, e.g.: + // Excon.get(..., { ssl_verify_peer: false, ... 
}) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isSslVerifyPeerPair(p, value) and + optionsNode.flowsTo(arg) and + kvNode.asExpr().getExpr() = p + ) +} + +/** + * Holds if `callNode` sets `Excon.defaults[:ssl_verify_peer]` or + * `Excon.ssl_verify_peer` to `value`. + */ +private predicate setsDefaultVerification(DataFlow::CallNode callNode, boolean value) { + callNode = API::getTopLevelMember("Excon").getReturn("defaults").getAMethodCall("[]=") and + isSslVerifyPeerLiteral(callNode.getArgument(0)) and + hasBooleanValue(callNode.getArgument(1), value) + or + callNode = API::getTopLevelMember("Excon").getAMethodCall("ssl_verify_peer=") and + hasBooleanValue(callNode.getArgument(0), value) +} + +private predicate isSslVerifyPeerLiteral(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = "ssl_verify_peer" and + literal.flowsTo(node) + ) +} + +/** Holds if `node` can contain `value`. */ +private predicate hasBooleanValue(DataFlow::Node node, boolean value) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(BooleanLiteral).getValue() = value and + literal.flowsTo(node) + ) +} + +/** Holds if `p` is the pair `ssl_verify_peer: value`.
*/ +private predicate isSslVerifyPeerPair(Pair p, boolean value) { + exists(DataFlow::Node key, DataFlow::Node valueNode | + key.asExpr().getExpr() = p.getKey() and valueNode.asExpr().getExpr() = p.getValue() + | + isSslVerifyPeerLiteral(key) and + hasBooleanValue(valueNode, value) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll new file mode 100644 index 00000000000..de3f6f5f811 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll @@ -0,0 +1,140 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `Faraday`. + * ```ruby + * # one-off request + * Faraday.get("http://example.com").body + * + * # connection re-use + * connection = Faraday.new("http://example.com") + * connection.get("/").body + * ``` + */ +class FaradayHttpRequest extends HTTP::Client::Request::Range { + DataFlow::Node requestUse; + API::Node requestNode; + API::Node connectionNode; + + FaradayHttpRequest() { + connectionNode = + [ + // one-off requests + API::getTopLevelMember("Faraday"), + // connection re-use + API::getTopLevelMember("Faraday").getInstance() + ] and + requestNode = + connectionNode.getReturn(["get", "head", "delete", "post", "put", "patch", "trace"]) and + requestUse = requestNode.getAnImmediateUse() and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // `Faraday::new` takes an options hash as its second argument, and we're + // looking for + // `{ ssl: { verify: false } }` + // or + // `{ ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE } }` + exists(DataFlow::Node arg, int i | + i > 0 and arg = 
connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + | + // Either passed as an individual key:value argument, e.g.: + // Faraday.new(..., ssl: {...}) + isSslOptionsPairDisablingValidation(arg.asExpr().getExpr()) and + disablingNode = arg + or + // Or as a single hash argument, e.g.: + // Faraday.new(..., { ssl: {...} }) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isSslOptionsPairDisablingValidation(p) and + optionsNode.flowsTo(arg) and + disablingNode.asExpr().getExpr() = p + ) + ) + } + + override string getFramework() { result = "Faraday" } +} + +/** + * Holds if the pair `p` contains the key `:ssl` for which the value is a hash + * containing either `verify: false` or + * `verify_mode: OpenSSL::SSL::VERIFY_NONE`. + */ +private predicate isSslOptionsPairDisablingValidation(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isSymbolLiteral(key, "ssl") and + (isHashWithVerifyFalse(value) or isHashWithVerifyModeNone(value)) + ) +} + +/** Holds if `node` represents the symbol literal with the given `valueText`. */ +private predicate isSymbolLiteral(DataFlow::Node node, string valueText) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = valueText and + literal.flowsTo(node) + ) +} + +/** + * Holds if `node` represents a hash containing the key-value pair + * `verify: false`. + */ +private predicate isHashWithVerifyFalse(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode hash | + isVerifyFalsePair(hash.asExpr().getExpr().(HashLiteral).getAKeyValuePair()) and + hash.flowsTo(node) + ) +} + +/** + * Holds if `node` represents a hash containing the key-value pair + * `verify_mode: OpenSSL::SSL::VERIFY_NONE`. 
+ */ +private predicate isHashWithVerifyModeNone(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode hash | + isVerifyModeNonePair(hash.asExpr().getExpr().(HashLiteral).getAKeyValuePair()) and + hash.flowsTo(node) + ) +} + +/** + * Holds if the pair `p` has the key `:verify_mode` and the value + * `OpenSSL::SSL::VERIFY_NONE`. + */ +private predicate isVerifyModeNonePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isSymbolLiteral(key, "verify_mode") and + value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + ) +} + +/** + * Holds if the pair `p` has the key `:verify` and the value `false`. + */ +private predicate isVerifyFalsePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isSymbolLiteral(key, "verify") and + isFalse(value) + ) +} + +/** Holds if `node` can contain the Boolean value `false`. */ +private predicate isFalse(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(BooleanLiteral).isFalse() and + literal.flowsTo(node) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll new file mode 100644 index 00000000000..3db9c653a5c --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll @@ -0,0 +1,55 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `HTTPClient`. 
+ * ```ruby + * HTTPClient.get("http://example.com").body + * HTTPClient.get_content("http://example.com") + * ``` + */ +class HttpClientRequest extends HTTP::Client::Request::Range { + API::Node requestNode; + API::Node connectionNode; + DataFlow::Node requestUse; + string method; + + HttpClientRequest() { + connectionNode = + [ + // One-off requests + API::getTopLevelMember("HTTPClient"), + // Connection re-use + API::getTopLevelMember("HTTPClient").getInstance() + ] and + requestNode = connectionNode.getReturn(method) and + requestUse = requestNode.getAnImmediateUse() and + method in [ + "get", "head", "delete", "options", "post", "put", "trace", "get_content", "post_content" + ] and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { + // The `get_content` and `post_content` methods return the response body as + // a string. The other methods return a `HTTPClient::Message` object which + // has various methods that return the response body. + method in ["get_content", "post_content"] and result = requestUse + or + not method in ["get_content", "post_content"] and + result = requestNode.getAMethodCall(["body", "http_body", "content", "dump"]) + } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // Look for calls to set + // `c.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE` + // on an HTTPClient connection object `c`.
+ disablingNode = + connectionNode.getReturn("ssl_config").getReturn("verify_mode=").getAnImmediateUse() and + disablingNode.(DataFlow::CallNode).getArgument(0) = + API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + } + + override string getFramework() { result = "HTTPClient" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll new file mode 100644 index 00000000000..b1746692bf7 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll @@ -0,0 +1,95 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `HTTParty`. + * ```ruby + * # one-off request - returns the response body + * HTTParty.get("http://example.com") + * + * # TODO: module inclusion + * class MyClass + * include HTTParty + * end + * + * MyClass.new("http://example.com") + * ``` + */ +class HttpartyRequest extends HTTP::Client::Request::Range { + API::Node requestNode; + DataFlow::Node requestUse; + + HttpartyRequest() { + requestUse = requestNode.getAnImmediateUse() and + requestNode = + API::getTopLevelMember("HTTParty") + .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { + // If HTTParty can recognise the response type, it will parse and return it + // directly from the request call. Otherwise, it will return a `HTTParty::Response` + // object that has a `#body` method. + // So if there's a call to `#body` on the response, treat that as the response body. + exists(DataFlow::Node r | r = requestNode.getAMethodCall("body") | result = r) + or + // Otherwise, treat the response as the response body. 
+ not exists(DataFlow::Node r | r = requestNode.getAMethodCall("body")) and + result = requestUse + } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // The various request methods take an options hash as their second + // argument, and we're looking for `{ verify: false }` or + // `{ verify_peer: false }`. + exists(DataFlow::Node arg, int i | + i > 0 and arg.asExpr().getExpr() = requestUse.asExpr().getExpr().(MethodCall).getArgument(i) + | + // Either passed as an individual key:value argument, e.g.: + // HTTParty.get(..., verify: false) + isVerifyFalsePair(arg.asExpr().getExpr()) and + disablingNode = arg + or + // Or as a single hash argument, e.g.: + // HTTParty.get(..., { verify: false, ... }) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isVerifyFalsePair(p) and + optionsNode.flowsTo(arg) and + disablingNode.asExpr().getExpr() = p + ) + ) + } + + override string getFramework() { result = "HTTParty" } +} + +/** Holds if `node` represents the symbol literal `verify` or `verify_peer`. */ +private predicate isVerifyLiteral(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = ["verify", "verify_peer"] and + literal.flowsTo(node) + ) +} + +/** Holds if `node` can contain the Boolean value `false`. */ +private predicate isFalse(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(BooleanLiteral).isFalse() and + literal.flowsTo(node) + ) +} + +/** + * Holds if `p` is the pair `verify: false` or `verify_peer: false`. 
+ */ +private predicate isVerifyFalsePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isVerifyLiteral(key) and + isFalse(value) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll new file mode 100644 index 00000000000..9d9c6f7aff3 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll @@ -0,0 +1,69 @@ +private import codeql.ruby.AST +private import codeql.ruby.Concepts +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.ApiGraphs +private import codeql.ruby.dataflow.internal.DataFlowPublic + +/** + * A `Net::HTTP` call which initiates an HTTP request. + * ```ruby + * Net::HTTP.get("http://example.com/") + * Net::HTTP.post("http://example.com/", "some_data") + * req = Net::HTTP.new("example.com") + * response = req.get("/") + * ``` + */ +class NetHttpRequest extends HTTP::Client::Request::Range { + private DataFlow::CallNode request; + private DataFlow::Node responseBody; + + NetHttpRequest() { + exists(API::Node requestNode, string method | + request = requestNode.getAnImmediateUse() and + this = request.asExpr().getExpr() + | + // Net::HTTP.get(...) 
+ method = "get" and + requestNode = API::getTopLevelMember("Net").getMember("HTTP").getReturn(method) and + responseBody = request + or + // Net::HTTP.post(...).body + method in ["post", "post_form"] and + requestNode = API::getTopLevelMember("Net").getMember("HTTP").getReturn(method) and + responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"]) + or + // Net::HTTP.new(..).get(..).body + method in [ + "get", "get2", "request_get", "head", "head2", "request_head", "delete", "put", "patch", + "post", "post2", "request_post", "request" + ] and + requestNode = API::getTopLevelMember("Net").getMember("HTTP").getInstance().getReturn(method) and + responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"]) + ) + } + + /** + * Gets the node representing the URL of the request. + * Currently unused, but may be useful in future, e.g. to filter out certain requests. + */ + DataFlow::Node getURLArgument() { result = request.getArgument(0) } + + override DataFlow::Node getResponseBody() { result = responseBody } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // A Net::HTTP request bypasses certificate validation if we see a setter + // call like this: + // foo.verify_mode = OpenSSL::SSL::VERIFY_NONE + // and then the receiver of that call flows to the receiver in the request: + // foo.request(...) 
+ exists(DataFlow::CallNode setter | + disablingNode = + API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() and + setter.asExpr().getExpr().(SetterMethodCall).getMethodName() = "verify_mode=" and + disablingNode = setter.getArgument(0) and + localFlow(setter.getReceiver(), request.getReceiver()) + ) + } + + override string getFramework() { result = "Net::HTTP" } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll new file mode 100644 index 00000000000..54a2c180fec --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll @@ -0,0 +1,113 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs +private import codeql.ruby.frameworks.StandardLibrary + +/** + * A call that makes an HTTP request using `OpenURI` via `URI.open` or + * `URI.parse(...).open`. + * + * ```ruby + * URI.open("http://example.com").readlines + * URI.parse("http://example.com").open.read + * ``` + */ +class OpenUriRequest extends HTTP::Client::Request::Range { + API::Node requestNode; + DataFlow::Node requestUse; + + OpenUriRequest() { + requestNode = + [API::getTopLevelMember("URI"), API::getTopLevelMember("URI").getReturn("parse")] + .getReturn("open") and + requestUse = requestNode.getAnImmediateUse() and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { + result = requestNode.getAMethodCall(["read", "readlines"]) + } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + exists(DataFlow::Node arg | + arg.asExpr().getExpr() = requestUse.asExpr().getExpr().(MethodCall).getArgument(_) + | + argumentDisablesValidation(arg, disablingNode) + ) + } + + override string getFramework() { result = "OpenURI" } +} + +/** + * A call that makes an HTTP request using `OpenURI` and its `Kernel.open` + * 
interface. + * + * ```ruby + * Kernel.open("http://example.com").read + * ``` + */ +class OpenUriKernelOpenRequest extends HTTP::Client::Request::Range { + DataFlow::Node requestUse; + + OpenUriKernelOpenRequest() { + requestUse instanceof KernelMethodCall and + this.getMethodName() = "open" and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::CallNode getResponseBody() { + result.asExpr().getExpr().(MethodCall).getMethodName() in ["read", "readlines"] and + requestUse.(DataFlow::LocalSourceNode).flowsTo(result.getReceiver()) + } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + exists(DataFlow::Node arg, int i | + i > 0 and + arg.asExpr().getExpr() = requestUse.asExpr().getExpr().(MethodCall).getArgument(i) + | + argumentDisablesValidation(arg, disablingNode) + ) + } + + override string getFramework() { result = "OpenURI" } +} + +/** + * Holds if the argument `arg` is an options hash that disables certificate + * validation, and `disablingNode` is the specific node representing the + * `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE` pair. + */ +private predicate argumentDisablesValidation(DataFlow::Node arg, DataFlow::Node disablingNode) { + // Either passed as an individual key:value argument, e.g.: + // URI.open(..., ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE) + isSslVerifyModeNonePair(arg.asExpr().getExpr()) and + disablingNode = arg + or + // Or as a single hash argument, e.g.: + // URI.open(..., { ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, ... }) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isSslVerifyModeNonePair(p) and + optionsNode.flowsTo(arg) and + disablingNode.asExpr().getExpr() = p + ) +} + +/** Holds if `p` is the pair `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE`.
*/ +private predicate isSslVerifyModeNonePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isSslVerifyModeLiteral(key) and + value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + ) +} + +/** Holds if `node` can represent the symbol literal `:ssl_verify_mode`. */ +private predicate isSslVerifyModeLiteral(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = "ssl_verify_mode" and + literal.flowsTo(node) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll new file mode 100644 index 00000000000..3b6ff318b66 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll @@ -0,0 +1,71 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `RestClient`. 
+ * ```ruby + * RestClient.get("http://example.com").body + * ``` + */ +class RestClientHttpRequest extends HTTP::Client::Request::Range { + DataFlow::Node requestUse; + API::Node requestNode; + API::Node connectionNode; + + RestClientHttpRequest() { + connectionNode = + [ + API::getTopLevelMember("RestClient"), + API::getTopLevelMember("RestClient").getMember("Resource").getInstance() + ] and + requestNode = + connectionNode.getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and + requestUse = requestNode.getAnImmediateUse() and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // `RestClient::Resource::new` takes an options hash argument, and we're + // looking for `{ verify_ssl: OpenSSL::SSL::VERIFY_NONE }`. + exists(DataFlow::Node arg, int i | + i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + | + // Either passed as an individual key:value argument, e.g.: + // RestClient::Resource.new(..., verify_ssl: OpenSSL::SSL::VERIFY_NONE) + isVerifySslNonePair(arg.asExpr().getExpr()) and + disablingNode = arg + or + // Or as a single hash argument, e.g.: + // RestClient::Resource.new(..., { verify_ssl: OpenSSL::SSL::VERIFY_NONE }) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isVerifySslNonePair(p) and + optionsNode.flowsTo(arg) and + disablingNode.asExpr().getExpr() = p + ) + ) + } + + override string getFramework() { result = "RestClient" } +} + +/** Holds if `p` is the pair `verify_ssl: OpenSSL::SSL::VERIFY_NONE`. 
*/ +private predicate isVerifySslNonePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and value.asExpr().getExpr() = p.getValue() + | + isSslVerifyModeLiteral(key) and + value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + ) +} + +/** Holds if `node` can represent the symbol literal `:verify_ssl`. */ +private predicate isSslVerifyModeLiteral(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = "verify_ssl" and + literal.flowsTo(node) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll new file mode 100644 index 00000000000..38fa5288079 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll @@ -0,0 +1,74 @@ +private import ruby +private import codeql.ruby.Concepts +private import codeql.ruby.ApiGraphs + +/** + * A call that makes an HTTP request using `Typhoeus`. + * ```ruby + * Typhoeus.get("http://example.com").body + * ``` + */ +class TyphoeusHttpRequest extends HTTP::Client::Request::Range { + DataFlow::Node requestUse; + API::Node requestNode; + + TyphoeusHttpRequest() { + requestUse = requestNode.getAnImmediateUse() and + requestNode = + API::getTopLevelMember("Typhoeus") + .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and + this = requestUse.asExpr().getExpr() + } + + override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") } + + override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { + // Check for `ssl_verifypeer: false` in the options hash. 
+ exists(DataFlow::Node arg, int i | + i > 0 and arg.asExpr().getExpr() = requestUse.asExpr().getExpr().(MethodCall).getArgument(i) + | + // Either passed as an individual key:value argument, e.g.: + // Typhoeus.get(..., ssl_verifypeer: false) + isSslVerifyPeerFalsePair(arg.asExpr().getExpr()) and + disablingNode = arg + or + // Or as a single hash argument, e.g.: + // Typhoeus.get(..., { ssl_verifypeer: false, ... }) + exists(DataFlow::LocalSourceNode optionsNode, Pair p | + p = optionsNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and + isSslVerifyPeerFalsePair(p) and + optionsNode.flowsTo(arg) and + disablingNode.asExpr().getExpr() = p + ) + ) + } + + override string getFramework() { result = "Typhoeus" } +} + +/** Holds if `p` is the pair `ssl_verifypeer: false`. */ +private predicate isSslVerifyPeerFalsePair(Pair p) { + exists(DataFlow::Node key, DataFlow::Node value | + key.asExpr().getExpr() = p.getKey() and + value.asExpr().getExpr() = p.getValue() + | + isSslVerifyPeerLiteral(key) and + isFalse(value) + ) +} + +/** Holds if `node` represents the symbol literal `ssl_verifypeer`. */ +private predicate isSslVerifyPeerLiteral(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(SymbolLiteral).getValueText() = "ssl_verifypeer" and + literal.flowsTo(node) + ) +} + +/** Holds if `node` can contain the Boolean value `false`. */ +private predicate isFalse(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode literal | + literal.asExpr().getExpr().(BooleanLiteral).isFalse() and + literal.flowsTo(node) + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll new file mode 100644 index 00000000000..0b5604dc670 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll @@ -0,0 +1,203 @@ +/** + * Provides queries to pretty-print a Ruby abstract syntax tree as a graph.
+ * + * By default, this will print the AST for all nodes in the database. To change + * this behavior, extend `PrintAstConfiguration` and override `shouldPrintNode` + * to hold for only the AST nodes you wish to view. + */ + +private import AST +private import codeql.ruby.regexp.RegExpTreeView as RETV + +/** Holds if `n` appears in the desugaring of some other node. */ +predicate isDesugared(AstNode n) { + n = any(AstNode sugar).getDesugared() + or + isDesugared(n.getParent()) +} + +/** + * The query can extend this class to control which nodes are printed. + */ +class PrintAstConfiguration extends string { + PrintAstConfiguration() { this = "PrintAstConfiguration" } + + /** + * Holds if the given node should be printed. + */ + predicate shouldPrintNode(AstNode n) { + not isDesugared(n) + or + not n.isSynthesized() + or + n.isSynthesized() and + not n = any(AstNode sugar).getDesugared() and + exists(AstNode parent | + parent = n.getParent() and + not parent.isSynthesized() and + not n = parent.getDesugared() + ) + } + + predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) { + child = parent.getAChild(edgeName) and + not child = parent.getDesugared() + } +} + +private predicate shouldPrintNode(AstNode n) { + any(PrintAstConfiguration config).shouldPrintNode(n) +} + +private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) { + any(PrintAstConfiguration config).shouldPrintAstEdge(parent, edgeName, child) +} + +newtype TPrintNode = + TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or + TPrintRegExpNode(RETV::RegExpTerm term) { + exists(RegExpLiteral literal | + shouldPrintNode(literal) and + term.getRootTerm() = literal.getParsed() + ) + } + +/** + * A node in the output tree. + */ +class PrintAstNode extends TPrintNode { + /** Gets a textual representation of this node in the PrintAst output tree. */ + string toString() { none() } + + /** + * Gets the child node with name `edgeName`.
Typically this is the name of the + * predicate used to access the child. + */ + PrintAstNode getChild(string edgeName) { none() } + + /** Gets a child of this node. */ + final PrintAstNode getAChild() { result = getChild(_) } + + /** Gets the parent of this node, if any. */ + final PrintAstNode getParent() { result.getAChild() = this } + + /** + * Holds if this node is at the specified location. The location spans column + * `startcolumn` of line `startline` to column `endcolumn` of line `endline` + * in file `filepath`. For more information, see + * [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets a value used to order this node amongst its siblings. */ + int getOrder() { none() } + + /** + * Gets the value of the property of this node, where the name of the property + * is `key`. + */ + final string getProperty(string key) { + key = "semmle.label" and + result = this.toString() + or + key = "semmle.order" and result = this.getOrder().toString() + } +} + +/** An `AstNode` in the output tree. */ +class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode { + AstNode astNode; + + PrintRegularAstNode() { this = TPrintRegularAstNode(astNode) } + + override string toString() { + result = "[" + concat(astNode.getAPrimaryQlClass(), ", ") + "] " + astNode.toString() + } + + override PrintAstNode getChild(string edgeName) { + exists(AstNode child | shouldPrintAstEdge(astNode, edgeName, child) | + result = TPrintRegularAstNode(child) + ) + or + // If this AST node is a regexp literal, add the parsed regexp tree as a + // child. 
+ exists(RETV::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() | + result = TPrintRegExpNode(t) and edgeName = "getParsed" + ) + } + + override int getOrder() { + this = + rank[result](PrintRegularAstNode p, Location l, File f | + l = p.getLocation() and + f = l.getFile() + | + p order by f.getBaseName(), f.getAbsolutePath(), l.getStartLine(), l.getStartColumn() + ) + } + + /** Gets the location of this node. */ + Location getLocation() { result = astNode.getLocation() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + astNode.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** A parsed regexp node in the output tree. */ +class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode { + RETV::RegExpTerm regexNode; + + PrintRegExpNode() { this = TPrintRegExpNode(regexNode) } + + override string toString() { + result = "[" + concat(regexNode.getAPrimaryQlClass(), ", ") + "] " + regexNode.toString() + } + + override PrintAstNode getChild(string edgeName) { + // Use the child index as an edge name. + exists(int i | result = TPrintRegExpNode(regexNode.getChild(i)) and edgeName = i.toString()) + } + + override int getOrder() { exists(RETV::RegExpTerm p | p.getChild(result) = regexNode) } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + regexNode.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Holds if `node` belongs to the output tree, and its property `key` has the + * given `value`. + */ +query predicate nodes(PrintAstNode node, string key, string value) { value = node.getProperty(key) } + +/** + * Holds if `target` is a child of `source` in the AST, and property `key` of + * the edge has the given `value`. 
+ */ +query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) { + target = source.getChild(_) and + ( + key = "semmle.label" and + value = strictconcat(string name | source.getChild(name) = target | name, "/") + or + key = "semmle.order" and + value = target.getProperty("semmle.order") + ) +} + +/** + * Holds if property `key` of the graph has the given `value`. + */ +query predicate graphProperties(string key, string value) { + key = "semmle.graphKind" and value = "tree" +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll new file mode 100644 index 00000000000..a805366bab8 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll @@ -0,0 +1,343 @@ +private import ReDoSUtil +private import RegExpTreeView +private import codeql.Locations + +/* + * This query implements the analysis described in the following two papers: + * + * James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for + * Regular Expression Denial-of-Service Attacks. NSS 2013. + * (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf) + * Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression + * Exponential Runtime via Substructural Logics. 2014. + * (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf) + * + * The basic idea is to search for overlapping cycles in the NFA, that is, + * states `q` such that there are two distinct paths from `q` to itself + * that consume the same word `w`. + * + * For any such state `q`, an attack string can be constructed as follows: + * concatenate a prefix `v` that takes the NFA to `q` with `n` copies of + * the word `w` that leads back to `q` along two different paths, followed + * by a suffix `x` that is _not_ accepted in state `q`. 
A backtracking + * implementation will need to explore at least 2^n different ways of going + * from `q` back to itself while trying to match the `n` copies of `w` + * before finally giving up. + * + * Now in order to identify overlapping cycles, all we have to do is find + * pumpable forks, that is, states `q` that can transition to two different + * states `r1` and `r2` on the same input symbol `c`, such that there are + * paths from both `r1` and `r2` to `q` that consume the same word. The latter + * condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)` + * in the product NFA. + * + * This is what the query does. It makes a simple attempt to construct a + * prefix `v` leading into `q`, but only to improve the alert message. + * And the query tries to prove the existence of a suffix that ensures + * rejection. This check might fail, which can cause false positives. + * + * Finally, sometimes it depends on the translation whether the NFA generated + * for a regular expression has a pumpable fork or not. We implement one + * particular translation, which may result in false positives or negatives + * relative to some particular JavaScript engine. + * + * More precisely, the query constructs an NFA from a regular expression `r` + * as follows: + * + * * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing + * the state of the automaton before attempting to match the `i`th character in `t`. + * * There is one accepting state `Accept(r)`. + * * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string + * by using an epsilon transition to `Accept(r)` and an any transition to itself. + * * Transitions between states may be labelled with epsilon, or an abstract + * input symbol. + * * Each abstract input symbol represents a set of concrete input characters: + * either a single character, a set of characters represented by a + * character class, or the set of all characters. 
+ * * The product automaton is constructed lazily, starting with pair states + * `(q, q)` where `q` is a fork, and proceeding along an over-approximate + * step relation. + * * The over-approximate step relation allows transitions along pairs of + * abstract input symbols where the symbols have overlap in the characters they accept. + * * Once a trace of pairs of abstract input symbols that leads from a fork + * back to itself has been identified, we attempt to construct a concrete + * string corresponding to it, which may fail. + * * Lastly we ensure that any state reached by repeating `n` copies of `w` has + * a suffix `x` (possibly empty) that is most likely __not__ accepted. + */ + +/** + * Holds if state `s` might be inside a backtracking repetition. + */ +pragma[noinline] +private predicate stateInsideBacktracking(State s) { + s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition +} + +/** + * An infinitely repeating quantifier that might backtrack. + */ +private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier { + MaybeBacktrackingRepetition() { + exists(RegExpTerm child | + child instanceof RegExpAlt or + child instanceof RegExpQuantifier + | + child.getParent+() = this + ) + } +} + +/** + * A state in the product automaton. + * + * We lazily only construct those states that we are actually + * going to need: `(q, q)` for every fork state `q`, and any + * pair of states that can be reached from a pair that we have + * already constructed. To cut down on the number of states, + * we only represent states `(q1, q2)` where `q1` is lexicographically + * no bigger than `q2`. + * + * States are only constructed if both states in the pair are + * inside a repetition that might backtrack. + */ +private newtype TStatePair = + MkStatePair(State q1, State q2) { + isFork(q1, _, _, _, _) and q2 = q1 + or + (step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and + rankState(q1) <= rankState(q2) + } + +/** + * Gets a unique number for a `state`.
+ * Is used to create an ordering of states, where states with the same `toString()` will be ordered differently. + */ +private int rankState(State state) { + state = + rank[result](State s, Location l | + l = s.getRepr().getLocation() + | + s order by l.getStartLine(), l.getStartColumn(), s.toString() + ) +} + +/** + * A state in the product automaton. + */ +private class StatePair extends TStatePair { + State q1; + State q2; + + StatePair() { this = MkStatePair(q1, q2) } + + /** Gets a textual representation of this element. */ + string toString() { result = "(" + q1 + ", " + q2 + ")" } + + /** Gets the first component of the state pair. */ + State getLeft() { result = q1 } + + /** Gets the second component of the state pair. */ + State getRight() { result = q2 } +} + +/** + * Holds for all constructed state pairs. + * + * Used in `statePairDist` + */ +private predicate isStatePair(StatePair p) { any() } + +/** + * Holds if there are transitions from the components of `q` to the corresponding + * components of `r`. + * + * Used in `statePairDist` + */ +private predicate delta2(StatePair q, StatePair r) { step(q, _, _, r) } + +/** + * Gets the minimum length of a path from `q` to `r` in the + * product automaton. + */ +private int statePairDist(StatePair q, StatePair r) = + shortestDistances(isStatePair/1, delta2/2)(q, r, result) + +/** + * Holds if there are transitions from `q` to `r1` and from `q` to `r2` + * labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not + * trivially have an empty intersection. + * + * This predicate only holds for states associated with regular expressions + * that have at least one repetition quantifier in them (otherwise the + * expression cannot be vulnerable to ReDoS attacks anyway). 
+ */ +pragma[noopt] +private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) { + stateInsideBacktracking(q) and + exists(State q1, State q2 | + q1 = epsilonSucc*(q) and + delta(q1, s1, r1) and + q2 = epsilonSucc*(q) and + delta(q2, s2, r2) and + // Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join. + // From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals, + // and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions. + exists(intersect(s1, s2)) + | + s1 != s2 + or + r1 != r2 + or + r1 = r2 and q1 != q2 + or + // If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state: + // one that uses the loop and one that doesn't. The engine will separately attempt to match with each path, + // despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not. + // To avoid every state in the loop becoming a fork state, + // we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop + // (every epsilon-loop must contain such a state). + // + // We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself. + // This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`. + // The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`, + // and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
+ r1 = r2 and + q1 = q2 and + epsilonSucc+(q) = q and + exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and + // One of the mid states is an infinite quantifier itself + exists(State mid, RegExpTerm term | + mid = epsilonSucc+(q) and + term = mid.getRepr() and + term instanceof InfiniteRepetitionQuantifier and + q = epsilonSucc+(mid) and + not mid = q + ) + ) and + stateInsideBacktracking(r1) and + stateInsideBacktracking(r2) +} + +/** + * Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only + * one or the other is defined. + */ +private StatePair mkStatePair(State q1, State q2) { + result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1) +} + +/** + * Holds if there are transitions from the components of `q` to the corresponding + * components of `r` labelled with `s1` and `s2`, respectively. + */ +private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) { + exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2)) +} + +/** + * Holds if there are transitions from the components of `q` to `r1` and `r2` + * labelled with `s1` and `s2`, respectively. + * + * We only consider transitions where the resulting states `(r1, r2)` are both + * inside a repetition that might backtrack. + */ +pragma[noopt] +private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) { + exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 | + deltaClosed(q1, s1, r1) and + deltaClosed(q2, s2, r2) and + // use noopt to force the join on `intersect` to happen last. 
+ exists(intersect(s1, s2)) + ) and + stateInsideBacktracking(r1) and + stateInsideBacktracking(r2) +} + +private newtype TTrace = + Nil() or + Step(InputSymbol s1, InputSymbol s2, TTrace t) { + exists(StatePair p | + isReachableFromFork(_, p, t, _) and + step(p, s1, s2, _) + ) + or + t = Nil() and isFork(_, s1, s2, _, _) + } + +/** + * A list of pairs of input symbols that describe a path in the product automaton + * starting from some fork state. + */ +private class Trace extends TTrace { + /** Gets a textual representation of this element. */ + string toString() { + this = Nil() and result = "Nil()" + or + exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) | + result = "Step(" + s1 + ", " + s2 + ", " + t + ")" + ) + } +} + +/** + * Gets a string corresponding to the trace `t`. + */ +private string concretise(Trace t) { + t = Nil() and result = "" + or + exists(InputSymbol s1, InputSymbol s2, Trace rest | t = Step(s1, s2, rest) | + result = concretise(rest) + intersect(s1, s2) + ) +} + +/** + * Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is + * a path from `r` back to `(fork, fork)` with `rem` steps. + */ +private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) { + // base case + exists(InputSymbol s1, InputSymbol s2, State q1, State q2 | + isFork(fork, s1, s2, q1, q2) and + r = MkStatePair(q1, q2) and + w = Step(s1, s2, Nil()) and + rem = statePairDist(r, MkStatePair(fork, fork)) + ) + or + // recursive case + exists(StatePair p, Trace v, InputSymbol s1, InputSymbol s2 | + isReachableFromFork(fork, p, v, rem + 1) and + step(p, s1, s2, r) and + w = Step(s1, s2, v) and + rem >= statePairDist(r, MkStatePair(fork, fork)) + ) +} + +/** + * Gets a state in the product automaton from which `(fork, fork)` is + * reachable in zero or more epsilon transitions. 
+ */ +private StatePair getAForkPair(State fork) { + isFork(fork, _, _, _, _) and + result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork)) +} + +/** + * Holds if `fork` is a pumpable fork with word `w`. + */ +private predicate isPumpable(State fork, string w) { + exists(StatePair q, Trace t | + isReachableFromFork(fork, q, t, _) and + q = getAForkPair(fork) and + w = concretise(t) + ) +} + +/** + * An instantiation of `ReDoSConfiguration` for exponential backtracking. + */ +class ExponentialReDoSConfiguration extends ReDoSConfiguration { + ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" } + + override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll new file mode 100644 index 00000000000..da7a7917307 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll @@ -0,0 +1,891 @@ +/** + * Library for parsing for Ruby regular expressions. + * + * N.B. does not yet handle stripping whitespace and comments in regexes with + * the `x` (free-spacing) flag. + */ + +private import codeql.ruby.ast.Literal as AST +private import codeql.Locations + +class RegExp extends AST::RegExpLiteral { + /** + * Helper predicate for `charSetStart(int start, int end)`. + * + * In order to identify left brackets ('[') which actually start a character class, + * we perform a left to right scan of the string. + * + * To avoid negative recursion we return a boolean. See `escaping`, + * the helper for `escapingChar`, for a clean use of this pattern. + * + * result is true for those start chars that actually mark a start of a char set. 
+ */ + boolean charSetStart(int pos) { + exists(int index | + // is opening bracket + this.charSetDelimiter(index, pos) = true and + ( + // if this is the first bracket, `pos` starts a char set + index = 1 and result = true + or + // if the previous char set delimiter was not a closing bracket, `pos` does + // not start a char set. This is needed to handle cases such as `[[]` (a + // char set that matches the `[` char) + index > 1 and + not this.charSetDelimiter(index - 1, _) = false and + result = false + or + // special handling of cases such as `[][]` (the character-set of the characters `]` and `[`). + exists(int prevClosingBracketPos | + // previous bracket is a closing bracket + this.charSetDelimiter(index - 1, prevClosingBracketPos) = false and + if + // check if the character that comes before the previous closing bracket + // is an opening bracket (taking `^` into account) + // check if the character that comes before the previous closing bracket + // is an opening bracket (taking `^` into account) + exists(int posBeforePrevClosingBracket | + if this.getChar(prevClosingBracketPos - 1) = "^" + then posBeforePrevClosingBracket = prevClosingBracketPos - 2 + else posBeforePrevClosingBracket = prevClosingBracketPos - 1 + | + this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true + ) + then + // brackets without anything in between is not valid character ranges, so + // the first closing bracket in `[]]` and `[^]]` does not count, + // + // and we should _not_ mark the second opening bracket in `[][]` and `[^][]` + // as starting a new char set. ^ ^ + exists(int posBeforePrevClosingBracket | + this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true + | + result = this.charSetStart(posBeforePrevClosingBracket).booleanNot() + ) + else + // if not, `pos` does in fact mark a real start of a character range + result = true + ) + ) + ) + } + + /** + * Helper predicate for chars that could be character-set delimiters. 
+ * Holds if the (non-escaped) char at `pos` in the string, is the (one-based) `index` occurrence of a bracket (`[` or `]`) in the string. + * Result is `true` if the char is `[`, and `false` if the char is `]`. + */ + boolean charSetDelimiter(int index, int pos) { + pos = + rank[index](int p | + (this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and + // Brackets that are part of POSIX expressions should not count as + // char-set delimiters. + not exists(int x, int y | + this.posixStyleNamedCharacterProperty(x, y, _) and pos >= x and pos < y + ) + ) and + ( + this.nonEscapedCharAt(pos) = "[" and result = true + or + this.nonEscapedCharAt(pos) = "]" and result = false + ) + } + + predicate charSetStart(int start, int end) { + this.charSetStart(start) = true and + ( + this.getChar(start + 1) = "^" and end = start + 2 + or + not this.getChar(start + 1) = "^" and end = start + 1 + ) + } + + /** Whether there is a character class, between start (inclusive) and end (exclusive) */ + predicate charSet(int start, int end) { + exists(int innerStart, int innerEnd | + this.charSetStart(start, innerStart) and + not this.charSetStart(_, start) + | + end = innerEnd + 1 and + innerEnd = + min(int e | + e > innerStart and + this.nonEscapedCharAt(e) = "]" and + not exists(int x, int y | + this.posixStyleNamedCharacterProperty(x, y, _) and e >= x and e < y + ) + | + e + ) + ) + } + + predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) { + tokenStart = + rank[index](int start, int end | this.charSetToken(charsetStart, start, end) | start) and + this.charSetToken(charsetStart, tokenStart, tokenEnd) + } + + /** Either a char or a - */ + predicate charSetToken(int charsetStart, int start, int end) { + this.charSetStart(charsetStart, start) and + ( + this.escapedCharacter(start, end) + or + this.namedCharacterProperty(start, end, _) + or + exists(this.nonEscapedCharAt(start)) and end = start + 1 + ) + or + this.charSetToken(charsetStart, _,
start) and + ( + this.escapedCharacter(start, end) + or + this.namedCharacterProperty(start, end, _) + or + exists(this.nonEscapedCharAt(start)) and + end = start + 1 and + not this.getChar(start) = "]" + ) + } + + predicate charSetChild(int charsetStart, int start, int end) { + this.charSetToken(charsetStart, start, end) and + not exists(int rangeStart, int rangeEnd | + this.charRange(charsetStart, rangeStart, _, _, rangeEnd) and + rangeStart <= start and + rangeEnd >= end + ) + or + this.charRange(charsetStart, start, _, _, end) + } + + predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) { + exists(int index | + this.charRangeEnd(charsetStart, index) = true and + this.charSetToken(charsetStart, index - 2, start, lowerEnd) and + this.charSetToken(charsetStart, index, upperStart, end) + ) + } + + private boolean charRangeEnd(int charsetStart, int index) { + this.charSetToken(charsetStart, index, _, _) and + ( + index in [1, 2] and result = false + or + index > 2 and + exists(int connectorStart | + this.charSetToken(charsetStart, index - 1, connectorStart, _) and + this.nonEscapedCharAt(connectorStart) = "-" and + result = + this.charRangeEnd(charsetStart, index - 2) + .booleanNot() + .booleanAnd(this.charRangeEnd(charsetStart, index - 1).booleanNot()) + ) + or + not exists(int connectorStart | + this.charSetToken(charsetStart, index - 1, connectorStart, _) and + this.nonEscapedCharAt(connectorStart) = "-" + ) and + result = false + ) + } + + predicate escapingChar(int pos) { this.escaping(pos) = true } + + private boolean escaping(int pos) { + pos = -1 and result = false + or + this.getChar(pos) = "\\" and result = this.escaping(pos - 1).booleanNot() + or + this.getChar(pos) != "\\" and result = false + } + + /** Gets the text of this regex */ + string getText() { result = this.getValueText() } + + string getChar(int i) { result = this.getText().charAt(i) } + + string nonEscapedCharAt(int i) { + result = this.getText().charAt(i) 
and + not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) + } + + private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" } + + private predicate isGroupEnd(int i) { this.nonEscapedCharAt(i) = ")" and not this.inCharSet(i) } + + private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) } + + predicate failedToParse(int i) { + exists(this.getChar(i)) and + not exists(int start, int end | + this.topLevel(start, end) and + start <= i and + end > i + ) + } + + /** Matches named character properties such as `\p{Word}` and `[[:digit:]]` */ + predicate namedCharacterProperty(int start, int end, string name) { + pStyleNamedCharacterProperty(start, end, name) or + posixStyleNamedCharacterProperty(start, end, name) + } + + /** Gets the name of the character property in start,end */ + string getCharacterPropertyName(int start, int end) { + this.namedCharacterProperty(start, end, result) + } + + /** Matches a POSIX bracket expression such as `[:alnum:]` within a character class. */ + private predicate posixStyleNamedCharacterProperty(int start, int end, string name) { + this.getChar(start) = "[" and + this.getChar(start + 1) = ":" and + end = + min(int e | + e > start and + this.getChar(e - 2) = ":" and + this.getChar(e - 1) = "]" + | + e + ) and + exists(int nameStart | + this.getChar(start + 2) = "^" and nameStart = start + 3 + or + not this.getChar(start + 2) = "^" and nameStart = start + 2 + | + name = this.getText().substring(nameStart, end - 2) + ) + } + + /** + * Matches named character properties. For example: + * - `\p{Space}` + * - `\P{Digit}` upper-case P means inverted + * - `\p{^Word}` caret also means inverted + * + * These can occur both inside and outside of character classes. 
+ */ + private predicate pStyleNamedCharacterProperty(int start, int end, string name) { + this.escapingChar(start) and + this.getChar(start + 1) in ["p", "P"] and + this.getChar(start + 2) = "{" and + this.getChar(end - 1) = "}" and + end > start and + not exists(int i | start + 2 < i and i < end - 1 | this.getChar(i) = "}") and + exists(int nameStart | + this.getChar(start + 3) = "^" and nameStart = start + 4 + or + not this.getChar(start + 3) = "^" and nameStart = start + 3 + | + name = this.getText().substring(nameStart, end - 1) + ) + } + + /** + * Holds if the named character property is inverted. Examples for which it holds: + * - `\P{Digit}` upper-case P means inverted + * - `\p{^Word}` caret also means inverted + * - `[[:^digit:]]` + * + * Examples for which it doesn't hold: + * - `\p{Word}` + * - `\P{^Space}` - upper-case P and caret cancel each other out + * - `[[:alnum:]]` + */ + predicate namedCharacterPropertyIsInverted(int start, int end) { + this.pStyleNamedCharacterProperty(start, end, _) and + exists(boolean upperP, boolean caret | + (if this.getChar(start + 1) = "P" then upperP = true else upperP = false) and + (if this.getChar(start + 3) = "^" then caret = true else caret = false) + | + upperP.booleanXor(caret) = true + ) + or + this.posixStyleNamedCharacterProperty(start, end, _) and + this.getChar(start + 2) = "^" + } + + predicate escapedCharacter(int start, int end) { + this.escapingChar(start) and + not this.numberedBackreference(start, _, _) and + not this.namedBackreference(start, _, _) and + not this.pStyleNamedCharacterProperty(start, _, _) and + ( + // hex char \xhh + this.getChar(start + 1) = "x" and end = start + 4 + or + // wide hex char \uhhhh + this.getChar(start + 1) = "u" and end = start + 6 + or + // escape not handled above; update when adding a new case + not this.getChar(start + 1) in ["x", "u"] and + not exists(this.getChar(start + 1).toInt()) and + end = start + 2 + ) + } + + predicate inCharSet(int index) { + exists(int
x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2]) + } + + predicate inPosixBracket(int index) { + exists(int x, int y | + this.posixStyleNamedCharacterProperty(x, y, _) and index in [x + 1 .. y - 2] + ) + } + + /** 'Simple' characters are any that don't alter the parsing of the regex. */ + private predicate simpleCharacter(int start, int end) { + end = start + 1 and + not this.charSet(start, _) and + not this.charSet(_, start + 1) and + not exists(int x, int y | + this.posixStyleNamedCharacterProperty(x, y, _) and + start >= x and + end <= y + ) and + exists(string c | c = this.getChar(start) | + exists(int x, int y, int z | + this.charSet(x, z) and + this.charSetStart(x, y) + | + start = y + or + start = z - 2 + or + start > y and start < z - 2 and not this.charRange(_, _, start, end, _) + ) + or + not this.inCharSet(start) and + not c = "(" and + not c = "[" and + not c = ")" and + not c = "|" and + not this.qualifier(start, _, _, _) + ) + } + + predicate character(int start, int end) { + ( + this.simpleCharacter(start, end) and + not exists(int x, int y | this.escapedCharacter(x, y) and x <= start and y >= end) + or + this.escapedCharacter(start, end) + ) and + not exists(int x, int y | this.groupStart(x, y) and x <= start and y >= end) and + not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end) and + not exists(int x, int y | + this.pStyleNamedCharacterProperty(x, y, _) and x <= start and y >= end + ) + } + + predicate normalCharacter(int start, int end) { + this.character(start, end) and + not this.specialCharacter(start, end, _) + } + + predicate specialCharacter(int start, int end, string char) { + this.character(start, end) and + not this.inCharSet(start) and + ( + end = start + 1 and + char = this.getChar(start) and + (char = "$" or char = "^" or char = ".") + or + end = start + 2 and + this.escapingChar(start) and + char = this.getText().substring(start, end) and + char = ["\\A", "\\Z", "\\z"] + ) + } + + /** Whether 
the text in the range `start,end` is a group */ + predicate group(int start, int end) { + this.groupContents(start, end, _, _) + or + this.emptyGroup(start, end) + } + + /** Gets the number of the group in start,end */ + int getGroupNumber(int start, int end) { + this.group(start, end) and + result = + count(int i | this.group(i, _) and i < start and not this.nonCapturingGroupStart(i, _)) + 1 + } + + /** Gets the name, if it has one, of the group in start,end */ + string getGroupName(int start, int end) { + this.group(start, end) and + exists(int nameEnd | + this.namedGroupStart(start, nameEnd) and + result = this.getText().substring(start + 3, nameEnd - 1) + ) + } + + /** Whether the text in the range start, end is a group and can match the empty string. */ + predicate zeroWidthMatch(int start, int end) { + this.emptyGroup(start, end) + or + this.negativeAssertionGroup(start, end) + or + this.positiveLookaheadAssertionGroup(start, end) + or + this.positiveLookbehindAssertionGroup(start, end) + } + + predicate emptyGroup(int start, int end) { + exists(int endm1 | end = endm1 + 1 | + this.groupStart(start, endm1) and + this.isGroupEnd(endm1) + ) + } + + private predicate emptyMatchAtStartGroup(int start, int end) { + this.emptyGroup(start, end) + or + this.negativeAssertionGroup(start, end) + or + this.positiveLookaheadAssertionGroup(start, end) + } + + private predicate emptyMatchAtEndGroup(int start, int end) { + this.emptyGroup(start, end) + or + this.negativeAssertionGroup(start, end) + or + this.positiveLookbehindAssertionGroup(start, end) + } + + private predicate negativeAssertionGroup(int start, int end) { + exists(int inStart | + this.negativeLookaheadAssertionStart(start, inStart) + or + this.negativeLookbehindAssertionStart(start, inStart) + | + this.groupContents(start, end, inStart, _) + ) + } + + predicate negativeLookaheadAssertionGroup(int start, int end) { + exists(int inStart | this.negativeLookaheadAssertionStart(start, inStart) | +
this.groupContents(start, end, inStart, _) + ) + } + + predicate negativeLookbehindAssertionGroup(int start, int end) { + exists(int inStart | this.negativeLookbehindAssertionStart(start, inStart) | + this.groupContents(start, end, inStart, _) + ) + } + + predicate positiveLookaheadAssertionGroup(int start, int end) { + exists(int inStart | this.lookaheadAssertionStart(start, inStart) | + this.groupContents(start, end, inStart, _) + ) + } + + predicate positiveLookbehindAssertionGroup(int start, int end) { + exists(int inStart | this.lookbehindAssertionStart(start, inStart) | + this.groupContents(start, end, inStart, _) + ) + } + + private predicate groupStart(int start, int end) { + this.nonCapturingGroupStart(start, end) + or + this.namedGroupStart(start, end) + or + this.lookaheadAssertionStart(start, end) + or + this.negativeLookaheadAssertionStart(start, end) + or + this.lookbehindAssertionStart(start, end) + or + this.negativeLookbehindAssertionStart(start, end) + or + this.commentGroupStart(start, end) + or + this.simpleGroupStart(start, end) + } + + /** Matches the start of a non-capturing group, e.g. `(?:` */ + private predicate nonCapturingGroupStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) = "?" and + this.getChar(start + 2) = ":" and + end = start + 3 + } + + /** Matches the start of a simple group, e.g. `(a+)`. */ + private predicate simpleGroupStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) != "?" and + end = start + 1 + } + + /** + * Matches the start of a named group, such as: + * - `(?\w+)` + * - `(?'name'\w+)` + */ + private predicate namedGroupStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) = "?" and + ( + this.getChar(start + 2) = "<" and + not this.getChar(start + 3) = "=" and // (?<=foo) is a positive lookbehind assertion + not this.getChar(start + 3) = "!" and // (? 
start + 3 and this.getChar(i) = ">") and + end = nameEnd + 1 + ) + or + this.getChar(start + 2) = "'" and + exists(int nameEnd | + nameEnd = min(int i | i > start + 2 and this.getChar(i) = "'") and end = nameEnd + 1 + ) + ) + } + + /** Matches the start of a positive lookahead assertion, i.e. `(?=`. */ + private predicate lookaheadAssertionStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) = "?" and + this.getChar(start + 2) = "=" and + end = start + 3 + } + + /** Matches the start of a negative lookahead assertion, i.e. `(?!`. */ + private predicate negativeLookaheadAssertionStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) = "?" and + this.getChar(start + 2) = "!" and + end = start + 3 + } + + /** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */ + private predicate lookbehindAssertionStart(int start, int end) { + this.isGroupStart(start) and + this.getChar(start + 1) = "?" and + this.getChar(start + 2) = "<" and + this.getChar(start + 3) = "=" and + end = start + 4 + } + + /** Matches the start of a negative lookbehind assertion, i.e. `(?`. */ + predicate namedBackreference(int start, int end, string name) { + this.escapingChar(start) and + this.getChar(start + 1) = "k" and + this.getChar(start + 2) = "<" and + exists(int nameEnd | nameEnd = min(int i | i > start + 3 and this.getChar(i) = ">") | + end = nameEnd + 1 and + name = this.getText().substring(start + 3, nameEnd) + ) + } + + /** Matches a numbered backreference, e.g. `\1`. */ + predicate numberedBackreference(int start, int end, int value) { + this.escapingChar(start) and + not this.getChar(start + 1) = "0" and + exists(string text, string svalue, int len | + end = start + len and + text = this.getText() and + len in [2 .. 
3] + | + svalue = text.substring(start + 1, start + len) and + value = svalue.toInt() and + not exists(text.substring(start + 1, start + len + 1).toInt()) and + value > 0 + ) + } + + /** Whether the text in the range `start,end` is a back reference */ + predicate backreference(int start, int end) { + this.numberedBackreference(start, end, _) + or + this.namedBackreference(start, end, _) + } + + /** Gets the number of the back reference in start,end */ + int getBackRefNumber(int start, int end) { this.numberedBackreference(start, end, result) } + + /** Gets the name, if it has one, of the back reference in start,end */ + string getBackRefName(int start, int end) { this.namedBackreference(start, end, result) } + + private predicate baseItem(int start, int end) { + this.character(start, end) and + not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end) + or + this.group(start, end) + or + this.charSet(start, end) + or + this.backreference(start, end) + or + this.pStyleNamedCharacterProperty(start, end, _) + } + + private predicate qualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) { + this.shortQualifier(start, end, maybeEmpty, mayRepeatForever) and + not this.getChar(end) = "?" + or + exists(int shortEnd | this.shortQualifier(start, shortEnd, maybeEmpty, mayRepeatForever) | + if this.getChar(shortEnd) = "?" then end = shortEnd + 1 else end = shortEnd + ) + } + + private predicate shortQualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) { + ( + this.getChar(start) = "+" and maybeEmpty = false and mayRepeatForever = true + or + this.getChar(start) = "*" and maybeEmpty = true and mayRepeatForever = true + or + this.getChar(start) = "?" 
and maybeEmpty = true and mayRepeatForever = false + ) and + end = start + 1 + or + exists(string lower, string upper | + this.multiples(start, end, lower, upper) and + (if lower = "" or lower.toInt() = 0 then maybeEmpty = true else maybeEmpty = false) and + if upper = "" then mayRepeatForever = true else mayRepeatForever = false + ) + } + + predicate multiples(int start, int end, string lower, string upper) { + exists(string text, string match, string inner | + text = this.getText() and + end = start + match.length() and + inner = match.substring(1, match.length() - 1) + | + match = text.regexpFind("\\{[0-9]+\\}", _, start) and + lower = inner and + upper = lower + or + match = text.regexpFind("\\{[0-9]*,[0-9]*\\}", _, start) and + exists(int commaIndex | + commaIndex = inner.indexOf(",") and + lower = inner.prefix(commaIndex) and + upper = inner.suffix(commaIndex + 1) + ) + ) + } + + /** + * Whether the text in the range start,end is a qualified item, where item is a character, + * a character set or a group. + */ + predicate qualifiedItem(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) { + this.qualifiedPart(start, _, end, maybeEmpty, mayRepeatForever) + } + + predicate qualifiedPart( + int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever + ) { + this.baseItem(start, partEnd) and + this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever) + } + + predicate item(int start, int end) { + this.qualifiedItem(start, end, _, _) + or + this.baseItem(start, end) and not this.qualifier(end, _, _, _) + } + + private predicate subsequence(int start, int end) { + ( + start = 0 or + this.groupStart(_, start) or + this.isOptionDivider(start - 1) + ) and + this.item(start, end) + or + exists(int mid | + this.subsequence(start, mid) and + this.item(mid, end) + ) + } + + /** + * Whether the text in the range start,end is a sequence of 1 or more items, where an item is a character, + * a character set or a group. 
+ */ + predicate sequence(int start, int end) { + this.sequenceOrQualified(start, end) and + not this.qualifiedItem(start, end, _, _) + } + + private predicate sequenceOrQualified(int start, int end) { + this.subsequence(start, end) and + not this.itemStart(end) + } + + private predicate itemStart(int start) { + this.character(start, _) or + this.isGroupStart(start) or + this.charSet(start, _) or + this.backreference(start, _) or + this.namedCharacterProperty(start, _, _) + } + + private predicate itemEnd(int end) { + this.character(_, end) + or + exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1) + or + this.charSet(_, end) + or + this.qualifier(_, end, _, _) + } + + private predicate topLevel(int start, int end) { + this.subalternation(start, end, _) and + not this.isOptionDivider(end) + } + + private predicate subalternation(int start, int end, int itemStart) { + this.sequenceOrQualified(start, end) and + not this.isOptionDivider(start - 1) and + itemStart = start + or + start = end and + not this.itemEnd(start) and + this.isOptionDivider(end) and + itemStart = start + or + exists(int mid | + this.subalternation(start, mid, _) and + this.isOptionDivider(mid) and + itemStart = mid + 1 + | + this.sequenceOrQualified(itemStart, end) + or + not this.itemStart(end) and end = itemStart + ) + } + + /** + * Whether the text in the range start,end is an alternation + */ + predicate alternation(int start, int end) { + this.topLevel(start, end) and + exists(int less | this.subalternation(start, less, _) and less < end) + } + + /** + * Whether the text in the range start,end is an alternation and the text in partStart, partEnd is one of the + * options in that alternation. + */ + predicate alternationOption(int start, int end, int partStart, int partEnd) { + this.alternation(start, end) and + this.subalternation(start, partEnd, partStart) + } + + /** A part of the regex that may match the start of the string. 
*/ + private predicate firstPart(int start, int end) { + start = 0 and end = this.getText().length() + or + exists(int x | this.firstPart(x, end) | + this.emptyMatchAtStartGroup(x, start) + or + this.qualifiedItem(x, start, true, _) + or + // ^ and \A match the start of the string + this.specialCharacter(x, start, ["^", "\\A"]) + ) + or + exists(int y | this.firstPart(start, y) | + this.item(start, end) + or + this.qualifiedPart(start, end, y, _, _) + ) + or + exists(int x, int y | this.firstPart(x, y) | + this.groupContents(x, y, start, end) + or + this.alternationOption(x, y, start, end) + ) + } + + /** A part of the regex that may match the end of the string. */ + private predicate lastPart(int start, int end) { + start = 0 and end = this.getText().length() + or + exists(int y | this.lastPart(start, y) | + this.emptyMatchAtEndGroup(end, y) + or + this.qualifiedItem(end, y, true, _) + or + // $, \Z, and \z match the end of the string. + this.specialCharacter(end, y, ["$", "\\Z", "\\z"]) + ) + or + exists(int x | + this.lastPart(x, end) and + this.item(start, end) + ) + or + exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _, _)) + or + exists(int x, int y | this.lastPart(x, y) | + this.groupContents(x, y, start, end) + or + this.alternationOption(x, y, start, end) + ) + } + + /** + * Whether the item at [start, end) is one of the first items + * to be matched. + */ + predicate firstItem(int start, int end) { + ( + this.character(start, end) + or + this.qualifiedItem(start, end, _, _) + or + this.charSet(start, end) + ) and + this.firstPart(start, end) + } + + /** + * Whether the item at [start, end) is one of the last items + * to be matched. 
+ */ + predicate lastItem(int start, int end) { + ( + this.character(start, end) + or + this.qualifiedItem(start, end, _, _) + or + this.charSet(start, end) + ) and + this.lastPart(start, end) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll new file mode 100644 index 00000000000..3d3655ad3a9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll @@ -0,0 +1,131 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about + * polynomial regular expression denial-of-service attacks, as well + * as extension points for adding your own. + */ + +private import codeql.ruby.AST as AST +private import codeql.ruby.CFG +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.regexp.ParseRegExp as RegExp +private import codeql.ruby.regexp.RegExpTreeView +private import codeql.ruby.regexp.SuperlinearBackTracking + +module PolynomialReDoS { + /** + * A data flow source node for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { + /** Gets the regex that is being executed by this node. */ + abstract RegExpTerm getRegExp(); + + /** Gets the node to highlight in the alert message. */ + DataFlow::Node getHighlight() { result = this } + } + + /** + * A sanitizer for polynomial regular expression denial-of-service vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A sanitizer guard for polynomial regular expression denial of service + * vulnerabilities. 
+ */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * Gets the AST of a regular expression object that can flow to `node`. + */ + RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) { + exists(DataFlow::LocalSourceNode regexp | + regexp.flowsTo(node) and + result = regexp.asExpr().(CfgNodes::ExprNodes::RegExpLiteralCfgNode).getExpr().getParsed() + ) + } + + /** + * A regexp match against a superlinear backtracking term, seen as a sink for + * polynomial regular expression denial-of-service vulnerabilities. + */ + class PolynomialBackTrackingTermMatch extends Sink { + PolynomialBackTrackingTerm term; + DataFlow::ExprNode matchNode; + + PolynomialBackTrackingTermMatch() { + exists(DataFlow::Node regexp | + term.getRootTerm() = getRegExpObjectFromNode(regexp) and + ( + // `=~` or `!~` + exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op | + matchNode.asExpr() = op and + ( + op.getExpr() instanceof AST::RegExpMatchExpr or + op.getExpr() instanceof AST::NoRegExpMatchExpr + ) and + ( + this.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand() + or + this.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand() + ) + ) + or + // Any of the methods on `String` that take a regexp. + exists(CfgNodes::ExprNodes::MethodCallCfgNode call | + matchNode.asExpr() = call and + call.getExpr().getMethodName() = + [ + "[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex", + "rpartition", "scan", "slice!", "split", "sub", "sub!" + ] and + this.asExpr() = call.getReceiver() and + regexp.asExpr() = call.getArgument(0) + ) + or + // A call to `match` or `match?` where the regexp is the receiver. 
+ exists(CfgNodes::ExprNodes::MethodCallCfgNode call | + matchNode.asExpr() = call and + call.getExpr().getMethodName() = ["match", "match?"] and + regexp.asExpr() = call.getReceiver() and + this.asExpr() = call.getArgument(0) + ) + ) + ) + } + + override RegExpTerm getRegExp() { result = term } + + override DataFlow::Node getHighlight() { result = matchNode } + } + + /** + * A check on the length of a string, seen as a sanitizer guard. + */ + class LengthGuard extends SanitizerGuard, CfgNodes::ExprNodes::RelationalOperationCfgNode { + private DataFlow::Node input; + + LengthGuard() { + exists(DataFlow::CallNode length, DataFlow::ExprNode operand | + length.asExpr().getExpr().(AST::MethodCall).getMethodName() = "length" and + length.getReceiver() = input and + length.flowsTo(operand) and + operand.getExprNode() = this.getAnOperand() + ) + } + + override predicate checks(CfgNode node, boolean branch) { + node = input.asExpr() and branch = true + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll new file mode 100644 index 00000000000..db7269d7fdb --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll @@ -0,0 +1,37 @@ +/** + * Provides a taint tracking configuration for reasoning about polynomial + * regular expression denial-of-service attacks. + * + * Note, for performance reasons: only import this file if `Configuration` is + * needed. Otherwise, `PolynomialReDoSCustomizations` should be imported + * instead. + */ + +private import codeql.ruby.DataFlow +private import codeql.ruby.TaintTracking + +/** + * Provides a taint-tracking configuration for detecting polynomial regular + * expression denial of service vulnerabilities. 
+ */ +module PolynomialReDoS { + import PolynomialReDoSCustomizations::PolynomialReDoS + + /** + * A taint-tracking configuration for detecting polynomial regular expression + * denial of service vulnerabilities. + */ + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "PolynomialReDoS" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard node) { + node instanceof SanitizerGuard + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll new file mode 100644 index 00000000000..496983ea849 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll @@ -0,0 +1,1186 @@ +/** + * Provides classes for working with regular expressions that can + * perform backtracking in superlinear/exponential time. + * + * This module contains a number of utility predicates for compiling a regular expression into an NFA and reasoning about this NFA. + * + * The `ReDoSConfiguration` contains an `isReDoSCandidate` predicate that is used + * to determine which states the prefix/suffix search should happen on. + * There is only meant to exist one `ReDoSConfiguration` at a time. + * + * The predicate `hasReDoSResult` outputs a de-duplicated set of + * states that will cause backtracking (a rejecting suffix exists). + */ + +import RegExpTreeView +private import codeql.Locations + +/** + * A configuration for which parts of a regular expression should be considered relevant for + * the different predicates in `ReDoS.qll`. + * Used to adjust the computations for either superlinear or exponential backtracking.
+ */ +abstract class ReDoSConfiguration extends string { + bindingset[this] + ReDoSConfiguration() { any() } + + /** + * Holds if `state` with the pump string `pump` is a candidate for a + * ReDoS vulnerable state. + * This is used to determine which states are considered for the prefix/suffix construction. + */ + abstract predicate isReDoSCandidate(State state, string pump); +} + +/** + * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking. + * No check whether a rejected suffix exists has been made. + */ +private predicate isReDoSCandidate(State state, string pump) { + any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and + ( + not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _) + or + epsilonSucc+(state) = state and + state = + max(State s, Location l | + s = epsilonSucc+(state) and + l = s.getRepr().getLocation() and + any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and + s.getRepr() instanceof InfiniteRepetitionQuantifier + | + s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine() + ) + ) +} + +/** + * Gets the char after `c` (from a simplified ASCII table). + */ +private string nextChar(string c) { exists(int code | code = ascii(c) | code + 1 = ascii(result)) } + +/** + * Gets an approximation for the ASCII code for `char`. + * Only the easily printable chars are included (so no newline, tab, null, etc). + */ +private int ascii(string char) { + char = + rank[result](string c | + c = + "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + .charAt(_) + ) +} + +/** + * Holds if `t` matches at least an epsilon symbol. + * + * That is, this term does not restrict the language of the enclosing regular expression. + * + * This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
+ */ +predicate matchesEpsilon(RegExpTerm t) { + t instanceof RegExpStar + or + t instanceof RegExpOpt + or + t.(RegExpRange).getLowerBound() = 0 + or + exists(RegExpTerm child | + child = t.getAChild() and + matchesEpsilon(child) + | + t instanceof RegExpAlt or + t instanceof RegExpGroup or + t instanceof RegExpPlus or + t instanceof RegExpRange + ) + or + matchesEpsilon(t.(RegExpBackRef).getGroup()) + or + forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child)) +} + +/** + * A lookahead/lookbehind that matches the empty string. + */ +class EmptyPositiveSubPatttern extends RegExpSubPattern { + EmptyPositiveSubPatttern() { + ( + this instanceof RegExpPositiveLookahead + or + this instanceof RegExpPositiveLookbehind + ) and + matchesEpsilon(this.getOperand()) + } +} + +/** + * A branch in a disjunction that is the root node in a literal, or a literal + * whose root node is not a disjunction. + */ +class RegExpRoot extends RegExpTerm { + RegExpParent parent; + + RegExpRoot() { + exists(RegExpAlt alt | + alt.isRootTerm() and + this = alt.getAChild() and + parent = alt.getParent() + ) + or + this.isRootTerm() and + not this instanceof RegExpAlt and + parent = this.getParent() + } + + /** + * Holds if this root term is relevant to the ReDoS analysis. + */ + predicate isRelevant() { + // there is at least one repetition + getRoot(any(InfiniteRepetitionQuantifier q)) = this and + // there are no lookbehinds + not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and + // is actually used as a RegExp + isUsedAsRegExp() //and + // // pragmatic performance optimization: ignore minified files. + // not getRootTerm().getParent().(Expr).getTopLevel().isMinified() + } +} + +/** + * A constant in a regular expression that represents valid Unicode character(s). 
+ */ +private class RegexpCharacterConstant extends RegExpConstant { + RegexpCharacterConstant() { this.isCharacter() } +} + +/** + * Holds if `term` is the chosen canonical representative for all terms with string representation `str`. + * + * Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s. + * The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks. + */ +private predicate isCanonicalTerm(RegExpTerm term, string str) { + term = + rank[1](RegExpTerm t, Location loc, File file | + loc = t.getLocation() and + file = t.getFile() and + str = t.getRawValue() + | + t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn() + ) +} + +/** + * An abstract input symbol, representing a set of concrete characters. + */ +private newtype TInputSymbol = + /** An input symbol corresponding to character `c`. */ + Char(string c) { + c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_) + } or + /** + * An input symbol representing all characters matched by + * a (non-universal) character class that has string representation `charClassString`. + */ + CharClass(string charClassString) { + exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and + exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) | + recc instanceof RegExpCharacterClass and + not recc.(RegExpCharacterClass).isUniversalClass() + or + recc instanceof RegExpCharacterClassEscape + or + recc instanceof RegExpNamedCharacterProperty + ) + } or + /** An input symbol representing all characters matched by `.`. */ + Dot() or + /** An input symbol representing all characters. */ + Any() or + /** An epsilon transition in the automaton. */ + Epsilon() + +/** + * Gets the canonical CharClass for `term`. 
+ */ +CharClass getCanonicalCharClass(RegExpTerm term) { + exists(string str | isCanonicalTerm(term, str) | result = CharClass(str)) +} + +/** + * Holds if `a` and `b` are input symbols from the same regexp. + */ +private predicate sharesRoot(TInputSymbol a, TInputSymbol b) { + exists(RegExpRoot root | + belongsTo(a, root) and + belongsTo(b, root) + ) +} + +/** + * Holds if the `a` is an input symbol from a regexp that has root `root`. + */ +private predicate belongsTo(TInputSymbol a, RegExpRoot root) { + exists(State s | getRoot(s.getRepr()) = root | + delta(s, a, _) + or + delta(_, a, s) + ) +} + +/** + * An abstract input symbol, representing a set of concrete characters. + */ +class InputSymbol extends TInputSymbol { + InputSymbol() { not this instanceof Epsilon } + + /** + * Gets a string representation of this input symbol. + */ + string toString() { + this = Char(result) + or + this = CharClass(result) + or + this = Dot() and result = "." + or + this = Any() and result = "[^]" + } +} + +/** + * An abstract input symbol that represents a character class. + */ +abstract private class CharacterClass extends InputSymbol { + /** + * Gets a character that is relevant for intersection-tests involving this + * character class. + * + * Specifically, this is any of the characters mentioned explicitly in the + * character class, offset by one if it is inverted. For character class escapes, + * the result is as if the class had been written out as a series of intervals. + * + * This set is large enough to ensure that for any two intersecting character + * classes, one contains a relevant character from the other. + */ + abstract string getARelevantChar(); + + /** + * Holds if this character class matches `char`. + */ + bindingset[char] + abstract predicate matches(string char); + + /** + * Gets a character matched by this character class. 
+ */ + string choose() { result = getARelevantChar() and matches(result) } +} + +/** + * Provides implementations for `CharacterClass`. + */ +private module CharacterClasses { + /** + * Holds if the character class `cc` has a child (constant or range) that matches `char`. + */ + pragma[noinline] + predicate hasChildThatMatches(RegExpCharacterClass cc, string char) { + exists(getCanonicalCharClass(cc)) and + exists(RegExpTerm child | child = cc.getAChild() | + char = child.(RegexpCharacterConstant).getValue() + or + rangeMatchesOnLetterOrDigits(child, char) + or + not rangeMatchesOnLetterOrDigits(child, _) and + char = getARelevantChar() and + exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) | + lo <= char and + char <= hi + ) + or + exists(RegExpCharacterClassEscape escape | escape = child | + escape.getValue() = escape.getValue().toLowerCase() and + classEscapeMatches(escape.getValue(), char) + or + char = getARelevantChar() and + escape.getValue() = escape.getValue().toUpperCase() and + not classEscapeMatches(escape.getValue().toLowerCase(), char) + ) + or + exists(RegExpNamedCharacterProperty charProp | charProp = child | + not charProp.isInverted() and + namedCharacterPropertyMatches(charProp.getName(), char) + or + char = getARelevantChar() and + charProp.isInverted() and + not namedCharacterPropertyMatches(charProp.getName(), char) + ) + ) + } + + /** + * Holds if `range` is a range on lower-case, upper-case, or digits, and matches `char`. + * This predicate is used to restrict the searchspace for ranges by only joining `getAnyPossiblyMatchedChar` + * on a few ranges. 
+ */ + private predicate rangeMatchesOnLetterOrDigits(RegExpCharacterRange range, string char) { + exists(string lo, string hi | + range.isRange(lo, hi) and lo = lowercaseLetter() and hi = lowercaseLetter() + | + lo <= char and + char <= hi and + char = lowercaseLetter() + ) + or + exists(string lo, string hi | + range.isRange(lo, hi) and lo = upperCaseLetter() and hi = upperCaseLetter() + | + lo <= char and + char <= hi and + char = upperCaseLetter() + ) + or + exists(string lo, string hi | range.isRange(lo, hi) and lo = digit() and hi = digit() | + lo <= char and + char <= hi and + char = digit() + ) + } + + private string lowercaseLetter() { result = "abdcefghijklmnopqrstuvwxyz".charAt(_) } + + private string upperCaseLetter() { result = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".charAt(_) } + + private string digit() { result = [0 .. 9].toString() } + + /** + * Gets a char that could be matched by a regular expression. + * Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matched by the regexp `/\s|\d|\w/`. + */ + string getARelevantChar() { + exists(ascii(result)) + or + exists(RegexpCharacterConstant c | result = c.getValue().charAt(_)) + or + classEscapeMatches(_, result) + } + + /** + * Gets a char that is mentioned in the character class `c`.
+ */ + private string getAMentionedChar(RegExpCharacterClass c) { + exists(RegExpTerm child | child = c.getAChild() | + result = child.(RegexpCharacterConstant).getValue() + or + child.(RegExpCharacterRange).isRange(result, _) + or + child.(RegExpCharacterRange).isRange(_, result) + or + exists(RegExpCharacterClassEscape escape | child = escape | + result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s)) + or + result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s)) + ) + or + exists(RegExpNamedCharacterProperty charProp | child = charProp | + result = min(string s | namedCharacterPropertyMatches(charProp.getName(), s)) + or + result = max(string s | namedCharacterPropertyMatches(charProp.getName(), s)) + ) + ) + } + + /** + * An implementation of `CharacterClass` for positive (non inverted) character classes. + */ + private class PositiveCharacterClass extends CharacterClass { + RegExpCharacterClass cc; + + PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() } + + override string getARelevantChar() { result = getAMentionedChar(cc) } + + override predicate matches(string char) { hasChildThatMatches(cc, char) } + } + + /** + * An implementation of `CharacterClass` for inverted character classes. + */ + private class InvertedCharacterClass extends CharacterClass { + RegExpCharacterClass cc; + + InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() } + + override string getARelevantChar() { + result = nextChar(getAMentionedChar(cc)) or + nextChar(result) = getAMentionedChar(cc) + } + + bindingset[char] + override predicate matches(string char) { not hasChildThatMatches(cc, char) } + } + + /** + * Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`. 
+ */ + pragma[noinline] + private predicate classEscapeMatches(string clazz, string char) { + clazz = "d" and + char = "0123456789".charAt(_) + or + clazz = "s" and + char = [" ", "\t", "\r", "\n", 11.toUnicode(), 12.toUnicode()] // 11.toUnicode() = \v, 12.toUnicode() = \f + or + clazz = "w" and + char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_) + } + + /** + * Holds if the named character property (e.g. from a POSIX bracket + * expression) `propName` matches `char`. For example, it holds when `propName` is + * `"word"` and `char` is `"a"`. + * + * TODO: expand to cover more properties. + */ + private predicate namedCharacterPropertyMatches(string propName, string char) { + propName = ["digit", "Digit"] and + char = "0123456789".charAt(_) + or + propName = ["space", "Space"] and + ( + char = [" ", "\t", "\r", "\n"] + or + char = getARelevantChar() and + char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed) + ) + or + propName = ["word", "Word"] and + char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_) + } + + /** + * An implementation of `CharacterClass` for \d, \s, and \w. + */ + private class PositiveCharacterClassEscape extends CharacterClass { + RegExpCharacterClassEscape cc; + + PositiveCharacterClassEscape() { + this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"] + } + + override string getARelevantChar() { + cc.getValue() = "d" and + result = ["0", "9"] + or + cc.getValue() = "s" and + result = [" "] + or + cc.getValue() = "w" and + result = ["a", "Z", "_", "0", "9"] + } + + override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) } + + override string choose() { + cc.getValue() = "d" and + result = "9" + or + cc.getValue() = "s" and + result = [" "] + or + cc.getValue() = "w" and + result = "a" + } + } + + /** + * An implementation of `CharacterClass` for \D, \S, and \W.
+ */ + private class NegativeCharacterClassEscape extends CharacterClass { + RegExpCharacterClassEscape cc; + + NegativeCharacterClassEscape() { + this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"] + } + + override string getARelevantChar() { + cc.getValue() = "D" and + result = ["a", "Z", "!"] + or + cc.getValue() = "S" and + result = ["a", "9", "!"] + or + cc.getValue() = "W" and + result = [" ", "!"] + } + + bindingset[char] + override predicate matches(string char) { + not classEscapeMatches(cc.getValue().toLowerCase(), char) + } + } + + /** + * An implementation of `CharacterClass` for positive (non-inverted) + * named character properties. + */ + private class PositiveNamedCharacterProperty extends CharacterClass { + RegExpNamedCharacterProperty cp; + + PositiveNamedCharacterProperty() { this = getCanonicalCharClass(cp) and not cp.isInverted() } + + override string getARelevantChar() { + exists(string lowerName | lowerName = cp.getName().toLowerCase() | + lowerName = "digit" and + result = ["0", "9"] + or + lowerName = "space" and + result = [" "] + or + lowerName = "word" and + result = ["a", "Z", "_", "0", "9"] + ) + } + + override predicate matches(string char) { namedCharacterPropertyMatches(cp.getName(), char) } + + override string choose() { + exists(string lowerName | lowerName = cp.getName().toLowerCase() | + lowerName = "digit" and + result = "9" + or + lowerName = "space" and + result = " " + or + lowerName = "word" and + result = "a" + ) + } + } + + private class InvertedNamedCharacterProperty extends CharacterClass { + RegExpNamedCharacterProperty cp; + + InvertedNamedCharacterProperty() { this = getCanonicalCharClass(cp) and cp.isInverted() } + + override string getARelevantChar() { + exists(string lowerName | lowerName = cp.getName().toLowerCase() | + lowerName = "digit" and + result = ["a", "Z", "!"] + or + lowerName = "space" and + result = ["a", "9", "!"] + or + lowerName = "word" and + result = [" ", "!"] + ) + } + + 
bindingset[char] + override predicate matches(string char) { + not namedCharacterPropertyMatches(cp.getName(), char) + } + } +} + +private class EdgeLabel extends TInputSymbol { + string toString() { + this = Epsilon() and result = "" + or + exists(InputSymbol s | this = s and result = s.toString()) + } +} + +/** + * Gets the state before matching `t`. + */ +pragma[inline] +private State before(RegExpTerm t) { result = Match(t, 0) } + +/** + * Gets a state the NFA may be in after matching `t`. + */ +private State after(RegExpTerm t) { + exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt)) + or + exists(RegExpSequence seq, int i | t = seq.getChild(i) | + result = before(seq.getChild(i + 1)) + or + i + 1 = seq.getNumChild() and result = after(seq) + ) + or + exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp)) + or + exists(RegExpStar star | t = star.getAChild() | result = before(star)) + or + exists(RegExpPlus plus | t = plus.getAChild() | + result = before(plus) or + result = after(plus) + ) + or + exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt)) + or + exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root)) +} + +/** + * Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`. 
+ */ +predicate delta(State q1, EdgeLabel lbl, State q2) { + exists(RegexpCharacterConstant s, int i | + q1 = Match(s, i) and + lbl = Char(s.getValue().charAt(i)) and + ( + q2 = Match(s, i + 1) + or + s.getValue().length() = i + 1 and + q2 = after(s) + ) + ) + or + exists(RegExpDot dot | q1 = before(dot) and q2 = after(dot) | + if dot.getLiteral().isDotAll() then lbl = Any() else lbl = Dot() + ) + or + exists(RegExpCharacterClass cc | + cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc) + or + q1 = before(cc) and + lbl = CharClass(cc.getRawValue()) and + q2 = after(cc) + ) + or + exists(RegExpCharacterClassEscape cc | + q1 = before(cc) and + lbl = CharClass(cc.getRawValue()) and + q2 = after(cc) + ) + or + exists(RegExpNamedCharacterProperty cp | + q1 = before(cp) and + lbl = CharClass(cp.getRawValue()) and + q2 = after(cp) + ) + or + exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild())) + or + exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0))) + or + exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0))) + or + exists(RegExpStar star | lbl = Epsilon() | + q1 = before(star) and q2 = before(star.getChild(0)) + or + q1 = before(star) and q2 = after(star) + ) + or + exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0))) + or + exists(RegExpOpt opt | lbl = Epsilon() | + q1 = before(opt) and q2 = before(opt.getChild(0)) + or + q1 = before(opt) and q2 = after(opt) + ) + or + exists(RegExpRoot root | q1 = AcceptAnySuffix(root) | + lbl = Any() and q2 = q1 + or + lbl = Epsilon() and q2 = Accept(root) + ) + or + exists(RegExpRoot root | q1 = Match(root, 0) | lbl = Any() and q2 = q1) + or + exists(RegExpDollar dollar | q1 = before(dollar) | + lbl = Epsilon() and q2 = Accept(getRoot(dollar)) + ) + or + exists(EmptyPositiveSubPatttern empty | q1 = before(empty) | + lbl = Epsilon() and q2 = 
after(empty) + ) +} + +/** + * Gets a state that `q` has an epsilon transition to. + */ +State epsilonSucc(State q) { delta(q, Epsilon(), result) } + +/** + * Gets a state that has an epsilon transition to `q`. + */ +State epsilonPred(State q) { q = epsilonSucc(result) } + +/** + * Holds if there is a state `q` that can be reached from `q1` + * along epsilon edges, such that there is a transition from + * `q` to `q2` that consumes symbol `s`. + */ +predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1), s, q2) } + +/** + * Gets the root containing the given term, that is, the root of the literal, + * or a branch of the root disjunction. + */ +RegExpRoot getRoot(RegExpTerm term) { + result = term or + result = getRoot(term.getParent()) +} + +private newtype TState = + Match(RegExpTerm t, int i) { + getRoot(t).isRelevant() and + ( + i = 0 + or + exists(t.(RegexpCharacterConstant).getValue().charAt(i)) + ) + } or + Accept(RegExpRoot l) { l.isRelevant() } or + AcceptAnySuffix(RegExpRoot l) { l.isRelevant() } + +/** + * Gets a state that is about to match the regular expression `t`. + */ +State mkMatch(RegExpTerm t) { result = Match(t, 0) } + +/** + * A state in the NFA corresponding to a regular expression. + * + * Each regular expression literal `l` has one accepting state + * `Accept(l)`, one state that accepts all suffixes `AcceptAnySuffix(l)`, + * and a state `Match(t, i)` for every subterm `t`, + * which represents the state of the NFA before starting to + * match `t`, or the `i`th character in `t` if `t` is a constant. + */ +class State extends TState { + RegExpTerm repr; + + State() { + this = Match(repr, _) or + this = Accept(repr) or + this = AcceptAnySuffix(repr) + } + + /** + * Gets a string representation for this state in a regular expression. 
+ */ + string toString() { + exists(int i | this = Match(repr, i) | result = "Match(" + repr + "," + i + ")") + or + this instanceof Accept and + result = "Accept(" + repr + ")" + or + this instanceof AcceptAnySuffix and + result = "AcceptAny(" + repr + ")" + } + + /** + * Gets the location for this state. + */ + Location getLocation() { result = repr.getLocation() } + + /** + * Gets the term represented by this state. + */ + RegExpTerm getRepr() { result = repr } +} + +/** + * Gets the minimum char that is matched by both the character classes `c` and `d`. + */ +private string getMinOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) { + result = min(getAOverlapBetweenCharacterClasses(c, d)) +} + +/** + * Gets a char that is matched by both the character classes `c` and `d`. + * And `c` and `d` is not the same character class. + */ +private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) { + sharesRoot(c, d) and + result = [c.getARelevantChar(), d.getARelevantChar()] and + c.matches(result) and + d.matches(result) and + not c = d +} + +/** + * Gets a character that is represented by both `c` and `d`. + */ +string intersect(InputSymbol c, InputSymbol d) { + (sharesRoot(c, d) or [c, d] = Any()) and + ( + c = Char(result) and + d = getAnInputSymbolMatching(result) + or + result = getMinOverlapBetweenCharacterClasses(c, d) + or + result = c.(CharacterClass).choose() and + ( + d = c + or + d = Dot() and + not (result = "\n" or result = "\r") + or + d = Any() + ) + or + (c = Dot() or c = Any()) and + (d = Dot() or d = Any()) and + result = "a" + ) + or + result = intersect(d, c) +} + +/** + * Gets a symbol that matches `char`. 
+ */ +bindingset[char] +InputSymbol getAnInputSymbolMatching(string char) { + result = Char(char) + or + result.(CharacterClass).matches(char) + or + result = Dot() and + not (char = "\n" or char = "\r") + or + result = Any() +} + +/** + * Predicates for constructing a prefix string that leads to a given state. + */ +private module PrefixConstruction { + /** + * Holds if `state` starts the string matched by the regular expression. + */ + private predicate isStartState(State state) { + state instanceof StateInPumpableRegexp and + ( + state = Match(any(RegExpRoot r), _) + or + exists(RegExpCaret car | state = after(car)) + ) + } + + /** + * Holds if `state` is the textually last start state for the regular expression. + */ + private predicate lastStartState(State state) { + exists(RegExpRoot root | + state = + max(State s, Location l | + isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation() + | + s + order by + l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(), + l.getEndLine() + ) + ) + } + + /** + * Holds if there exists any transition (Epsilon() or other) from `a` to `b`. + */ + private predicate existsTransition(State a, State b) { delta(a, _, b) } + + /** + * Gets the minimum number of transitions it takes to reach `state` from the `start` state. + */ + int prefixLength(State start, State state) = + shortestDistances(lastStartState/1, existsTransition/2)(start, state, result) + + /** + * Gets the minimum number of transitions it takes to reach `state` from the start state. + */ + private int lengthFromStart(State state) { result = prefixLength(_, state) } + + /** + * Gets a string for which the regular expression will reach `state`. + * + * Has at most one result for any given `state`. + * This predicate will not always have a result even if there is a ReDoS issue in + * the regular expression. 
+ */ + string prefix(State state) { + lastStartState(state) and + result = "" + or + // the search stops past the last redos candidate state. + lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and + exists(State prev | + // select a unique predecessor (by an arbitrary measure) + prev = + min(State s, Location loc | + lengthFromStart(s) = lengthFromStart(state) - 1 and + loc = s.getRepr().getLocation() and + delta(s, _, state) + | + s + order by + loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(), + s.getRepr().toString() + ) + | + // greedy search for the shortest prefix + result = prefix(prev) and delta(prev, Epsilon(), state) + or + not delta(prev, Epsilon(), state) and + result = prefix(prev) + getCanonicalEdgeChar(prev, state) + ) + } + + /** + * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA. + */ + private string getCanonicalEdgeChar(State prev, State next) { + result = + min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next)) + } + + /** + * A state within a regular expression that has a pumpable state. + */ + class StateInPumpableRegexp extends State { + pragma[noinline] + StateInPumpableRegexp() { + exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr())) + } + } +} + +/** + * Predicates for testing the presence of a rejecting suffix. + * + * These predicates are used to ensure that all the states reached from the fork + * by repeating `w` have a rejecting suffix. + * + * For example, a regexp like `/^(a+)+/` will accept any string as long as the prefix is + * some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix. + * + * A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix, + * as the suffix "X" will cause both the regular expressions to be rejected.
+ * + * The string `w` is repeated any number of times because it needs to be + * infinitely repeatable for the attack to work. + * For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork + * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes. + */ +private module SuffixConstruction { + import PrefixConstruction + + /** + * Holds if all states reachable from `fork` by repeating `w` + * are likely rejectable by appending some suffix. + */ + predicate reachesOnlyRejectableSuffixes(State fork, string w) { + isReDoSCandidate(fork, w) and + forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) + } + + /** + * Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected. + * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs. + */ + pragma[noinline] + private predicate isLikelyRejectable(StateInPumpableRegexp s) { + // exists a reject edge with some char. + hasRejectEdge(s) + or + hasEdgeToLikelyRejectable(s) + or + // stopping here is rejection + isRejectState(s) + } + + /** + * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state. + */ + predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) } + + /** + * Holds if there is likely a non-empty suffix leading to rejection starting in `s`. + */ + pragma[noopt] + predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) { + // all edges (at least one) with some char leads to another state that is rejectable. + // the `next` states might not share a common suffix, which can cause FPs. + exists(string char | char = hasEdgeToLikelyRejectableHelper(s) | + // noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
+ exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and + forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) + ) + } + + /** + * Gets a char for which there exists a transition away from `s`, + * and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`. + */ + pragma[noinline] + private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) { + not hasRejectEdge(s) and + not isRejectState(s) and + deltaClosedChar(s, result, _) + } + + /** + * Holds if there is a state `next` that can be reached from `prev` + * along epsilon edges, such that there is a transition from + * `prev` to `next` that matches the character symbol `char`. + */ + predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) { + deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next) + } + + pragma[noinline] + InputSymbol getAnInputSymbolMatchingRelevant(string char) { + char = relevant(_) and + result = getAnInputSymbolMatching(char) + } + + /** + * Gets a char used for finding possible suffixes inside `root`. + */ + pragma[noinline] + private string relevant(RegExpRoot root) { + exists(ascii(result)) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] + } + + /** + * Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA. + * The NFA does not model reject states, so the above is the same as saying there is a reject edge.
+ */ + private predicate hasRejectEdge(State s) { + hasSimpleRejectEdge(s) + or + not hasSimpleRejectEdge(s) and + exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _)) + } + + /** + * Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA. + * This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`. + */ + private predicate hasSimpleRejectEdge(State s) { + // The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`. + exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _)) + } + + /** + * Gets a state that can be reached from pumpable `fork` consuming all + * chars in `w` any number of times followed by the first `i+1` characters of `w`. + */ + pragma[noopt] + private State process(State fork, string w, int i) { + exists(State prev | prev = getProcessPrevious(fork, i, w) | + exists(string char, InputSymbol sym | + char = w.charAt(i) and + deltaClosed(prev, sym, result) and + // noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`. + // Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found. + sym = getAProcessInputSymbol(char) + ) + ) + } + + /** + * Gets a state that can be reached from pumpable `fork` consuming all + * chars in `w` any number of times followed by the first `i` characters of `w`. + */ + private State getProcessPrevious(State fork, int i, string w) { + isReDoSCandidate(fork, w) and + ( + i = 0 and result = fork + or + result = process(fork, w, i - 1) + or + // repeat until fixpoint + i = 0 and + result = process(fork, w, w.length() - 1) + ) + } + + /** + * Gets an InputSymbol that matches `char`. + * The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate. 
+ */ + private InputSymbol getAProcessInputSymbol(string char) { + char = getAProcessChar() and + result = getAnInputSymbolMatching(char) + } + + /** + * Gets a `char` that occurs in a `pump` string. + */ + private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) } +} + +/** + * Gets the result of backslash-escaping newlines, carriage-returns, tabs and + * backslashes in `s`. + */ +bindingset[s] +private string escape(string s) { + result = + s.replaceAll("\\", "\\\\") + .replaceAll("\n", "\\n") + .replaceAll("\r", "\\r") + .replaceAll("\t", "\\t") +} + +/** + * Gets `str` with the last `i` characters moved to the front. + * + * We use this to adjust the pump string to match with the beginning of + * a RegExpTerm, so it doesn't start in the middle of a constant. + */ +bindingset[str, i] +private string rotate(string str, int i) { + result = str.suffix(str.length() - i) + str.prefix(str.length() - i) +} + +/** + * Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`. + * Gets the shortest string that causes superlinear backtracking. + */ +private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) { + exists(int i, string c | s = Match(term, i) | + c = + min(string w | + any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and + SuffixConstruction::reachesOnlyRejectableSuffixes(s, w) + | + w order by w.length(), w + ) and + pump = escape(rotate(c, i)) + ) +} + +/** + * Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`. + * + * `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
+ */ +predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) { + not t.getRegExp().hasFreeSpacingFlag() and // exclude free-spacing mode regexes + isReDoSAttackable(t, pump, s) and + ( + prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and + not PrefixConstruction::prefix(s) = "" + or + PrefixConstruction::prefix(s) = "" and prefixMsg = "" + or + not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" + ) +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll new file mode 100644 index 00000000000..11fd0836ce1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll @@ -0,0 +1,724 @@ +private import codeql.ruby.ast.Literal as AST +private import codeql.Locations +private import ParseRegExp + +/** + * An element containing a regular expression term, that is, either + * a string literal (parsed as a regular expression) + * or another regular expression term. + */ +class RegExpParent extends TRegExpParent { + string toString() { result = "RegExpParent" } + + RegExpTerm getChild(int i) { none() } + + RegExpTerm getAChild() { result = getChild(_) } + + int getNumChild() { result = count(getAChild()) } + + /** + * Gets the name of a primary CodeQL class to which this regular + * expression term belongs. + */ + string getAPrimaryQlClass() { result = "RegExpParent" } + + /** + * Gets a comma-separated list of the names of the primary CodeQL classes to + * which this regular expression term belongs. 
+ */ + final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") } +} + +class RegExpLiteral extends TRegExpLiteral, RegExpParent { + RegExp re; + + RegExpLiteral() { this = TRegExpLiteral(re) } + + override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() } + + predicate isDotAll() { re.hasMultilineFlag() } + + override string getAPrimaryQlClass() { result = "RegExpLiteral" } +} + +class RegExpTerm extends RegExpParent { + RegExp re; + int start; + int end; + + RegExpTerm() { + this = TRegExpAlt(re, start, end) + or + this = TRegExpBackRef(re, start, end) + or + this = TRegExpCharacterClass(re, start, end) + or + this = TRegExpCharacterRange(re, start, end) + or + this = TRegExpNormalChar(re, start, end) + or + this = TRegExpGroup(re, start, end) + or + this = TRegExpQuantifier(re, start, end) + or + this = TRegExpSequence(re, start, end) and + exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead. 
+ or + this = TRegExpSpecialChar(re, start, end) + or + this = TRegExpNamedCharacterProperty(re, start, end) + } + + RegExpTerm getRootTerm() { + this.isRootTerm() and result = this + or + result = getParent().(RegExpTerm).getRootTerm() + } + + predicate isUsedAsRegExp() { any() } + + predicate isRootTerm() { start = 0 and end = re.getText().length() } + + override RegExpTerm getChild(int i) { + result = this.(RegExpAlt).getChild(i) + or + result = this.(RegExpBackRef).getChild(i) + or + result = this.(RegExpCharacterClass).getChild(i) + or + result = this.(RegExpCharacterRange).getChild(i) + or + result = this.(RegExpNormalChar).getChild(i) + or + result = this.(RegExpGroup).getChild(i) + or + result = this.(RegExpQuantifier).getChild(i) + or + result = this.(RegExpSequence).getChild(i) + or + result = this.(RegExpSpecialChar).getChild(i) + or + result = this.(RegExpNamedCharacterProperty).getChild(i) + } + + RegExpParent getParent() { result.getAChild() = this } + + RegExp getRegExp() { result = re } + + int getStart() { result = start } + + int getEnd() { result = end } + + override string toString() { result = re.getText().substring(start, end) } + + override string getAPrimaryQlClass() { result = "RegExpTerm" } + + Location getLocation() { result = re.getLocation() } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(int re_start, int re_end | + re.getComponent(0).getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and + re.getComponent(re.getNumberOfComponents() - 1) + .getLocation() + .hasLocationInfo(filepath, _, _, endline, re_end) + | + startcolumn = re_start + start and + endcolumn = re_start + end - 1 + ) + } + + File getFile() { result = this.getLocation().getFile() } + + string getRawValue() { result = this.toString() } + + RegExpLiteral getLiteral() { result = TRegExpLiteral(re) } + + /** Gets the regular expression term that is matched (textually) before this 
one, if any. */ + RegExpTerm getPredecessor() { + exists(RegExpTerm parent | parent = getParent() | + result = parent.(RegExpSequence).previousElement(this) + or + not exists(parent.(RegExpSequence).previousElement(this)) and + not parent instanceof RegExpSubPattern and + result = parent.getPredecessor() + ) + } + + /** Gets the regular expression term that is matched (textually) after this one, if any. */ + RegExpTerm getSuccessor() { + exists(RegExpTerm parent | parent = getParent() | + result = parent.(RegExpSequence).nextElement(this) + or + not exists(parent.(RegExpSequence).nextElement(this)) and + not parent instanceof RegExpSubPattern and + result = parent.getSuccessor() + ) + } +} + +newtype TRegExpParent = + TRegExpLiteral(RegExp re) or + TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or + TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or + TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or + TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or + TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or + TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or + TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or + TRegExpNormalChar(RegExp re, int start, int end) { re.normalCharacter(start, end) } or + TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or + TRegExpNamedCharacterProperty(RegExp re, int start, int end) { + re.namedCharacterProperty(start, end, _) + } + +class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier { + int part_end; + boolean maybe_empty; + boolean may_repeat_forever; + + RegExpQuantifier() { + this = TRegExpQuantifier(re, start, end) and + re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever) + } + + override RegExpTerm getChild(int i) { + i = 0 and + 
result.getRegExp() = re and + result.getStart() = start and + result.getEnd() = part_end + } + + predicate mayRepeatForever() { may_repeat_forever = true } + + string getQualifier() { result = re.getText().substring(part_end, end) } + + override string getAPrimaryQlClass() { result = "RegExpQuantifier" } +} + +class InfiniteRepetitionQuantifier extends RegExpQuantifier { + InfiniteRepetitionQuantifier() { this.mayRepeatForever() } + + override string getAPrimaryQlClass() { result = "InfiniteRepetitionQuantifier" } +} + +class RegExpStar extends InfiniteRepetitionQuantifier { + RegExpStar() { this.getQualifier().charAt(0) = "*" } + + override string getAPrimaryQlClass() { result = "RegExpStar" } +} + +class RegExpPlus extends InfiniteRepetitionQuantifier { + RegExpPlus() { this.getQualifier().charAt(0) = "+" } + + override string getAPrimaryQlClass() { result = "RegExpPlus" } +} + +class RegExpOpt extends RegExpQuantifier { + RegExpOpt() { this.getQualifier().charAt(0) = "?" } + + override string getAPrimaryQlClass() { result = "RegExpOpt" } +} + +class RegExpRange extends RegExpQuantifier { + string upper; + string lower; + + RegExpRange() { re.multiples(part_end, end, lower, upper) } + + string getUpper() { result = upper } + + string getLower() { result = lower } + + /** + * Gets the upper bound of the range, if any. + * + * If there is no upper bound, any number of repetitions is allowed. + * For a term of the form `r{lo}`, both the lower and the upper bound + * are `lo`. + */ + int getUpperBound() { result = this.getUpper().toInt() } + + /** Gets the lower bound of the range. 
*/ + int getLowerBound() { result = this.getLower().toInt() } + + override string getAPrimaryQlClass() { result = "RegExpRange" } +} + +class RegExpSequence extends RegExpTerm, TRegExpSequence { + RegExpSequence() { + this = TRegExpSequence(re, start, end) and + exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead. + } + + override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) } + + /** Gets the element preceding `element` in this sequence. */ + RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) } + + /** Gets the element following `element` in this sequence. */ + RegExpTerm nextElement(RegExpTerm element) { + exists(int i | + element = this.getChild(i) and + result = this.getChild(i + 1) + ) + } + + override string getAPrimaryQlClass() { result = "RegExpSequence" } +} + +pragma[nomagic] +private int seqChildEnd(RegExp re, int start, int end, int i) { + result = seqChild(re, start, end, i).getEnd() +} + +// moved out so we can use it in the charpred +private RegExpTerm seqChild(RegExp re, int start, int end, int i) { + re.sequence(start, end) and + ( + i = 0 and + result.getRegExp() = re and + result.getStart() = start and + exists(int itemEnd | + re.item(start, itemEnd) and + result.getEnd() = itemEnd + ) + or + i > 0 and + result.getRegExp() = re and + exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) | + result.getStart() = itemStart and + re.item(itemStart, result.getEnd()) + ) + ) +} + +class RegExpAlt extends RegExpTerm, TRegExpAlt { + RegExpAlt() { this = TRegExpAlt(re, start, end) } + + override RegExpTerm getChild(int i) { + i = 0 and + result.getRegExp() = re and + result.getStart() = start and + exists(int part_end | + re.alternationOption(start, end, start, part_end) and + result.getEnd() = part_end + ) + or + i > 0 and + result.getRegExp() = re and + exists(int part_start | + part_start = this.getChild(i 
- 1).getEnd() + 1 // allow for the | + | + result.getStart() = part_start and + re.alternationOption(start, end, part_start, result.getEnd()) + ) + } + + override string getAPrimaryQlClass() { result = "RegExpAlt" } +} + +class RegExpEscape extends RegExpNormalChar { + RegExpEscape() { re.escapedCharacter(start, end) } + + /** + * Gets the name of the escaped; for example, `w` for `\w`. + * TODO: Handle named escapes. + */ + override string getValue() { + this.isIdentityEscape() and result = this.getUnescaped() + or + this.getUnescaped() = "n" and result = "\n" + or + this.getUnescaped() = "r" and result = "\r" + or + this.getUnescaped() = "t" and result = "\t" + or + isUnicode() and + result = getUnicode() + } + + predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t"] } + + /** + * Gets the text for this escape. That is e.g. "\w". + */ + private string getText() { result = re.getText().substring(start, end) } + + /** + * Holds if this is a unicode escape. + */ + private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] } + + /** + * Gets the unicode char for this escape. + * E.g. for `\u0061` this returns "a". + */ + private string getUnicode() { + exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) | + result = codepoint.toUnicode() + ) + } + + /** + * Gets int value for the `index`th char in the hex number of the unicode escape. + * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex). + */ + private int getHexValueFromUnicode(int index) { + isUnicode() and + exists(string hex, string char | hex = getText().suffix(2) | + char = hex.charAt(index) and + result = 16.pow(hex.length() - index - 1) * toHex(char) + ) + } + + string getUnescaped() { result = this.getText().suffix(1) } + + override string getAPrimaryQlClass() { result = "RegExpEscape" } +} + +/** + * Gets the hex number for the `hex` char. + */ +private int toHex(string hex) { + hex = [0 .. 
9].toString() and + result = hex.toInt() + or + result = 10 and hex = ["a", "A"] + or + result = 11 and hex = ["b", "B"] + or + result = 12 and hex = ["c", "C"] + or + result = 13 and hex = ["d", "D"] + or + result = 14 and hex = ["e", "E"] + or + result = 15 and hex = ["f", "F"] +} + +/** + * A character class escape in a regular expression. + * That is, an escaped character that denotes multiple characters. + * + * Examples: + * + * ``` + * \w + * \S + * ``` + */ +class RegExpCharacterClassEscape extends RegExpEscape { + RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W", "h", "H"] } + + /** Gets the name of the character class; for example, `w` for `\w`. */ + // override string getValue() { result = value } + override RegExpTerm getChild(int i) { none() } + + override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" } +} + +/** + * A character class. + * + * Examples: + * + * ```rb + * /[a-fA-F0-9]/ + * /[^abc]/ + * ``` + */ +class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass { + RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) } + + predicate isInverted() { re.getChar(start + 1) = "^" } + + predicate isUniversalClass() { + // [^] + isInverted() and not exists(getAChild()) + or + // [\w\W] and similar + not isInverted() and + exists(string cce1, string cce2 | + cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and + cce2 = getAChild().(RegExpCharacterClassEscape).getValue() + | + cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase() + ) + } + + override RegExpTerm getChild(int i) { + i = 0 and + result.getRegExp() = re and + exists(int itemStart, int itemEnd | + result.getStart() = itemStart and + re.charSetStart(start, itemStart) and + re.charSetChild(start, itemStart, itemEnd) and + result.getEnd() = itemEnd + ) + or + i > 0 and + result.getRegExp() = re and + exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() | + result.getStart() = itemStart and + 
re.charSetChild(start, itemStart, result.getEnd()) + ) + } + + override string getAPrimaryQlClass() { result = "RegExpCharacterClass" } +} + +class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange { + int lower_end; + int upper_start; + + RegExpCharacterRange() { + this = TRegExpCharacterRange(re, start, end) and + re.charRange(_, start, lower_end, upper_start, end) + } + + predicate isRange(string lo, string hi) { + lo = re.getText().substring(start, lower_end) and + hi = re.getText().substring(upper_start, end) + } + + override RegExpTerm getChild(int i) { + i = 0 and + result.getRegExp() = re and + result.getStart() = start and + result.getEnd() = lower_end + or + i = 1 and + result.getRegExp() = re and + result.getStart() = upper_start and + result.getEnd() = end + } + + override string getAPrimaryQlClass() { result = "RegExpCharacterRange" } +} + +class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar { + RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) } + + predicate isCharacter() { any() } + + string getValue() { result = re.getText().substring(start, end) } + + override RegExpTerm getChild(int i) { none() } + + override string getAPrimaryQlClass() { result = "RegExpNormalChar" } +} + +class RegExpConstant extends RegExpTerm { + string value; + + RegExpConstant() { + this = TRegExpNormalChar(re, start, end) and + not this instanceof RegExpCharacterClassEscape and + // exclude chars in qualifiers + // TODO: push this into regex library + not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) | + qstart <= start and end <= qend + ) and + value = this.(RegExpNormalChar).getValue() + or + this = TRegExpSpecialChar(re, start, end) and + re.inCharSet(start) and + value = this.(RegExpSpecialChar).getChar() + } + + predicate isCharacter() { any() } + + string getValue() { result = value } + + override RegExpTerm getChild(int i) { none() } + + override string getAPrimaryQlClass() { result = "RegExpConstant" } +} + 
+class RegExpGroup extends RegExpTerm, TRegExpGroup { + RegExpGroup() { this = TRegExpGroup(re, start, end) } + + /** + * Gets the index of this capture group within the enclosing regular + * expression literal. + * + * For example, in the regular expression `/((a?).)(?:b)/`, the + * group `((a?).)` has index 1, the group `(a?)` nested inside it + * has index 2, and the group `(?:b)` has no index, since it is + * not a capture group. + */ + int getNumber() { result = re.getGroupNumber(start, end) } + + /** Holds if this is a named capture group. */ + predicate isNamed() { exists(this.getName()) } + + /** Gets the name of this capture group, if any. */ + string getName() { result = re.getGroupName(start, end) } + + predicate isCharacter() { any() } + + string getValue() { result = re.getText().substring(start, end) } + + override RegExpTerm getChild(int i) { + result.getRegExp() = re and + i = 0 and + re.groupContents(start, end, result.getStart(), result.getEnd()) + } + + override string getAPrimaryQlClass() { result = "RegExpGroup" } +} + +class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar { + string char; + + RegExpSpecialChar() { + this = TRegExpSpecialChar(re, start, end) and + re.specialCharacter(start, end, char) + } + + predicate isCharacter() { any() } + + string getChar() { result = char } + + override RegExpTerm getChild(int i) { none() } + + override string getAPrimaryQlClass() { result = "RegExpSpecialChar" } +} + +class RegExpDot extends RegExpSpecialChar { + RegExpDot() { this.getChar() = "." 
} + + override string getAPrimaryQlClass() { result = "RegExpDot" } +} + +class RegExpDollar extends RegExpSpecialChar { + RegExpDollar() { this.getChar() = ["$", "\\Z", "\\z"] } + + override string getAPrimaryQlClass() { result = "RegExpDollar" } +} + +class RegExpCaret extends RegExpSpecialChar { + RegExpCaret() { this.getChar() = ["^", "\\A"] } + + override string getAPrimaryQlClass() { result = "RegExpCaret" } +} + +class RegExpZeroWidthMatch extends RegExpGroup { + RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) } + + override predicate isCharacter() { any() } + + override RegExpTerm getChild(int i) { none() } + + override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" } +} + +/** + * A zero-width lookahead or lookbehind assertion. + * + * Examples: + * + * ``` + * (?=\w) + * (?!\n) + * (?<=\.) + * (? (d,e,f)` in the product automaton + * iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three + * transitions all match a shared character `char`. (see `getAThreewayIntersect`) + * + * We start a search in the product automaton at `(pivot, pivot, succ)`, + * and search for a series of transitions (a `Trace`), such that we end + * at `(pivot, succ, succ)` (see `isReachableFromStartTuple`). + * + * For example, consider the regular expression `/^\d*5\w*$/`. + * The search will start at the tuple `(\d*, \d*, \w*)` and search + * for a path to `(\d*, \w*, \w*)`. + * This path exists, and consists of a single transition in the product automaton, + * where the three corresponding NFA edges all match the character `"5"`. + * + * The start-state in the NFA has an any-transition to itself, this allows us to + * flag regular expressions such as `/a*$/` - which does not have a start anchor - + * and can thus start matching anywhere. + * + * The implementation is not perfect. + * It has the same suffix detection issue as the `js/redos` query, which can cause false positives. 
+ * It also doesn't find all transitions in the product automaton, which can cause false negatives. + */ + +/** + * An instantiation of `ReDoSConfiguration` for superlinear ReDoS. + */ +class SuperLinearReDoSConfiguration extends ReDoSConfiguration { + SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" } + + override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) } +} + +/** + * Gets any root (start) state of a regular expression. + */ +private State getRootState() { result = mkMatch(any(RegExpRoot r)) } + +private newtype TStateTuple = + MkStateTuple(State q1, State q2, State q3) { + // starts at (pivot, pivot, succ) + isStartLoops(q1, q3) and q1 = q2 + or + step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3) + } + +/** + * A state in the product automaton. + * The product automaton contains 3-tuples of states. + * + * We lazily only construct those states that we are actually + * going to need. + * Either a start state `(pivot, pivot, succ)`, or a state + * where there exists a transition from an already existing state. + * + * The exponential variant of this query (`js/redos`) uses an optimization + * trick where `q1 <= q2`. This trick cannot be used here as the order + * of the elements matters. + */ +class StateTuple extends TStateTuple { + State q1; + State q2; + State q3; + + StateTuple() { this = MkStateTuple(q1, q2, q3) } + + /** + * Gets a string representation of this tuple. + */ + string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" } + + /** + * Holds if this tuple is `(r1, r2, r3)`. + */ + pragma[noinline] + predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 } +} + +/** + * A module for determining feasible tuples for the product automaton. + * + * The implementation is split into many predicates for performance reasons.
+ */ +private module FeasibleTuple { + /** + * Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton. + */ + pragma[inline] + predicate isFeasibleTuple(State r1, State r2, State r3) { + // The first element is either inside a repetition (or the start state itself) + isRepetitionOrStart(r1) and + // The last element is inside a repetition + stateInsideRepetition(r3) and + // The states are reachable in the NFA in the order r1 -> r2 -> r3 + delta+(r1) = r2 and + delta+(r2) = r3 and + // The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair). + canReachABeginning(r1) and + // The last element can reach a target (the "succ" state in a `(pivot, succ)` pair). + canReachATarget(r3) + } + + /** + * Holds if `s` is either inside a repetition, or is the start state (which is a repetition). + */ + pragma[noinline] + private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() } + + /** + * Holds if state `s` might be inside a backtracking repetition. + */ + pragma[noinline] + private predicate stateInsideRepetition(State s) { + s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier + } + + /** + * Holds if there exists a path in the NFA from `s` to a "pivot" state + * (from a `(pivot, succ)` pair that starts the search). + */ + pragma[noinline] + private predicate canReachABeginning(State s) { + delta+(s) = any(State pivot | isStartLoops(pivot, _)) + } + + /** + * Holds if there exists a path in the NFA from `s` to a "succ" state + * (from a `(pivot, succ)` pair that starts the search). + */ + pragma[noinline] + private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) } +} + +/** + * Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup. + * + * There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`. 
+ * The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query. + */ +predicate isStartLoops(State pivot, State succ) { + pivot != succ and + succ.getRepr() instanceof InfiniteRepetitionQuantifier and + delta+(pivot) = succ and + ( + pivot.getRepr() instanceof InfiniteRepetitionQuantifier + or + pivot = mkMatch(any(RegExpRoot root)) + ) +} + +/** + * Gets a state for which there exists a transition in the NFA from `s`. + */ +State delta(State s) { delta(s, _, result) } + +/** + * Holds if there are transitions from the components of `q` to the corresponding + * components of `r` labelled with `s1`, `s2`, and `s3`, respectively. + */ +pragma[noinline] +predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) { + exists(State r1, State r2, State r3 | + step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3) + ) +} + +/** + * Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3` + * labelled with `s1`, `s2`, and `s3`, respectively. + */ +pragma[noopt] +predicate step( + StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3 +) { + exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) | + deltaClosed(q1, s1, r1) and + deltaClosed(q2, s2, r2) and + deltaClosed(q3, s3, r3) and + // use noopt to force the join on `getAThreewayIntersect` to happen last. + exists(getAThreewayIntersect(s1, s2, s3)) + ) +} + +/** + * Gets a char that is matched by all the edges `s1`, `s2`, and `s3`. + * + * The result is not complete, and might miss some combination of edges that share some character. 
+ */ +pragma[noinline] +string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) { + result = minAndMaxIntersect(s1, s2) and result = [intersect(s2, s3), intersect(s1, s3)] + or + result = minAndMaxIntersect(s1, s3) and result = [intersect(s2, s3), intersect(s1, s2)] + or + result = minAndMaxIntersect(s2, s3) and result = [intersect(s1, s2), intersect(s1, s3)] +} + +/** + * Gets the minimum and maximum characters that intersect between `a` and `b`. + * This predicate is used to limit the size of `getAThreewayIntersect`. + */ +pragma[noinline] +string minAndMaxIntersect(InputSymbol a, InputSymbol b) { + result = [min(intersect(a, b)), max(intersect(a, b))] +} + +private newtype TTrace = + Nil() or + Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) { + exists(StateTuple p | + isReachableFromStartTuple(_, _, p, t, _) and + step(p, s1, s2, s3, _) + ) + or + exists(State pivot, State succ | isStartLoops(pivot, succ) | + t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _) + ) + } + +/** + * A list of tuples of input symbols that describe a path in the product automaton + * starting from some start state. + */ +class Trace extends TTrace { + /** + * Gets a string representation of this Trace that can be used for debug purposes. + */ + string toString() { + this = Nil() and result = "Nil()" + or + exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) | + result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")" + ) + } +} + +/** + * Gets a string corresponding to the trace `t`. + */ +string concretise(Trace t) { + t = Nil() and result = "" + or + exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace rest | t = Step(s1, s2, s3, rest) | + result = concretise(rest) + getAThreewayIntersect(s1, s2, s3) + ) +} + +/** + * Holds if there exists a transition from `r` to `q` in the product automaton. + * Notice that the arguments are flipped, and thus the direction is backwards. 
+ */ +pragma[noinline] +predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) } + +/** + * Holds if `tuple` is an end state in our search. + * That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`. + */ +predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) } + +/** + * Gets the minimum length of a path from `r` to some end state `end`. + * + * The implementation searches backwards from the end-tuple. + * This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small. + * The `end` argument must always be an end state. + */ +int distBackFromEnd(StateTuple r, StateTuple end) = + shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result) + +/** + * Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that: + * `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton, + * and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`, + * and a path from a start-state to `tuple` follows the transitions in `trace`. + */ +predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) { + // base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path. 
+ exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 | + isStartLoops(pivot, succ) and + step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and + tuple = MkStateTuple(q1, q2, q3) and + trace = Step(s1, s2, s3, Nil()) and + dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) + ) + or + // recursive case + exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 | + isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and + dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and + trace = Step(s1, s2, s3, v) + ) +} + +/** + * Helper predicate for the recursive case in `isReachableFromStartTuple`. + */ +pragma[noinline] +private int isReachableFromStartTupleHelper( + State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2, + InputSymbol s3 +) { + result = distBackFromEnd(r, MkStateTuple(pivot, succ, succ)) and + step(p, s1, s2, s3, r) +} + +/** + * Gets the tuple `(pivot, succ, succ)` from the product automaton. + */ +StateTuple getAnEndTuple(State pivot, State succ) { + isStartLoops(pivot, succ) and + result = MkStateTuple(pivot, succ, succ) +} + +/** + * Holds if matching repetitions of `pump` can: + * 1) Transition from `pivot` back to `pivot`. + * 2) Transition from `pivot` to `succ`. + * 3) Transition from `succ` to `succ`. + * + * From theorem 3 in the paper linked in the top of this file we can therefore conclude that + * the regular expression has polynomial backtracking - if a rejecting suffix exists. + * + * This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are + * available in the `hasReDoSResult` predicate. 
+ */ +predicate isPumpable(State pivot, State succ, string pump) { + exists(StateTuple q, Trace t | + isReachableFromStartTuple(pivot, succ, q, t, _) and + q = getAnEndTuple(pivot, succ) and + pump = concretise(t) + ) +} + +/** + * Holds if repetitions of `pump` at `t` will cause polynomial backtracking. + */ +predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) { + exists(State s, State pivot | + hasReDoSResult(t, pump, s, prefixMsg) and + isPumpable(pivot, s, _) and + prev = pivot.getRepr() + ) +} + +/** + * Gets a message for why `term` can cause polynomial backtracking. + */ +string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) { + polynimalReDoS(term, pump, prefixMsg, prev) and + result = + "Strings " + prefixMsg + "with many repetitions of '" + pump + + "' can start matching anywhere after the start of the preceeding " + prev +} + +/** + * A term that may cause a regular expression engine to perform a + * polynomial number of match attempts, relative to the input length. + */ +class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier { + string reason; + string pump; + string prefixMsg; + RegExpTerm prev; + + PolynomialBackTrackingTerm() { + reason = getReasonString(this, pump, prefixMsg, prev) and + // there might be many reasons for this term to have polynomial backtracking - we pick the shortest one. + reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg) + } + + /** + * Holds if all non-empty successors to the polynomial backtracking term match the end of the line. + */ + predicate isAtEndLine() { + forall(RegExpTerm succ | this.getSuccessor+() = succ and not matchesEpsilon(succ) | + succ instanceof RegExpDollar + ) + } + + /** + * Gets the string that should be repeated to cause this regular expression to perform polynomially. 
+ */ + string getPumpString() { result = pump } + + /** + * Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking. + */ + string getPrefixMessage() { result = prefixMsg } + + /** + * Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking. + */ + RegExpTerm getPreviousLoop() { result = prev } + + /** + * Gets the reason for the number of match attempts. + */ + string getReason() { result = reason } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll new file mode 100644 index 00000000000..4baceba42db --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll @@ -0,0 +1,40 @@ +private import ruby +private import codeql.ruby.DataFlow +private import codeql.ruby.Concepts +private import codeql.ruby.Frameworks +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.dataflow.BarrierGuards + +/** + * Provides default sources, sinks and sanitizers for detecting + * "Code injection" vulnerabilities, as well as extension points for + * adding your own. + */ +module CodeInjection { + /** + * A data flow source for "Code injection" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "Code injection" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer guard for "Code injection" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * A call that evaluates its arguments as Ruby code, considered as a flow sink. 
+ */ + class CodeExecutionAsSink extends Sink { + CodeExecutionAsSink() { this = any(CodeExecution c).getCode() } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll new file mode 100644 index 00000000000..95e08a82dc3 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll @@ -0,0 +1,29 @@ +/** + * Provides a taint-tracking configuration for detecting "Code injection" vulnerabilities. + * + * Note, for performance reasons: only import this file if `Configuration` is needed, + * otherwise `CodeInjectionCustomizations` should be imported instead. + */ + +import codeql.ruby.DataFlow::DataFlow::PathGraph +import codeql.ruby.DataFlow +import codeql.ruby.TaintTracking +import CodeInjectionCustomizations::CodeInjection +import codeql.ruby.dataflow.BarrierGuards + +/** + * A taint-tracking configuration for detecting "Code injection" vulnerabilities. 
+ */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "CodeInjection" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard or + guard instanceof StringConstCompare or + guard instanceof StringConstArrayInclusionCall + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll new file mode 100644 index 00000000000..b39455195be --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll @@ -0,0 +1,54 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about + * command-injection vulnerabilities, as well as extension points for + * adding your own. + */ + +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.Concepts +private import codeql.ruby.Frameworks +private import codeql.ruby.ApiGraphs + +module CommandInjection { + /** + * A data flow source for command-injection vulnerabilities. + */ + abstract class Source extends DataFlow::Node { + /** Gets a string that describes the type of this remote flow source. */ + abstract string getSourceType(); + } + + /** + * A data flow sink for command-injection vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for command-injection vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** A source of remote user input, considered as a flow source for command injection. 
*/ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + + override string getSourceType() { result = "a user-provided value" } + } + + /** + * A command argument to a function that initiates an operating system command. + */ + class SystemCommandExecutionSink extends Sink { + SystemCommandExecutionSink() { exists(SystemCommandExecution c | c.isShellInterpreted(this)) } + } + + /** + * A call to `Shellwords.escape` or `Shellwords.shellescape` sanitizes its input. + */ + class ShellwordsEscapeAsSanitizer extends Sanitizer { + ShellwordsEscapeAsSanitizer() { + this = API::getTopLevelMember("Shellwords").getAMethodCall(["escape", "shellescape"]) + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll new file mode 100644 index 00000000000..25460ad65df --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll @@ -0,0 +1,32 @@ +/** + * Provides a taint tracking configuration for reasoning about + * command-injection vulnerabilities (CWE-078). + * + * Note, for performance reasons: only import this file if + * `CommandInjection::Configuration` is needed, otherwise + * `CommandInjectionCustomizations` should be imported instead. + */ + +import ruby +import codeql.ruby.TaintTracking +import CommandInjectionCustomizations::CommandInjection +import codeql.ruby.DataFlow +import codeql.ruby.dataflow.BarrierGuards + +/** + * A taint-tracking configuration for reasoning about command-injection vulnerabilities. 
+ */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "CommandInjection" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof StringConstCompare or + guard instanceof StringConstArrayInclusionCall + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll new file mode 100644 index 00000000000..60e152a06fc --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll @@ -0,0 +1,39 @@ +/** + * Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `ReflectedXSS::Configuration` is needed, otherwise + * `XSS::ReflectedXSS` should be imported instead. + */ + +private import ruby +import codeql.ruby.DataFlow +import codeql.ruby.TaintTracking + +/** + * Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities. + */ +module ReflectedXSS { + import XSS::ReflectedXSS + + /** + * A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities. 
+ */ + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "ReflectedXSS" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalXSSTaintStep(node1, node2) + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll new file mode 100644 index 00000000000..2a089050e5a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll @@ -0,0 +1,40 @@ +/** + * Provides a taint-tracking configuration for reasoning about stored + * cross-site scripting vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `StoredXSS::Configuration` is needed, otherwise + * `XSS::StoredXSS` should be imported instead. + */ + +import ruby +import codeql.ruby.DataFlow +import codeql.ruby.TaintTracking + +module StoredXSS { + import XSS::StoredXSS + + /** + * A taint-tracking configuration for reasoning about Stored XSS. 
+ */ + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "StoredXss" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalXSSTaintStep(node1, node2) + } + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll new file mode 100644 index 00000000000..0e39e053b2a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll @@ -0,0 +1,190 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about unsafe + * deserialization, as well as extension points for adding your own. + */ + +private import ruby +private import codeql.ruby.ApiGraphs +private import codeql.ruby.CFG +private import codeql.ruby.DataFlow +private import codeql.ruby.dataflow.RemoteFlowSources + +module UnsafeDeserialization { + /** + * A data flow source for unsafe deserialization vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for unsafe deserialization vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for unsafe deserialization vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * Additional taint steps for "unsafe deserialization" vulnerabilities. 
+ */ + predicate isAdditionalTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) { + base64DecodeTaintStep(fromNode, toNode) + } + + /** A source of remote user input, considered as a flow source for unsafe deserialization. */ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + } + + /** + * An argument in a call to `Marshal.load` or `Marshal.restore`, considered a + * sink for unsafe deserialization. + */ + class MarshalLoadOrRestoreArgument extends Sink { + MarshalLoadOrRestoreArgument() { + this = API::getTopLevelMember("Marshal").getAMethodCall(["load", "restore"]).getArgument(0) + } + } + + /** + * An argument in a call to `YAML.load`, considered a sink for unsafe + * deserialization. + */ + class YamlLoadArgument extends Sink { + YamlLoadArgument() { + this = API::getTopLevelMember("YAML").getAMethodCall("load").getArgument(0) + } + } + + /** + * An argument in a call to `JSON.load` or `JSON.restore`, considered a sink + * for unsafe deserialization. + */ + class JsonLoadArgument extends Sink { + JsonLoadArgument() { + this = API::getTopLevelMember("JSON").getAMethodCall(["load", "restore"]).getArgument(0) + } + } + + private string getAKnownOjModeName(boolean isSafe) { + result = ["compat", "custom", "json", "null", "rails", "strict", "wab"] and isSafe = true + or + result = "object" and isSafe = false + } + + private predicate isOjModePair(Pair p, string modeValue) { + p.getKey().getValueText() = "mode" and + exists(DataFlow::LocalSourceNode symbolLiteral, DataFlow::Node value | + symbolLiteral.asExpr().getExpr().(SymbolLiteral).getValueText() = modeValue and + symbolLiteral.flowsTo(value) and + value.asExpr().getExpr() = p.getValue() + ) + } + + /** + * A node representing a hash that contains the key `:mode`. 
+ */ + private class OjOptionsHashWithModeKey extends DataFlow::Node { + private string modeValue; + + OjOptionsHashWithModeKey() { + exists(DataFlow::LocalSourceNode options | + options.flowsTo(this) and + isOjModePair(options.asExpr().getExpr().(HashLiteral).getAKeyValuePair(), modeValue) + ) + } + + /** + * Holds if this hash node contains a `:mode` key whose value is one known + * to be `isSafe` with untrusted data. + */ + predicate hasKnownMode(boolean isSafe) { modeValue = getAKnownOjModeName(isSafe) } + + /** + * Holds if this hash node contains a `:mode` key whose value is one of the + * `Oj` modes known to be safe to use with untrusted data. + */ + predicate hasSafeMode() { this.hasKnownMode(true) } + } + + /** + * A call node that sets `Oj.default_options`. + * + * ```rb + * Oj.default_options = { allow_blank: true, mode: :compat } + * ``` + */ + private class SetOjDefaultOptionsCall extends DataFlow::CallNode { + SetOjDefaultOptionsCall() { + this = API::getTopLevelMember("Oj").getAMethodCall("default_options=") + } + + /** + * Gets the value being assigned to `Oj.default_options`. + */ + DataFlow::Node getValue() { + result.asExpr() = + this.getArgument(0).asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs() + } + } + + /** + * A call to `Oj.load`. + */ + private class OjLoadCall extends DataFlow::CallNode { + OjLoadCall() { this = API::getTopLevelMember("Oj").getAMethodCall("load") } + + /** + * Holds if this call to `Oj.load` includes an explicit options hash + * argument that sets the mode to one that is known to be `isSafe`. + */ + predicate hasExplicitKnownMode(boolean isSafe) { + exists(DataFlow::Node arg, int i | i >= 1 and arg = this.getArgument(i) | + arg.(OjOptionsHashWithModeKey).hasKnownMode(isSafe) + or + isOjModePair(arg.asExpr().getExpr(), getAKnownOjModeName(isSafe)) + ) + } + } + + /** + * An argument in a call to `Oj.load` where the mode is `:object` (which is + * the default), considered a sink for unsafe deserialization. 
+ */ + class UnsafeOjLoadArgument extends Sink { + UnsafeOjLoadArgument() { + exists(OjLoadCall ojLoad | + this = ojLoad.getArgument(0) and + // Exclude calls that explicitly pass a safe mode option. + not ojLoad.hasExplicitKnownMode(true) and + ( + // Sinks to include: + // - Calls with an explicit, unsafe mode option. + ojLoad.hasExplicitKnownMode(false) + or + // - Calls with no explicit mode option, unless there exists a call + // anywhere to set the default options to a known safe mode. + not ojLoad.hasExplicitKnownMode(_) and + not exists(SetOjDefaultOptionsCall setOpts | + setOpts.getValue().(OjOptionsHashWithModeKey).hasSafeMode() + ) + ) + ) + } + } + + /** + * `Base64.decode64` propagates taint from its argument to its return value. + */ + predicate base64DecodeTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) { + exists(DataFlow::CallNode callNode | + callNode = + API::getTopLevelMember("Base64") + .getAMethodCall(["decode64", "strict_decode64", "urlsafe_decode64"]) + | + fromNode = callNode.getArgument(0) and + toNode = callNode + ) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll new file mode 100644 index 00000000000..d08b73da936 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll @@ -0,0 +1,34 @@ +/** + * Provides a taint-tracking configuration for reasoning about unsafe deserialization. + * + * Note, for performance reasons: only import this file if + * `UnsafeDeserialization::Configuration` is needed, otherwise + * `UnsafeDeserializationCustomizations` should be imported instead. + */ + +private import ruby +private import codeql.ruby.DataFlow +private import codeql.ruby.TaintTracking +import UnsafeDeserializationCustomizations + +/** + * A taint-tracking configuration for reasoning about unsafe deserialization. 
+ */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "UnsafeDeserialization" } + + override predicate isSource(DataFlow::Node source) { + source instanceof UnsafeDeserialization::Source + } + + override predicate isSink(DataFlow::Node sink) { sink instanceof UnsafeDeserialization::Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof UnsafeDeserialization::Sanitizer + } + + override predicate isAdditionalTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) { + UnsafeDeserialization::isAdditionalTaintStep(fromNode, toNode) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll new file mode 100644 index 00000000000..caaf2264018 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll @@ -0,0 +1,127 @@ +/** + * Provides default sources, sinks and sanitizers for detecting "URL + * redirection" vulnerabilities, as well as extension points for adding your + * own. + */ + +private import ruby +private import codeql.ruby.DataFlow +private import codeql.ruby.Concepts +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.dataflow.BarrierGuards + +/** + * Provides default sources, sinks and sanitizers for detecting + * "URL redirection" vulnerabilities, as well as extension points for + * adding your own. + */ +module UrlRedirect { + /** + * A data flow source for "URL redirection" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "URL redirection" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for "URL redirection" vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A sanitizer guard for "URL redirection" vulnerabilities. 
+ */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * Additional taint steps for "URL redirection" vulnerabilities. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + taintStepViaMethodCallReturnValue(node1, node2) + } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * An HTTP redirect response, considered as a flow sink. + */ + class RedirectLocationAsSink extends Sink { + RedirectLocationAsSink() { + exists(HTTP::Server::HttpRedirectResponse e | + this = e.getRedirectLocation() and + // As a rough heuristic, assume that methods with these names are handlers for POST/PUT/PATCH/DELETE requests, + // which are not as vulnerable to URL redirection because browsers will not initiate them from clicking a link. + not this.getEnclosingCallable() + .asCallable() + .(Method) + .getName() + .regexpMatch(".*(create|update|destroy).*") + ) + } + } + + /** + * A comparison with a constant string, considered as a sanitizer-guard. + */ + class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + /** + * Some methods will propagate taint to their return values. + * Here we cover a few common ones related to `ActionController::Parameters`. + * TODO: use ApiGraphs or something to restrict these method calls to the correct receiver, rather + * than matching on method name alone. + */ + predicate taintStepViaMethodCallReturnValue(DataFlow::Node node1, DataFlow::Node node2) { + exists(MethodCall m | m = node2.asExpr().getExpr() | + m.getReceiver() = node1.asExpr().getExpr() and + (actionControllerTaintedMethod(m) or hashTaintedMethod(m)) + ) + } + + /** + * String interpolation is considered safe, provided the string is prefixed by a non-tainted value. + * In most cases this will prevent the tainted value from controlling e.g. the host of the URL. 
+ * + * For example: + * + * ```ruby + * redirect_to "/users/#{params[:key]}" # safe + * redirect_to "#{params[:key]}/users" # unsafe + * ``` + * + * There are prefixed interpolations that are not safe, e.g. + * + * ```ruby + * redirect_to "foo#{params[:key]}/users" # => "foo-malicious-site.com/users" + * ``` + * + * We currently don't catch these cases. + */ + class StringInterpolationAsSanitizer extends Sanitizer { + StringInterpolationAsSanitizer() { + exists(StringlikeLiteral str, int n | str.getComponent(n) = this.asExpr().getExpr() and n > 0) + } + } + + /** + * These methods return a new `ActionController::Parameters` or a `Hash` containing a subset of + * the original values. This may still contain user input, so the results are tainted. + * TODO: flesh this out to cover the whole API. + */ + predicate actionControllerTaintedMethod(MethodCall m) { + m.getMethodName() in ["to_unsafe_hash", "to_unsafe_h", "permit", "require"] + } + + /** + * These `Hash` methods preserve taint because they return a new hash which may still contain keys + * with user input. + * TODO: flesh this out to cover the whole API. + */ + predicate hashTaintedMethod(MethodCall m) { m.getMethodName() in ["merge", "fetch"] } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll new file mode 100644 index 00000000000..5a984d1fd6e --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll @@ -0,0 +1,34 @@ +/** + * Provides a taint-tracking configuration for detecting "URL redirection" vulnerabilities. + * + * Note, for performance reasons: only import this file if `Configuration` is needed, + * otherwise `UrlRedirectCustomizations` should be imported instead. 
+ */ + +private import ruby +import codeql.ruby.DataFlow::DataFlow::PathGraph +import codeql.ruby.DataFlow +import codeql.ruby.TaintTracking +import UrlRedirectCustomizations +import UrlRedirectCustomizations::UrlRedirect + +/** + * A taint-tracking configuration for detecting "URL redirection" vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "UrlRedirect" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + UrlRedirect::isAdditionalTaintStep(node1, node2) + } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll new file mode 100644 index 00000000000..8f8f15b630a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll @@ -0,0 +1,369 @@ +/** + * Provides classes and predicates used by the XSS queries. + */ + +private import ruby +private import codeql.ruby.DataFlow +private import codeql.ruby.DataFlow2 +private import codeql.ruby.CFG +private import codeql.ruby.Concepts +private import codeql.ruby.Frameworks +private import codeql.ruby.frameworks.ActionController +private import codeql.ruby.frameworks.ActionView +private import codeql.ruby.dataflow.RemoteFlowSources +private import codeql.ruby.dataflow.BarrierGuards +private import codeql.ruby.dataflow.internal.DataFlowDispatch + +/** + * Provides default sources, sinks and sanitizers for detecting + * "server-side cross-site scripting" vulnerabilities, as well as + * extension points for adding your own. 
+ */ +private module Shared { + /** + * A data flow source for "server-side cross-site scripting" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "server-side cross-site scripting" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for "server-side cross-site scripting" vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A sanitizer guard for "server-side cross-site scripting" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + private class ErbOutputMethodCallArgumentNode extends DataFlow::Node { + private MethodCall call; + + ErbOutputMethodCallArgumentNode() { + exists(ErbOutputDirective d | + call = d.getTerminalStmt() and + this.asExpr().getExpr() = call.getAnArgument() + ) + } + + MethodCall getCall() { result = call } + } + + /** + * An `html_safe` call marking the output as not requiring HTML escaping, + * considered as a flow sink. + */ + class HtmlSafeCallAsSink extends Sink { + HtmlSafeCallAsSink() { + exists(HtmlSafeCall c, ErbOutputDirective d | + this.asExpr().getExpr() = c.getReceiver() and + c = d.getTerminalStmt() + ) + } + } + + /** + * An argument to a call to the `raw` method, considered as a flow sink. + */ + class RawCallArgumentAsSink extends Sink, ErbOutputMethodCallArgumentNode { + RawCallArgumentAsSink() { this.getCall() instanceof RawCall } + } + + /** + * An argument to a call to the `link_to` method, which does not expect + * unsanitized user-input, considered as a flow sink. + */ + class LinkToCallArgumentAsSink extends Sink, ErbOutputMethodCallArgumentNode { + LinkToCallArgumentAsSink() { + this.asExpr().getExpr() = this.getCall().(LinkToCall).getPathArgument() + } + } + + /** + * An HTML escaping, considered as a sanitizer.
+ */ + class HtmlEscapingAsSanitizer extends Sanitizer { + HtmlEscapingAsSanitizer() { this = any(HtmlEscaping esc).getOutput() } + } + + /** + * A comparison with a constant string, considered as a sanitizer-guard. + */ + class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + /** + * An inclusion check against an array of constant strings, considered as a sanitizer-guard. + */ + class StringConstArrayInclusionCallAsSanitizerGuard extends SanitizerGuard, + StringConstArrayInclusionCall { } + + /** + * A `VariableWriteAccessCfgNode` that is not succeeded (locally) by another + * write to that variable. + */ + private class FinalInstanceVarWrite extends CfgNodes::ExprNodes::InstanceVariableWriteAccessCfgNode { + private InstanceVariable var; + + FinalInstanceVarWrite() { + var = this.getExpr().getVariable() and + not exists(CfgNodes::ExprNodes::InstanceVariableWriteAccessCfgNode succWrite | + succWrite.getExpr().getVariable() = var + | + succWrite = this.getASuccessor+() + ) + } + + InstanceVariable getVariable() { result = var } + + AssignExpr getAnAssignExpr() { result.getLeftOperand() = this.getExpr() } + } + + /** + * Holds if `call` is a method call in ERB file `erb`, targeting a method + * named `name`. + */ + pragma[noinline] + private predicate isMethodCall(MethodCall call, string name, ErbFile erb) { + name = call.getMethodName() and + erb = call.getLocation().getFile() + } + + /** + * Holds if some render call passes `value` for `hashKey` in the `locals` + * argument, in ERB file `erb`. 
+ */ + pragma[noinline] + private predicate renderCallLocals(string hashKey, Expr value, ErbFile erb) { + exists(RenderCall call, Pair kvPair | + call.getLocals().getAKeyValuePair() = kvPair and + kvPair.getValue() = value and + kvPair.getKey().getValueText() = hashKey and + call.getTemplateFile() = erb + ) + } + + pragma[noinline] + private predicate isFlowFromLocals0( + CfgNodes::ExprNodes::ElementReferenceCfgNode refNode, string hashKey, ErbFile erb + ) { + exists(DataFlow::Node argNode, CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode | + argNode.asExpr() = refNode.getArgument(0) and + refNode.getReceiver().getExpr().(MethodCall).getMethodName() = "local_assigns" and + argNode.getALocalSource() = DataFlow::exprNode(strNode) and + strNode.getExpr().getValueText() = hashKey and + erb = refNode.getFile() + ) + } + + private predicate isFlowFromLocals(DataFlow::Node node1, DataFlow::Node node2) { + exists(string hashKey, ErbFile erb | + // node1 is a `locals` argument to a render call... + renderCallLocals(hashKey, node1.asExpr().getExpr(), erb) + | + // node2 is an element reference against `local_assigns` + isFlowFromLocals0(node2.asExpr(), hashKey, erb) + or + // ...node2 is a "method call" to a "method" with `hashKey` as its name + // TODO: This may be a variable read in reality that we interpret as a method call + isMethodCall(node2.asExpr().getExpr(), hashKey, erb) + ) + } + + /** + * Holds if `action` contains an assignment of `value` to an instance + * variable named `name`, in ERB file `erb`. 
+ */ + pragma[noinline] + private predicate actionAssigns( + ActionControllerActionMethod action, string name, Expr value, ErbFile erb + ) { + exists(AssignExpr ae, FinalInstanceVarWrite controllerVarWrite | + action.getDefaultTemplateFile() = erb and + ae.getParent+() = action and + ae = controllerVarWrite.getAnAssignExpr() and + name = controllerVarWrite.getVariable().getName() and + value = ae.getRightOperand() + ) + } + + pragma[noinline] + private predicate isVariableReadAccess(VariableReadAccess viewVarRead, string name, ErbFile erb) { + erb = viewVarRead.getLocation().getFile() and + viewVarRead.getVariable().getName() = name + } + + private predicate isFlowFromControllerInstanceVariable(DataFlow::Node node1, DataFlow::Node node2) { + // instance variables in the controller + exists(ActionControllerActionMethod action, string name, ErbFile template | + // match read to write on variable name + actionAssigns(action, name, node1.asExpr().getExpr(), template) and + // propagate taint from assignment RHS expr to variable read access in view + isVariableReadAccess(node2.asExpr().getExpr(), name, template) + ) + } + + /** + * Holds if `helperMethod` is a helper method named `name` that is associated + * with ERB file `erb`. 
+ */ + pragma[noinline] + private predicate isHelperMethod( + ActionControllerHelperMethod helperMethod, string name, ErbFile erb + ) { + helperMethod.getName() = name and + helperMethod.getControllerClass() = getAssociatedControllerClass(erb) + } + + private predicate isFlowIntoHelperMethod(DataFlow::Node node1, DataFlow::Node node2) { + // flow from template into controller helper method + exists( + ErbFile template, ActionControllerHelperMethod helperMethod, string name, + CfgNodes::ExprNodes::MethodCallCfgNode helperMethodCall, int argIdx + | + isHelperMethod(helperMethod, name, template) and + isMethodCall(helperMethodCall.getExpr(), name, template) and + helperMethodCall.getArgument(pragma[only_bind_into](argIdx)) = node1.asExpr() and + helperMethod.getParameter(pragma[only_bind_into](argIdx)) = node2.asExpr().getExpr() + ) + } + + private predicate isFlowFromHelperMethod(DataFlow::Node node1, DataFlow::Node node2) { + // flow out of controller helper method into template + exists(ErbFile template, ActionControllerHelperMethod helperMethod, string name | + // `node1` is an expr node that may be returned by the helper method + exprNodeReturnedFrom(node1, helperMethod) and + // `node2` is a call to the helper method + isHelperMethod(helperMethod, name, template) and + isMethodCall(node2.asExpr().getExpr(), name, template) + ) + } + + /** + * An additional step that preserves dataflow in the context of XSS. + */ + predicate isAdditionalXSSFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isFlowFromLocals(node1, node2) + or + isFlowFromControllerInstanceVariable(node1, node2) + or + isFlowIntoHelperMethod(node1, node2) + or + isFlowFromHelperMethod(node1, node2) + } +} + +/** + * Provides default sources, sinks and sanitizers for detecting + * "reflected cross-site scripting" vulnerabilities, as well as + * extension points for adding your own. + */ +module ReflectedXSS { + /** A data flow source for reflected XSS vulnerabilities.
*/ + abstract class Source extends Shared::Source { } + + /** A data flow sink for reflected XSS vulnerabilities. */ + abstract class Sink extends Shared::Sink { } + + /** A sanitizer for reflected XSS vulnerabilities. */ + abstract class Sanitizer extends Shared::Sanitizer { } + + /** A sanitizer guard for reflected XSS vulnerabilities. */ + abstract class SanitizerGuard extends Shared::SanitizerGuard { } + + // Consider all arbitrary XSS sinks to be reflected XSS sinks + private class AnySink extends Sink instanceof Shared::Sink { } + + // Consider all arbitrary XSS sanitizers to be reflected XSS sanitizers + private class AnySanitizer extends Sanitizer instanceof Shared::Sanitizer { } + + // Consider all arbitrary XSS sanitizer guards to be reflected XSS sanitizer guards + private class AnySanitizerGuard extends SanitizerGuard instanceof Shared::SanitizerGuard { + override predicate checks(CfgNode expr, boolean branch) { + Shared::SanitizerGuard.super.checks(expr, branch) + } + } + + /** + * An additional step that preserves dataflow in the context of reflected XSS. + */ + predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2; + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } +} + +private module OrmTracking { + /** + * A data flow configuration to track flow from finder calls to field accesses.
+ */ + class Configuration extends DataFlow2::Configuration { + Configuration() { this = "OrmTracking" } + + override predicate isSource(DataFlow2::Node source) { source instanceof OrmInstantiation } + + // Select any call node and narrow down later + override predicate isSink(DataFlow2::Node sink) { sink instanceof DataFlow2::CallNode } + + override predicate isAdditionalFlowStep(DataFlow2::Node node1, DataFlow2::Node node2) { + Shared::isAdditionalXSSFlowStep(node1, node2) + or + // Propagate flow through arbitrary method calls + node2.(DataFlow2::CallNode).getReceiver() = node1 + or + // Propagate flow through "or" expressions `or`/`||` + node2.asExpr().getExpr().(LogicalOrExpr).getAnOperand() = node1.asExpr().getExpr() + } + } +} + +module StoredXSS { + /** A data flow source for stored XSS vulnerabilities. */ + abstract class Source extends Shared::Source { } + + /** A data flow sink for stored XSS vulnerabilities. */ + abstract class Sink extends Shared::Sink { } + + /** A sanitizer for stored XSS vulnerabilities. */ + abstract class Sanitizer extends Shared::Sanitizer { } + + /** A sanitizer guard for stored XSS vulnerabilities. */ + abstract class SanitizerGuard extends Shared::SanitizerGuard { } + + // Consider all arbitrary XSS sinks to be stored XSS sinks + private class AnySink extends Sink instanceof Shared::Sink { } + + // Consider all arbitrary XSS sanitizers to be stored XSS sanitizers + private class AnySanitizer extends Sanitizer instanceof Shared::Sanitizer { } + + // Consider all arbitrary XSS sanitizer guards to be stored XSS sanitizer guards + private class AnySanitizerGuard extends SanitizerGuard instanceof Shared::SanitizerGuard { + override predicate checks(CfgNode expr, boolean branch) { + Shared::SanitizerGuard.super.checks(expr, branch) + } + } + + /** + * An additional step that preserves dataflow in the context of stored XSS. 
+ */ + predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2; + + private class OrmFieldAsSource extends Source instanceof DataFlow2::CallNode { + OrmFieldAsSource() { + exists(OrmTracking::Configuration subConfig, DataFlow2::CallNode subSrc, MethodCall call | + subConfig.hasFlow(subSrc, this) and + call = this.asExpr().getExpr() and + subSrc.(OrmInstantiation).methodCallMayAccessField(call.getMethodName()) + ) + } + } + + /** A file read, considered as a flow source for stored XSS. */ + private class FileSystemReadAccessAsSource extends Source instanceof FileSystemReadAccess { } + // TODO: Consider `FileNameSource` flowing to script tag `src` attributes and similar +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll new file mode 100644 index 00000000000..6ced6a8206e --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll @@ -0,0 +1,470 @@ +/** Step Summaries and Type Tracking */ + +private import TypeTrackerSpecific + +/** + * Any string that may appear as the name of a piece of content. This will usually include things like: + * - Attribute names (in Python) + * - Property names (in JavaScript) + * + * In general, this can also be used to model things like stores to specific list indices. To ensure + * correctness, it is important that + * + * - different types of content do not have overlapping names, and + * - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of + * content instead. + */ +class ContentName extends string { + ContentName() { this = getPossibleContentName() } +} + +/** Either a content name, or the empty string (representing no content). 
*/ +class OptionalContentName extends string { + OptionalContentName() { this instanceof ContentName or this = "" } +} + +cached +private module Cached { + /** + * A description of a step on an inter-procedural data flow path. + */ + cached + newtype TStepSummary = + LevelStep() or + CallStep() or + ReturnStep() or + StoreStep(ContentName content) or + LoadStep(ContentName content) + + /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ + cached + TypeTracker append(TypeTracker tt, StepSummary step) { + exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) | + step = LevelStep() and result = tt + or + step = CallStep() and result = MkTypeTracker(true, content) + or + step = ReturnStep() and hasCall = false and result = tt + or + step = LoadStep(content) and result = MkTypeTracker(hasCall, "") + or + exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p)) + ) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * heap and/or intra-procedural step from `nodeFrom` to `nodeTo`. + * + * Steps contained in this predicate should _not_ depend on the call graph. + */ + cached + predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary)) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * inter-procedural step from `nodeFrom` to `nodeTo`. + */ + cached + predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary)) + } +} + +private import Cached + +/** + * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead. + * + * A description of a step on an inter-procedural data flow path. 
+ */ +class StepSummary extends TStepSummary { + /** Gets a textual representation of this step summary. */ + string toString() { + this instanceof LevelStep and result = "level" + or + this instanceof CallStep and result = "call" + or + this instanceof ReturnStep and result = "return" + or + exists(string content | this = StoreStep(content) | result = "store " + content) + or + exists(string content | this = LoadStep(content) | result = "load " + content) + } +} + +pragma[noinline] +private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + jumpStep(nodeFrom, nodeTo) and + summary = LevelStep() + or + exists(string content | + StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and + summary = StoreStep(content) + or + basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content) + ) +} + +pragma[noinline] +private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + callStep(nodeFrom, nodeTo) and summary = CallStep() + or + returnStep(nodeFrom, nodeTo) and + summary = ReturnStep() +} + +/** Provides predicates for updating step summaries (`StepSummary`s). */ +module StepSummary { + /** + * Gets the summary that corresponds to having taken a forwards + * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + * + * This predicate is inlined, which enables better join-orders when + * the call graph construction and type tracking are mutually recursive. + * In such cases, non-linear recursion involving `step` will be limited + * to non-linear recursion for the parts of `step` that involve the + * call graph. + */ + pragma[inline] + predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + stepNoCall(nodeFrom, nodeTo, summary) + or + stepCall(nodeFrom, nodeTo, summary) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. 
+ * + * Unlike `StepSummary::step`, this predicate does not compress + * type-preserving steps. + */ + pragma[inline] + predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) { + smallstepNoCall(nodeFrom, nodeTo, summary) + or + smallstepCall(nodeFrom, nodeTo, summary) + } + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. + * + * Note that `nodeTo` will always be a local source node that flows to the place where the content + * is written in `basicStoreStep`. This may lead to the flow of information going "back in time" + * from the point of view of the execution of the program. + * + * For instance, if we interpret attribute writes in Python as writing to content with the same + * name as the attribute and consider the following snippet + * + * ```python + * def foo(y): + * x = Foo() + * bar(x) + * x.attr = y + * baz(x) + * + * def bar(x): + * z = x.attr + * ``` + * for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`, + * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the + * function. This means we will track the fact that `x.attr` can have the type of `y` into the + * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called. + */ + predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) { + exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content)) + } +} + +private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content) + +/** + * Summary of the steps needed to track a value to a given dataflow node. + * + * This can be used to track objects that implement a certain API in order to + * recognize calls to that API. 
Note that type-tracking does not by itself provide a + * source/sink relation, that is, it may determine that a node has a given type, + * but it won't determine where that type came from. + * + * It is recommended that all uses of this type are written in the following form, + * for tracking some type `myType`: + * ```ql + * DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) { + * t.start() and + * result = < source of myType > + * or + * exists (DataFlow::TypeTracker t2 | + * result = myType(t2).track(t2, t) + * ) + * } + * + * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) } + * ``` + * + * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent + * `t = t2.step(myType(t2), result)`. If you additionally want to track individual + * intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`. + */ +class TypeTracker extends TTypeTracker { + Boolean hasCall; + OptionalContentName content; + + TypeTracker() { this = MkTypeTracker(hasCall, content) } + + /** Gets the summary resulting from appending `step` to this type-tracking summary. */ + TypeTracker append(StepSummary step) { result = append(this, step) } + + /** Gets a textual representation of this summary. */ + string toString() { + exists(string withCall, string withContent | + (if hasCall = true then withCall = "with" else withCall = "without") and + (if content != "" then withContent = " with content " + content else withContent = "") and + result = "type tracker " + withCall + " call steps" + withContent + ) + } + + /** + * Holds if this is the starting point of type tracking. + */ + predicate start() { hasCall = false and content = "" } + + /** + * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`. + * The type tracking only ends after the content has been loaded. 
+ */ + predicate startInContent(ContentName contentName) { hasCall = false and content = contentName } + + /** + * Holds if this is the starting point of type tracking + * when tracking a parameter into a call, but not out of it. + */ + predicate call() { hasCall = true and content = "" } + + /** + * Holds if this is the end point of type tracking. + */ + predicate end() { content = "" } + + /** + * INTERNAL. DO NOT USE. + * + * Holds if this type has been tracked into a call. + */ + boolean hasCall() { result = hasCall } + + /** + * INTERNAL. DO NOT USE. + * + * Gets the content associated with this type tracker. + */ + string getContent() { result = content } + + /** + * Gets a type tracker that starts where this one has left off to allow continued + * tracking. + * + * This predicate is only defined if the type is not associated to a piece of content. + */ + TypeTracker continue() { content = "" and result = this } + + /** + * Gets the summary that corresponds to having taken a forwards + * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + */ + pragma[inline] + TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) { + exists(StepSummary summary | + StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and + result = this.append(pragma[only_bind_into](summary)) + ) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + * + * Unlike `TypeTracker::step`, this predicate exposes all edges + * in the flow graph, and not just the edges between `Node`s. + * It may therefore be less performant. 
+ * + * Type tracking predicates using small steps typically take the following form: + * ```ql + * DataFlow::Node myType(DataFlow::TypeTracker t) { + * t.start() and + * result = < source of myType > + * or + * exists (DataFlow::TypeTracker t2 | + * t = t2.smallstep(myType(t2), result) + * ) + * } + * + * DataFlow::Node myType() { + * result = myType(DataFlow::TypeTracker::end()) + * } + * ``` + */ + pragma[inline] + TypeTracker smallstep(Node nodeFrom, Node nodeTo) { + exists(StepSummary summary | + StepSummary::smallstep(nodeFrom, nodeTo, summary) and + result = this.append(summary) + ) + or + simpleLocalFlowStep(nodeFrom, nodeTo) and + result = this + } +} + +/** Provides predicates for implementing custom `TypeTracker`s. */ +module TypeTracker { + /** + * Gets a valid end point of type tracking. + */ + TypeTracker end() { result.end() } +} + +private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content) + +/** + * Summary of the steps needed to back-track a use of a value to a given dataflow node. + * + * This can for example be used to track callbacks that are passed to a certain API, + * so we can model specific parameters of that callback as having a certain type. + * + * Note that type back-tracking does not provide a source/sink relation, that is, + * it may determine that a node will be used in an API call somewhere, but it won't + * determine exactly where that use was, or the path that led to the use. 
+ * + * It is recommended that all uses of this type are written in the following form, + * for back-tracking some callback type `myCallback`: + * + * ```ql + * DataFlow::TypeTrackingNode myCallback(DataFlow::TypeBackTracker t) { + * t.start() and + * result = (< some API call >).getArgument(< n >).getALocalSource() + * or + * exists (DataFlow::TypeBackTracker t2 | + * result = myCallback(t2).backtrack(t2, t) + * ) + * } + * + * DataFlow::TypeTrackingNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) } + * ``` + * + * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent + * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual + * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`. + */ +class TypeBackTracker extends TTypeBackTracker { + Boolean hasReturn; + string content; + + TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) } + + /** Gets the summary resulting from prepending `step` to this type-tracking summary. */ + TypeBackTracker prepend(StepSummary step) { + step = LevelStep() and result = this + or + step = CallStep() and hasReturn = false and result = this + or + step = ReturnStep() and result = MkTypeBackTracker(true, content) + or + exists(string p | + step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p) + ) + or + step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "") + } + + /** Gets a textual representation of this summary. */ + string toString() { + exists(string withReturn, string withContent | + (if hasReturn = true then withReturn = "with" else withReturn = "without") and + (if content != "" then withContent = " with content " + content else withContent = "") and + result = "type back-tracker " + withReturn + " return steps" + withContent + ) + } + + /** + * Holds if this is the starting point of type tracking. 
+ */ + predicate start() { hasReturn = false and content = "" } + + /** + * Holds if this is the end point of type tracking. + */ + predicate end() { content = "" } + + /** + * INTERNAL. DO NOT USE. + * + * Holds if this type has been back-tracked into a call through return edge. + */ + boolean hasReturn() { result = hasReturn } + + /** + * Gets a type tracker that starts where this one has left off to allow continued + * tracking. + * + * This predicate is only defined if the type has not been tracked into a piece of content. + */ + TypeBackTracker continue() { content = "" and result = this } + + /** + * Gets the summary that corresponds to having taken a backwards + * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`. + */ + pragma[inline] + TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) { + exists(StepSummary summary | + StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and + this = result.prepend(pragma[only_bind_into](summary)) + ) + } + + /** + * Gets the summary that corresponds to having taken a backwards + * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`. + * + * Unlike `TypeBackTracker::step`, this predicate exposes all edges + * in the flowgraph, and not just the edges between + * `TypeTrackingNode`s. It may therefore be less performant. 
+ * + * Type tracking predicates using small steps typically take the following form: + * ```ql + * DataFlow::Node myType(DataFlow::TypeBackTracker t) { + * t.start() and + * result = < some API call >.getArgument(< n >) + * or + * exists (DataFlow::TypeBackTracker t2 | + * t = t2.smallstep(result, myType(t2)) + * ) + * } + * + * DataFlow::Node myType() { + * result = myType(DataFlow::TypeBackTracker::end()) + * } + * ``` + */ + pragma[inline] + TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) { + exists(StepSummary summary | + StepSummary::smallstep(nodeFrom, nodeTo, summary) and + this = result.prepend(summary) + ) + or + simpleLocalFlowStep(nodeFrom, nodeTo) and + this = result + } +} + +/** Provides predicates for implementing custom `TypeBackTracker`s. */ +module TypeBackTracker { + /** + * Gets a valid end point of type back-tracking. + */ + TypeBackTracker end() { result.end() } +} diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll new file mode 100644 index 00000000000..40beb734d37 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -0,0 +1,146 @@ +private import codeql.ruby.AST as AST +private import codeql.ruby.CFG as CFG +private import CFG::CfgNodes +private import codeql.ruby.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlowPublic +private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch +private import codeql.ruby.dataflow.internal.SsaImpl as SsaImpl + +class Node = DataFlowPublic::Node; + +class TypeTrackingNode = DataFlowPublic::LocalSourceNode; + +predicate simpleLocalFlowStep = DataFlowPrivate::localFlowStepTypeTracker/2; + +predicate jumpStep = DataFlowPrivate::jumpStep/2; + +/** + * Gets 
the name of a possible piece of content. This will usually include things like + * + * - Attribute names (in Python) + * - Property names (in JavaScript) + */ +string getPossibleContentName() { result = getSetterCallAttributeName(_) } + +/** + * Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. + * + * Flow into summarized library methods is not included, as that will lead to negative + * recursion (or, at best, terrible performance), since identifying calls to library + * methods is done using API graphs (which uses type tracking). + */ +predicate callStep(Node nodeFrom, Node nodeTo) { + exists(ExprNodes::CallCfgNode call, CFG::CfgScope callable, int i | + DataFlowDispatch::getTarget(call) = callable and + nodeFrom.(DataFlowPrivate::ArgumentNode).sourceArgumentOf(call, i) and + nodeTo.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable, i) + ) + or + // In normal data-flow, this will be a local flow step. But for type tracking + // we model it as a call step, in order to avoid computing a potential + // self-cross product of all calls to a function that returns one of its parameters + // (only to later filter that flow out using `TypeTracker::append`). + nodeTo = + DataFlowPrivate::LocalFlow::getParameterDefNode(nodeFrom + .(DataFlowPublic::ParameterNode) + .getParameter()) +} + +/** + * Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. + * + * Flow out of summarized library methods is not included, as that will lead to negative + * recursion (or, at best, terrible performance), since identifying calls to library + * methods is done using API graphs (which uses type tracking). 
+ */ +predicate returnStep(Node nodeFrom, Node nodeTo) { + exists(ExprNodes::CallCfgNode call | + nodeFrom instanceof DataFlowPrivate::ReturnNode and + nodeFrom.(DataFlowPrivate::NodeImpl).getCfgScope() = DataFlowDispatch::getTarget(call) and + nodeTo.asExpr().getNode() = call.getNode() + ) + or + // In normal data-flow, this will be a local flow step. But for type tracking + // we model it as a returning flow step, in order to avoid computing a potential + // self-cross product of all calls to a function that returns one of its parameters + // (only to later filter that flow out using `TypeTracker::append`). + nodeTo.(DataFlowPrivate::SynthReturnNode).getAnInput() = nodeFrom +} + +/** + * Holds if `nodeFrom` is being written to the `content` content of the object + * in `nodeTo`. + * + * Note that the choice of `nodeTo` does not have to make sense + * "chronologically". All we care about is whether the `content` content of + * `nodeTo` can have a specific type, and the assumption is that if a specific + * type appears here, then any access of that particular content can yield + * something of that particular type. + * + * Thus, in an example such as + * + * ```rb + * def foo(y) + * x = Foo.new + * bar(x) + * x.content = y + * baz(x) + * end + * + * def bar(x) + * z = x.content + * end + * ``` + * for the content write `x.content = y`, we will have `content` being the + * literal string `"content"`, `nodeFrom` will be `y`, and `nodeTo` will be the + * `Foo` object created on the first line of the function. This means we will + * track the fact that `x.content` can have the type of `y` into the assignment + * to `z` inside `bar`, even though this content write happens _after_ `bar` is + * called. 
+ */ +predicate basicStoreStep(Node nodeFrom, DataFlowPublic::LocalSourceNode nodeTo, string content) { + // TODO: support SetterMethodCall inside TuplePattern + exists(ExprNodes::MethodCallCfgNode call | + content = getSetterCallAttributeName(call.getExpr()) and + nodeTo.(DataFlowPublic::ExprNode).getExprNode() = call.getReceiver() and + call.getExpr() instanceof AST::SetterMethodCall and + call.getArgument(call.getNumberOfArguments() - 1) = + nodeFrom.(DataFlowPublic::ExprNode).getExprNode() + ) +} + +/** + * Gets the name of the attribute being set by the setter method call, i.e. + * the name of the setter method without the trailing `=` (obtained by dropping the last character of the method name). In the following + * example, the result is `"bar"`. + * + * ```rb + * foo.bar = 1 + * ``` + */ +private string getSetterCallAttributeName(AST::SetterMethodCall call) { + // TODO: this should be exposed in `SetterMethodCall` + exists(string setterName | + setterName = call.getMethodName() and result = setterName.prefix(setterName.length() - 1) + ) +} + +/** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. A read is modeled as a method call with no arguments: `content` is the method name, `nodeFrom` is the receiver, and `nodeTo` is the call itself. + */ +predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) { + exists(ExprNodes::MethodCallCfgNode call | + call.getExpr().getNumberOfArguments() = 0 and + content = call.getExpr().(AST::MethodCall).getMethodName() and + nodeFrom.asExpr() = call.getReceiver() and + nodeTo.asExpr() = call + ) +} + +/** + * A utility class that is equivalent to `boolean` but does not require type joining.
+ */ +class Boolean extends boolean { + Boolean() { this = true or this = false } +} diff --git a/repo-tests/codeql-ruby/ql/lib/qlpack.yml b/repo-tests/codeql-ruby/ql/lib/qlpack.yml new file mode 100644 index 00000000000..91f40532fc9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/qlpack.yml @@ -0,0 +1,6 @@ +name: codeql/ruby-all +version: 0.0.2 +extractor: ruby +dbscheme: ruby.dbscheme +upgrades: upgrades +library: true diff --git a/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme b/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme new file mode 100644 index 00000000000..f36dd8a35ce --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme @@ -0,0 +1,1318 @@ +// CodeQL database schema for Ruby +// Automatically generated from the tree-sitter grammar; do not edit + +@location = @location_default + +locations_default( + unique int id: @location_default, + int file: @file ref, + int start_line: int ref, + int start_column: int ref, + int end_line: int ref, + int end_column: int ref +); + +files( + unique int id: @file, + string name: string ref +); + +folders( + unique int id: @folder, + string name: string ref +); + +@container = @file | @folder + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +sourceLocationPrefix( + string prefix: string ref +); + +diagnostics( + unique int id: @diagnostic, + int severity: int ref, + string error_tag: string ref, + string error_message: string ref, + string full_error_message: string ref, + int location: @location_default ref +); + +case @diagnostic.severity of + 10 = @diagnostic_debug +| 20 = @diagnostic_info +| 30 = @diagnostic_warning +| 40 = @diagnostic_error +; + + +@ruby_underscore_arg = @ruby_assignment | @ruby_binary | @ruby_conditional | @ruby_operator_assignment | @ruby_range | @ruby_unary | @ruby_underscore_primary + +@ruby_underscore_lhs = @ruby_call | @ruby_element_reference | @ruby_scope_resolution | @ruby_token_false | @ruby_token_nil | @ruby_token_true | 
@ruby_underscore_variable + +@ruby_underscore_method_name = @ruby_delimited_symbol | @ruby_setter | @ruby_token_class_variable | @ruby_token_constant | @ruby_token_global_variable | @ruby_token_identifier | @ruby_token_instance_variable | @ruby_token_operator | @ruby_token_simple_symbol + +@ruby_underscore_primary = @ruby_array | @ruby_begin | @ruby_break | @ruby_case__ | @ruby_chained_string | @ruby_class | @ruby_delimited_symbol | @ruby_for | @ruby_hash | @ruby_if | @ruby_lambda | @ruby_method | @ruby_module | @ruby_next | @ruby_parenthesized_statements | @ruby_rational | @ruby_redo | @ruby_regex | @ruby_retry | @ruby_return | @ruby_singleton_class | @ruby_singleton_method | @ruby_string__ | @ruby_string_array | @ruby_subshell | @ruby_symbol_array | @ruby_token_character | @ruby_token_complex | @ruby_token_float | @ruby_token_heredoc_beginning | @ruby_token_integer | @ruby_token_simple_symbol | @ruby_unary | @ruby_underscore_lhs | @ruby_unless | @ruby_until | @ruby_while | @ruby_yield + +@ruby_underscore_statement = @ruby_alias | @ruby_assignment | @ruby_begin_block | @ruby_binary | @ruby_break | @ruby_call | @ruby_end_block | @ruby_if_modifier | @ruby_next | @ruby_operator_assignment | @ruby_rescue_modifier | @ruby_return | @ruby_unary | @ruby_undef | @ruby_underscore_arg | @ruby_unless_modifier | @ruby_until_modifier | @ruby_while_modifier | @ruby_yield + +@ruby_underscore_variable = @ruby_token_class_variable | @ruby_token_constant | @ruby_token_global_variable | @ruby_token_identifier | @ruby_token_instance_variable | @ruby_token_self | @ruby_token_super + +ruby_alias_def( + unique int id: @ruby_alias, + int alias: @ruby_underscore_method_name ref, + int name: @ruby_underscore_method_name ref, + int loc: @location ref +); + +@ruby_argument_list_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | 
@ruby_yield + +#keyset[ruby_argument_list, index] +ruby_argument_list_child( + int ruby_argument_list: @ruby_argument_list ref, + int index: int ref, + unique int child: @ruby_argument_list_child_type ref +); + +ruby_argument_list_def( + unique int id: @ruby_argument_list, + int loc: @location ref +); + +@ruby_array_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | @ruby_yield + +#keyset[ruby_array, index] +ruby_array_child( + int ruby_array: @ruby_array ref, + int index: int ref, + unique int child: @ruby_array_child_type ref +); + +ruby_array_def( + unique int id: @ruby_array, + int loc: @location ref +); + +@ruby_assignment_left_type = @ruby_left_assignment_list | @ruby_underscore_lhs + +@ruby_assignment_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_right_assignment_list | @ruby_splat_argument | @ruby_underscore_arg | @ruby_yield + +ruby_assignment_def( + unique int id: @ruby_assignment, + int left: @ruby_assignment_left_type ref, + int right: @ruby_assignment_right_type ref, + int loc: @location ref +); + +@ruby_bare_string_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_bare_string, index] +ruby_bare_string_child( + int ruby_bare_string: @ruby_bare_string ref, + int index: int ref, + unique int child: @ruby_bare_string_child_type ref +); + +ruby_bare_string_def( + unique int id: @ruby_bare_string, + int loc: @location ref +); + +@ruby_bare_symbol_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_bare_symbol, index] +ruby_bare_symbol_child( + int ruby_bare_symbol: @ruby_bare_symbol ref, + int index: int ref, + unique int child: @ruby_bare_symbol_child_type ref +); + +ruby_bare_symbol_def( + unique int id: @ruby_bare_symbol, + int loc: @location ref +); + 
+@ruby_begin_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_begin, index] +ruby_begin_child( + int ruby_begin: @ruby_begin ref, + int index: int ref, + unique int child: @ruby_begin_child_type ref +); + +ruby_begin_def( + unique int id: @ruby_begin, + int loc: @location ref +); + +@ruby_begin_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_begin_block, index] +ruby_begin_block_child( + int ruby_begin_block: @ruby_begin_block ref, + int index: int ref, + unique int child: @ruby_begin_block_child_type ref +); + +ruby_begin_block_def( + unique int id: @ruby_begin_block, + int loc: @location ref +); + +@ruby_binary_left_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +case @ruby_binary.operator of + 0 = @ruby_binary_bangequal +| 1 = @ruby_binary_bangtilde +| 2 = @ruby_binary_percent +| 3 = @ruby_binary_ampersand +| 4 = @ruby_binary_ampersandampersand +| 5 = @ruby_binary_star +| 6 = @ruby_binary_starstar +| 7 = @ruby_binary_plus +| 8 = @ruby_binary_minus +| 9 = @ruby_binary_slash +| 10 = @ruby_binary_langle +| 11 = @ruby_binary_langlelangle +| 12 = @ruby_binary_langleequal +| 13 = @ruby_binary_langleequalrangle +| 14 = @ruby_binary_equalequal +| 15 = @ruby_binary_equalequalequal +| 16 = @ruby_binary_equaltilde +| 17 = @ruby_binary_rangle +| 18 = @ruby_binary_rangleequal +| 19 = @ruby_binary_ranglerangle +| 20 = @ruby_binary_caret +| 21 = @ruby_binary_and +| 22 = @ruby_binary_or +| 23 = @ruby_binary_pipe +| 24 = @ruby_binary_pipepipe +; + + +@ruby_binary_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_binary_def( + unique int id: @ruby_binary, + int left: @ruby_binary_left_type ref, + int operator: int ref, + int right: @ruby_binary_right_type ref, + int loc: @location ref +); + +ruby_block_parameters( + unique int ruby_block: 
@ruby_block ref, + unique int parameters: @ruby_block_parameters ref +); + +@ruby_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_block, index] +ruby_block_child( + int ruby_block: @ruby_block ref, + int index: int ref, + unique int child: @ruby_block_child_type ref +); + +ruby_block_def( + unique int id: @ruby_block, + int loc: @location ref +); + +ruby_block_argument_def( + unique int id: @ruby_block_argument, + int child: @ruby_underscore_arg ref, + int loc: @location ref +); + +ruby_block_parameter_def( + unique int id: @ruby_block_parameter, + int name: @ruby_token_identifier ref, + int loc: @location ref +); + +@ruby_block_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier + +#keyset[ruby_block_parameters, index] +ruby_block_parameters_child( + int ruby_block_parameters: @ruby_block_parameters ref, + int index: int ref, + unique int child: @ruby_block_parameters_child_type ref +); + +ruby_block_parameters_def( + unique int id: @ruby_block_parameters, + int loc: @location ref +); + +ruby_break_child( + unique int ruby_break: @ruby_break ref, + unique int child: @ruby_argument_list ref +); + +ruby_break_def( + unique int id: @ruby_break, + int loc: @location ref +); + +ruby_call_arguments( + unique int ruby_call: @ruby_call ref, + unique int arguments: @ruby_argument_list ref +); + +@ruby_call_block_type = @ruby_block | @ruby_do_block + +ruby_call_block( + unique int ruby_call: @ruby_call ref, + unique int block: @ruby_call_block_type ref +); + +@ruby_call_method_type = @ruby_argument_list | @ruby_scope_resolution | @ruby_token_operator | @ruby_underscore_variable + +@ruby_call_receiver_type = @ruby_call | @ruby_underscore_primary + +ruby_call_receiver( + unique int ruby_call: @ruby_call ref, + unique int receiver: 
@ruby_call_receiver_type ref +); + +ruby_call_def( + unique int id: @ruby_call, + int method: @ruby_call_method_type ref, + int loc: @location ref +); + +ruby_case_value( + unique int ruby_case__: @ruby_case__ ref, + unique int value: @ruby_underscore_statement ref +); + +@ruby_case_child_type = @ruby_else | @ruby_when + +#keyset[ruby_case__, index] +ruby_case_child( + int ruby_case__: @ruby_case__ ref, + int index: int ref, + unique int child: @ruby_case_child_type ref +); + +ruby_case_def( + unique int id: @ruby_case__, + int loc: @location ref +); + +#keyset[ruby_chained_string, index] +ruby_chained_string_child( + int ruby_chained_string: @ruby_chained_string ref, + int index: int ref, + unique int child: @ruby_string__ ref +); + +ruby_chained_string_def( + unique int id: @ruby_chained_string, + int loc: @location ref +); + +@ruby_class_name_type = @ruby_scope_resolution | @ruby_token_constant + +ruby_class_superclass( + unique int ruby_class: @ruby_class ref, + unique int superclass: @ruby_superclass ref +); + +@ruby_class_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_class, index] +ruby_class_child( + int ruby_class: @ruby_class ref, + int index: int ref, + unique int child: @ruby_class_child_type ref +); + +ruby_class_def( + unique int id: @ruby_class, + int name: @ruby_class_name_type ref, + int loc: @location ref +); + +ruby_conditional_def( + unique int id: @ruby_conditional, + int alternative: @ruby_underscore_arg ref, + int condition: @ruby_underscore_arg ref, + int consequence: @ruby_underscore_arg ref, + int loc: @location ref +); + +@ruby_delimited_symbol_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_delimited_symbol, index] +ruby_delimited_symbol_child( + int ruby_delimited_symbol: @ruby_delimited_symbol ref, + int index: int ref, + unique int child: @ruby_delimited_symbol_child_type ref +); + 
+ruby_delimited_symbol_def( + unique int id: @ruby_delimited_symbol, + int loc: @location ref +); + +@ruby_destructured_left_assignment_child_type = @ruby_destructured_left_assignment | @ruby_rest_assignment | @ruby_underscore_lhs + +#keyset[ruby_destructured_left_assignment, index] +ruby_destructured_left_assignment_child( + int ruby_destructured_left_assignment: @ruby_destructured_left_assignment ref, + int index: int ref, + unique int child: @ruby_destructured_left_assignment_child_type ref +); + +ruby_destructured_left_assignment_def( + unique int id: @ruby_destructured_left_assignment, + int loc: @location ref +); + +@ruby_destructured_parameter_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier + +#keyset[ruby_destructured_parameter, index] +ruby_destructured_parameter_child( + int ruby_destructured_parameter: @ruby_destructured_parameter ref, + int index: int ref, + unique int child: @ruby_destructured_parameter_child_type ref +); + +ruby_destructured_parameter_def( + unique int id: @ruby_destructured_parameter, + int loc: @location ref +); + +@ruby_do_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_do, index] +ruby_do_child( + int ruby_do: @ruby_do ref, + int index: int ref, + unique int child: @ruby_do_child_type ref +); + +ruby_do_def( + unique int id: @ruby_do, + int loc: @location ref +); + +ruby_do_block_parameters( + unique int ruby_do_block: @ruby_do_block ref, + unique int parameters: @ruby_block_parameters ref +); + +@ruby_do_block_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_do_block, index] +ruby_do_block_child( + int ruby_do_block: @ruby_do_block ref, + int index: int ref, + unique int child: @ruby_do_block_child_type ref +); + +ruby_do_block_def( + 
unique int id: @ruby_do_block, + int loc: @location ref +); + +@ruby_element_reference_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | @ruby_yield + +#keyset[ruby_element_reference, index] +ruby_element_reference_child( + int ruby_element_reference: @ruby_element_reference ref, + int index: int ref, + unique int child: @ruby_element_reference_child_type ref +); + +ruby_element_reference_def( + unique int id: @ruby_element_reference, + int object: @ruby_underscore_primary ref, + int loc: @location ref +); + +@ruby_else_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_else, index] +ruby_else_child( + int ruby_else: @ruby_else ref, + int index: int ref, + unique int child: @ruby_else_child_type ref +); + +ruby_else_def( + unique int id: @ruby_else, + int loc: @location ref +); + +@ruby_elsif_alternative_type = @ruby_else | @ruby_elsif + +ruby_elsif_alternative( + unique int ruby_elsif: @ruby_elsif ref, + unique int alternative: @ruby_elsif_alternative_type ref +); + +ruby_elsif_consequence( + unique int ruby_elsif: @ruby_elsif ref, + unique int consequence: @ruby_then ref +); + +ruby_elsif_def( + unique int id: @ruby_elsif, + int condition: @ruby_underscore_statement ref, + int loc: @location ref +); + +@ruby_end_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_end_block, index] +ruby_end_block_child( + int ruby_end_block: @ruby_end_block ref, + int index: int ref, + unique int child: @ruby_end_block_child_type ref +); + +ruby_end_block_def( + unique int id: @ruby_end_block, + int loc: @location ref +); + +@ruby_ensure_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_ensure, index] +ruby_ensure_child( + int ruby_ensure: @ruby_ensure ref, + int index: int ref, + unique int child: 
@ruby_ensure_child_type ref +); + +ruby_ensure_def( + unique int id: @ruby_ensure, + int loc: @location ref +); + +ruby_exception_variable_def( + unique int id: @ruby_exception_variable, + int child: @ruby_underscore_lhs ref, + int loc: @location ref +); + +@ruby_exceptions_child_type = @ruby_splat_argument | @ruby_underscore_arg + +#keyset[ruby_exceptions, index] +ruby_exceptions_child( + int ruby_exceptions: @ruby_exceptions ref, + int index: int ref, + unique int child: @ruby_exceptions_child_type ref +); + +ruby_exceptions_def( + unique int id: @ruby_exceptions, + int loc: @location ref +); + +@ruby_for_pattern_type = @ruby_left_assignment_list | @ruby_underscore_lhs + +ruby_for_def( + unique int id: @ruby_for, + int body: @ruby_do ref, + int pattern: @ruby_for_pattern_type ref, + int value: @ruby_in ref, + int loc: @location ref +); + +@ruby_hash_child_type = @ruby_hash_splat_argument | @ruby_pair + +#keyset[ruby_hash, index] +ruby_hash_child( + int ruby_hash: @ruby_hash ref, + int index: int ref, + unique int child: @ruby_hash_child_type ref +); + +ruby_hash_def( + unique int id: @ruby_hash, + int loc: @location ref +); + +ruby_hash_splat_argument_def( + unique int id: @ruby_hash_splat_argument, + int child: @ruby_underscore_arg ref, + int loc: @location ref +); + +ruby_hash_splat_parameter_name( + unique int ruby_hash_splat_parameter: @ruby_hash_splat_parameter ref, + unique int name: @ruby_token_identifier ref +); + +ruby_hash_splat_parameter_def( + unique int id: @ruby_hash_splat_parameter, + int loc: @location ref +); + +@ruby_heredoc_body_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_heredoc_content | @ruby_token_heredoc_end + +#keyset[ruby_heredoc_body, index] +ruby_heredoc_body_child( + int ruby_heredoc_body: @ruby_heredoc_body ref, + int index: int ref, + unique int child: @ruby_heredoc_body_child_type ref +); + +ruby_heredoc_body_def( + unique int id: @ruby_heredoc_body, + int loc: @location ref +); + 
+@ruby_if_alternative_type = @ruby_else | @ruby_elsif + +ruby_if_alternative( + unique int ruby_if: @ruby_if ref, + unique int alternative: @ruby_if_alternative_type ref +); + +ruby_if_consequence( + unique int ruby_if: @ruby_if ref, + unique int consequence: @ruby_then ref +); + +ruby_if_def( + unique int id: @ruby_if, + int condition: @ruby_underscore_statement ref, + int loc: @location ref +); + +@ruby_if_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_if_modifier_def( + unique int id: @ruby_if_modifier, + int body: @ruby_underscore_statement ref, + int condition: @ruby_if_modifier_condition_type ref, + int loc: @location ref +); + +ruby_in_def( + unique int id: @ruby_in, + int child: @ruby_underscore_arg ref, + int loc: @location ref +); + +@ruby_interpolation_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_interpolation, index] +ruby_interpolation_child( + int ruby_interpolation: @ruby_interpolation ref, + int index: int ref, + unique int child: @ruby_interpolation_child_type ref +); + +ruby_interpolation_def( + unique int id: @ruby_interpolation, + int loc: @location ref +); + +ruby_keyword_parameter_value( + unique int ruby_keyword_parameter: @ruby_keyword_parameter ref, + unique int value: @ruby_underscore_arg ref +); + +ruby_keyword_parameter_def( + unique int id: @ruby_keyword_parameter, + int name: @ruby_token_identifier ref, + int loc: @location ref +); + +@ruby_lambda_body_type = @ruby_block | @ruby_do_block + +ruby_lambda_parameters( + unique int ruby_lambda: @ruby_lambda ref, + unique int parameters: @ruby_lambda_parameters ref +); + +ruby_lambda_def( + unique int id: @ruby_lambda, + int body: @ruby_lambda_body_type ref, + int loc: @location ref +); + +@ruby_lambda_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | 
@ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier + +#keyset[ruby_lambda_parameters, index] +ruby_lambda_parameters_child( + int ruby_lambda_parameters: @ruby_lambda_parameters ref, + int index: int ref, + unique int child: @ruby_lambda_parameters_child_type ref +); + +ruby_lambda_parameters_def( + unique int id: @ruby_lambda_parameters, + int loc: @location ref +); + +@ruby_left_assignment_list_child_type = @ruby_destructured_left_assignment | @ruby_rest_assignment | @ruby_underscore_lhs + +#keyset[ruby_left_assignment_list, index] +ruby_left_assignment_list_child( + int ruby_left_assignment_list: @ruby_left_assignment_list ref, + int index: int ref, + unique int child: @ruby_left_assignment_list_child_type ref +); + +ruby_left_assignment_list_def( + unique int id: @ruby_left_assignment_list, + int loc: @location ref +); + +ruby_method_parameters( + unique int ruby_method: @ruby_method ref, + unique int parameters: @ruby_method_parameters ref +); + +@ruby_method_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_method, index] +ruby_method_child( + int ruby_method: @ruby_method ref, + int index: int ref, + unique int child: @ruby_method_child_type ref +); + +ruby_method_def( + unique int id: @ruby_method, + int name: @ruby_underscore_method_name ref, + int loc: @location ref +); + +@ruby_method_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier + +#keyset[ruby_method_parameters, index] +ruby_method_parameters_child( + int ruby_method_parameters: @ruby_method_parameters ref, + int index: int ref, + unique int child: @ruby_method_parameters_child_type ref +); + +ruby_method_parameters_def( + unique int id: @ruby_method_parameters, + int loc: @location ref +); + 
+@ruby_module_name_type = @ruby_scope_resolution | @ruby_token_constant + +@ruby_module_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_module, index] +ruby_module_child( + int ruby_module: @ruby_module ref, + int index: int ref, + unique int child: @ruby_module_child_type ref +); + +ruby_module_def( + unique int id: @ruby_module, + int name: @ruby_module_name_type ref, + int loc: @location ref +); + +ruby_next_child( + unique int ruby_next: @ruby_next ref, + unique int child: @ruby_argument_list ref +); + +ruby_next_def( + unique int id: @ruby_next, + int loc: @location ref +); + +case @ruby_operator_assignment.operator of + 0 = @ruby_operator_assignment_percentequal +| 1 = @ruby_operator_assignment_ampersandampersandequal +| 2 = @ruby_operator_assignment_ampersandequal +| 3 = @ruby_operator_assignment_starstarequal +| 4 = @ruby_operator_assignment_starequal +| 5 = @ruby_operator_assignment_plusequal +| 6 = @ruby_operator_assignment_minusequal +| 7 = @ruby_operator_assignment_slashequal +| 8 = @ruby_operator_assignment_langlelangleequal +| 9 = @ruby_operator_assignment_ranglerangleequal +| 10 = @ruby_operator_assignment_caretequal +| 11 = @ruby_operator_assignment_pipeequal +| 12 = @ruby_operator_assignment_pipepipeequal +; + + +@ruby_operator_assignment_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_operator_assignment_def( + unique int id: @ruby_operator_assignment, + int left: @ruby_underscore_lhs ref, + int operator: int ref, + int right: @ruby_operator_assignment_right_type ref, + int loc: @location ref +); + +ruby_optional_parameter_def( + unique int id: @ruby_optional_parameter, + int name: @ruby_token_identifier ref, + int value: @ruby_underscore_arg ref, + int loc: @location ref +); + +@ruby_pair_key_type = @ruby_string__ | @ruby_token_hash_key_symbol | @ruby_underscore_arg + +ruby_pair_def( + unique int id: 
@ruby_pair, + int key__: @ruby_pair_key_type ref, + int value: @ruby_underscore_arg ref, + int loc: @location ref +); + +@ruby_parenthesized_statements_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_parenthesized_statements, index] +ruby_parenthesized_statements_child( + int ruby_parenthesized_statements: @ruby_parenthesized_statements ref, + int index: int ref, + unique int child: @ruby_parenthesized_statements_child_type ref +); + +ruby_parenthesized_statements_def( + unique int id: @ruby_parenthesized_statements, + int loc: @location ref +); + +@ruby_pattern_child_type = @ruby_splat_argument | @ruby_underscore_arg + +ruby_pattern_def( + unique int id: @ruby_pattern, + int child: @ruby_pattern_child_type ref, + int loc: @location ref +); + +@ruby_program_child_type = @ruby_token_empty_statement | @ruby_token_uninterpreted | @ruby_underscore_statement + +#keyset[ruby_program, index] +ruby_program_child( + int ruby_program: @ruby_program ref, + int index: int ref, + unique int child: @ruby_program_child_type ref +); + +ruby_program_def( + unique int id: @ruby_program, + int loc: @location ref +); + +ruby_range_begin( + unique int ruby_range: @ruby_range ref, + unique int begin: @ruby_underscore_arg ref +); + +ruby_range_end( + unique int ruby_range: @ruby_range ref, + unique int end: @ruby_underscore_arg ref +); + +case @ruby_range.operator of + 0 = @ruby_range_dotdot +| 1 = @ruby_range_dotdotdot +; + + +ruby_range_def( + unique int id: @ruby_range, + int operator: int ref, + int loc: @location ref +); + +@ruby_rational_child_type = @ruby_token_float | @ruby_token_integer + +ruby_rational_def( + unique int id: @ruby_rational, + int child: @ruby_rational_child_type ref, + int loc: @location ref +); + +ruby_redo_child( + unique int ruby_redo: @ruby_redo ref, + unique int child: @ruby_argument_list ref +); + +ruby_redo_def( + unique int id: @ruby_redo, + int loc: @location ref +); + +@ruby_regex_child_type = @ruby_interpolation | 
@ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_regex, index] +ruby_regex_child( + int ruby_regex: @ruby_regex ref, + int index: int ref, + unique int child: @ruby_regex_child_type ref +); + +ruby_regex_def( + unique int id: @ruby_regex, + int loc: @location ref +); + +ruby_rescue_body( + unique int ruby_rescue: @ruby_rescue ref, + unique int body: @ruby_then ref +); + +ruby_rescue_exceptions( + unique int ruby_rescue: @ruby_rescue ref, + unique int exceptions: @ruby_exceptions ref +); + +ruby_rescue_variable( + unique int ruby_rescue: @ruby_rescue ref, + unique int variable: @ruby_exception_variable ref +); + +ruby_rescue_def( + unique int id: @ruby_rescue, + int loc: @location ref +); + +@ruby_rescue_modifier_handler_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_rescue_modifier_def( + unique int id: @ruby_rescue_modifier, + int body: @ruby_underscore_statement ref, + int handler: @ruby_rescue_modifier_handler_type ref, + int loc: @location ref +); + +ruby_rest_assignment_child( + unique int ruby_rest_assignment: @ruby_rest_assignment ref, + unique int child: @ruby_underscore_lhs ref +); + +ruby_rest_assignment_def( + unique int id: @ruby_rest_assignment, + int loc: @location ref +); + +ruby_retry_child( + unique int ruby_retry: @ruby_retry ref, + unique int child: @ruby_argument_list ref +); + +ruby_retry_def( + unique int id: @ruby_retry, + int loc: @location ref +); + +ruby_return_child( + unique int ruby_return: @ruby_return ref, + unique int child: @ruby_argument_list ref +); + +ruby_return_def( + unique int id: @ruby_return, + int loc: @location ref +); + +@ruby_right_assignment_list_child_type = @ruby_splat_argument | @ruby_underscore_arg + +#keyset[ruby_right_assignment_list, index] +ruby_right_assignment_list_child( + int ruby_right_assignment_list: @ruby_right_assignment_list ref, + int index: int ref, + unique int child: @ruby_right_assignment_list_child_type ref +); + 
+ruby_right_assignment_list_def( + unique int id: @ruby_right_assignment_list, + int loc: @location ref +); + +@ruby_scope_resolution_name_type = @ruby_token_constant | @ruby_token_identifier + +ruby_scope_resolution_scope( + unique int ruby_scope_resolution: @ruby_scope_resolution ref, + unique int scope: @ruby_underscore_primary ref +); + +ruby_scope_resolution_def( + unique int id: @ruby_scope_resolution, + int name: @ruby_scope_resolution_name_type ref, + int loc: @location ref +); + +ruby_setter_def( + unique int id: @ruby_setter, + int name: @ruby_token_identifier ref, + int loc: @location ref +); + +@ruby_singleton_class_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_singleton_class, index] +ruby_singleton_class_child( + int ruby_singleton_class: @ruby_singleton_class ref, + int index: int ref, + unique int child: @ruby_singleton_class_child_type ref +); + +ruby_singleton_class_def( + unique int id: @ruby_singleton_class, + int value: @ruby_underscore_arg ref, + int loc: @location ref +); + +@ruby_singleton_method_object_type = @ruby_underscore_arg | @ruby_underscore_variable + +ruby_singleton_method_parameters( + unique int ruby_singleton_method: @ruby_singleton_method ref, + unique int parameters: @ruby_method_parameters ref +); + +@ruby_singleton_method_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_singleton_method, index] +ruby_singleton_method_child( + int ruby_singleton_method: @ruby_singleton_method ref, + int index: int ref, + unique int child: @ruby_singleton_method_child_type ref +); + +ruby_singleton_method_def( + unique int id: @ruby_singleton_method, + int name: @ruby_underscore_method_name ref, + int object: @ruby_singleton_method_object_type ref, + int loc: @location ref +); + +ruby_splat_argument_def( + unique int id: @ruby_splat_argument, + int child: @ruby_underscore_arg ref, + 
int loc: @location ref +); + +ruby_splat_parameter_name( + unique int ruby_splat_parameter: @ruby_splat_parameter ref, + unique int name: @ruby_token_identifier ref +); + +ruby_splat_parameter_def( + unique int id: @ruby_splat_parameter, + int loc: @location ref +); + +@ruby_string_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_string__, index] +ruby_string_child( + int ruby_string__: @ruby_string__ ref, + int index: int ref, + unique int child: @ruby_string_child_type ref +); + +ruby_string_def( + unique int id: @ruby_string__, + int loc: @location ref +); + +#keyset[ruby_string_array, index] +ruby_string_array_child( + int ruby_string_array: @ruby_string_array ref, + int index: int ref, + unique int child: @ruby_bare_string ref +); + +ruby_string_array_def( + unique int id: @ruby_string_array, + int loc: @location ref +); + +@ruby_subshell_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content + +#keyset[ruby_subshell, index] +ruby_subshell_child( + int ruby_subshell: @ruby_subshell ref, + int index: int ref, + unique int child: @ruby_subshell_child_type ref +); + +ruby_subshell_def( + unique int id: @ruby_subshell, + int loc: @location ref +); + +@ruby_superclass_child_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_superclass_def( + unique int id: @ruby_superclass, + int child: @ruby_superclass_child_type ref, + int loc: @location ref +); + +#keyset[ruby_symbol_array, index] +ruby_symbol_array_child( + int ruby_symbol_array: @ruby_symbol_array ref, + int index: int ref, + unique int child: @ruby_bare_symbol ref +); + +ruby_symbol_array_def( + unique int id: @ruby_symbol_array, + int loc: @location ref +); + +@ruby_then_child_type = @ruby_token_empty_statement | @ruby_underscore_statement + +#keyset[ruby_then, index] +ruby_then_child( + int ruby_then: @ruby_then ref, + int index: int ref, + unique int child: 
@ruby_then_child_type ref +); + +ruby_then_def( + unique int id: @ruby_then, + int loc: @location ref +); + +@ruby_unary_operand_type = @ruby_break | @ruby_call | @ruby_next | @ruby_parenthesized_statements | @ruby_return | @ruby_token_float | @ruby_token_integer | @ruby_underscore_arg | @ruby_yield + +case @ruby_unary.operator of + 0 = @ruby_unary_bang +| 1 = @ruby_unary_plus +| 2 = @ruby_unary_minus +| 3 = @ruby_unary_definedquestion +| 4 = @ruby_unary_not +| 5 = @ruby_unary_tilde +; + + +ruby_unary_def( + unique int id: @ruby_unary, + int operand: @ruby_unary_operand_type ref, + int operator: int ref, + int loc: @location ref +); + +#keyset[ruby_undef, index] +ruby_undef_child( + int ruby_undef: @ruby_undef ref, + int index: int ref, + unique int child: @ruby_underscore_method_name ref +); + +ruby_undef_def( + unique int id: @ruby_undef, + int loc: @location ref +); + +@ruby_unless_alternative_type = @ruby_else | @ruby_elsif + +ruby_unless_alternative( + unique int ruby_unless: @ruby_unless ref, + unique int alternative: @ruby_unless_alternative_type ref +); + +ruby_unless_consequence( + unique int ruby_unless: @ruby_unless ref, + unique int consequence: @ruby_then ref +); + +ruby_unless_def( + unique int id: @ruby_unless, + int condition: @ruby_underscore_statement ref, + int loc: @location ref +); + +@ruby_unless_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_unless_modifier_def( + unique int id: @ruby_unless_modifier, + int body: @ruby_underscore_statement ref, + int condition: @ruby_unless_modifier_condition_type ref, + int loc: @location ref +); + +ruby_until_def( + unique int id: @ruby_until, + int body: @ruby_do ref, + int condition: @ruby_underscore_statement ref, + int loc: @location ref +); + +@ruby_until_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_until_modifier_def( + unique int id: 
@ruby_until_modifier, + int body: @ruby_underscore_statement ref, + int condition: @ruby_until_modifier_condition_type ref, + int loc: @location ref +); + +ruby_when_body( + unique int ruby_when: @ruby_when ref, + unique int body: @ruby_then ref +); + +#keyset[ruby_when, index] +ruby_when_pattern( + int ruby_when: @ruby_when ref, + int index: int ref, + unique int pattern: @ruby_pattern ref +); + +ruby_when_def( + unique int id: @ruby_when, + int loc: @location ref +); + +ruby_while_def( + unique int id: @ruby_while, + int body: @ruby_do ref, + int condition: @ruby_underscore_statement ref, + int loc: @location ref +); + +@ruby_while_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield + +ruby_while_modifier_def( + unique int id: @ruby_while_modifier, + int body: @ruby_underscore_statement ref, + int condition: @ruby_while_modifier_condition_type ref, + int loc: @location ref +); + +ruby_yield_child( + unique int ruby_yield: @ruby_yield ref, + unique int child: @ruby_argument_list ref +); + +ruby_yield_def( + unique int id: @ruby_yield, + int loc: @location ref +); + +ruby_tokeninfo( + unique int id: @ruby_token, + int kind: int ref, + string value: string ref, + int loc: @location ref +); + +case @ruby_token.kind of + 0 = @ruby_reserved_word +| 1 = @ruby_token_character +| 2 = @ruby_token_class_variable +| 3 = @ruby_token_comment +| 4 = @ruby_token_complex +| 5 = @ruby_token_constant +| 6 = @ruby_token_empty_statement +| 7 = @ruby_token_escape_sequence +| 8 = @ruby_token_false +| 9 = @ruby_token_float +| 10 = @ruby_token_forward_argument +| 11 = @ruby_token_forward_parameter +| 12 = @ruby_token_global_variable +| 13 = @ruby_token_hash_key_symbol +| 14 = @ruby_token_heredoc_beginning +| 15 = @ruby_token_heredoc_content +| 16 = @ruby_token_heredoc_end +| 17 = @ruby_token_identifier +| 18 = @ruby_token_instance_variable +| 19 = @ruby_token_integer +| 20 = @ruby_token_nil +| 21 = @ruby_token_operator +| 22 
= @ruby_token_self +| 23 = @ruby_token_simple_symbol +| 24 = @ruby_token_string_content +| 25 = @ruby_token_super +| 26 = @ruby_token_true +| 27 = @ruby_token_uninterpreted +; + + +@ruby_ast_node = @ruby_alias | @ruby_argument_list | @ruby_array | @ruby_assignment | @ruby_bare_string | @ruby_bare_symbol | @ruby_begin | @ruby_begin_block | @ruby_binary | @ruby_block | @ruby_block_argument | @ruby_block_parameter | @ruby_block_parameters | @ruby_break | @ruby_call | @ruby_case__ | @ruby_chained_string | @ruby_class | @ruby_conditional | @ruby_delimited_symbol | @ruby_destructured_left_assignment | @ruby_destructured_parameter | @ruby_do | @ruby_do_block | @ruby_element_reference | @ruby_else | @ruby_elsif | @ruby_end_block | @ruby_ensure | @ruby_exception_variable | @ruby_exceptions | @ruby_for | @ruby_hash | @ruby_hash_splat_argument | @ruby_hash_splat_parameter | @ruby_heredoc_body | @ruby_if | @ruby_if_modifier | @ruby_in | @ruby_interpolation | @ruby_keyword_parameter | @ruby_lambda | @ruby_lambda_parameters | @ruby_left_assignment_list | @ruby_method | @ruby_method_parameters | @ruby_module | @ruby_next | @ruby_operator_assignment | @ruby_optional_parameter | @ruby_pair | @ruby_parenthesized_statements | @ruby_pattern | @ruby_program | @ruby_range | @ruby_rational | @ruby_redo | @ruby_regex | @ruby_rescue | @ruby_rescue_modifier | @ruby_rest_assignment | @ruby_retry | @ruby_return | @ruby_right_assignment_list | @ruby_scope_resolution | @ruby_setter | @ruby_singleton_class | @ruby_singleton_method | @ruby_splat_argument | @ruby_splat_parameter | @ruby_string__ | @ruby_string_array | @ruby_subshell | @ruby_superclass | @ruby_symbol_array | @ruby_then | @ruby_token | @ruby_unary | @ruby_undef | @ruby_unless | @ruby_unless_modifier | @ruby_until | @ruby_until_modifier | @ruby_when | @ruby_while | @ruby_while_modifier | @ruby_yield + +@ruby_ast_node_parent = @file | @ruby_ast_node + +#keyset[parent, parent_index] +ruby_ast_node_parent( + int child: @ruby_ast_node 
ref, + int parent: @ruby_ast_node_parent ref, + int parent_index: int ref +); + +erb_comment_directive_def( + unique int id: @erb_comment_directive, + int child: @erb_token_comment ref, + int loc: @location ref +); + +erb_directive_def( + unique int id: @erb_directive, + int child: @erb_token_code ref, + int loc: @location ref +); + +erb_graphql_directive_def( + unique int id: @erb_graphql_directive, + int child: @erb_token_code ref, + int loc: @location ref +); + +erb_output_directive_def( + unique int id: @erb_output_directive, + int child: @erb_token_code ref, + int loc: @location ref +); + +@erb_template_child_type = @erb_comment_directive | @erb_directive | @erb_graphql_directive | @erb_output_directive | @erb_token_content + +#keyset[erb_template, index] +erb_template_child( + int erb_template: @erb_template ref, + int index: int ref, + unique int child: @erb_template_child_type ref +); + +erb_template_def( + unique int id: @erb_template, + int loc: @location ref +); + +erb_tokeninfo( + unique int id: @erb_token, + int kind: int ref, + string value: string ref, + int loc: @location ref +); + +case @erb_token.kind of + 0 = @erb_reserved_word +| 1 = @erb_token_code +| 2 = @erb_token_comment +| 3 = @erb_token_content +; + + +@erb_ast_node = @erb_comment_directive | @erb_directive | @erb_graphql_directive | @erb_output_directive | @erb_template | @erb_token + +@erb_ast_node_parent = @erb_ast_node | @file + +#keyset[parent, parent_index] +erb_ast_node_parent( + int child: @erb_ast_node ref, + int parent: @erb_ast_node_parent ref, + int parent_index: int ref +); + diff --git a/repo-tests/codeql-ruby/ql/lib/ruby.qll b/repo-tests/codeql-ruby/ql/lib/ruby.qll new file mode 100644 index 00000000000..18468c9f8cf --- /dev/null +++ b/repo-tests/codeql-ruby/ql/lib/ruby.qll @@ -0,0 +1 @@ +import codeql.ruby.AST diff --git a/repo-tests/codeql-ruby/ql/lib/tutorial.qll b/repo-tests/codeql-ruby/ql/lib/tutorial.qll new file mode 100644 index 00000000000..8cb1797a532 --- /dev/null 
+++ b/repo-tests/codeql-ruby/ql/lib/tutorial.qll @@ -0,0 +1,1207 @@ +/** + * This library is used in the QL detective tutorials. + * + * Note: Data is usually stored in a separate database and the QL libraries only contain predicates, + * but for this tutorial both the data and the predicates are stored in the library. + */ +class Person extends string { + Person() { + this = "Ronil" or + this = "Dina" or + this = "Ravi" or + this = "Bruce" or + this = "Jo" or + this = "Aida" or + this = "Esme" or + this = "Charlie" or + this = "Fred" or + this = "Meera" or + this = "Maya" or + this = "Chad" or + this = "Tiana" or + this = "Laura" or + this = "George" or + this = "Will" or + this = "Mary" or + this = "Almira" or + this = "Susannah" or + this = "Rhoda" or + this = "Cynthia" or + this = "Eunice" or + this = "Olive" or + this = "Virginia" or + this = "Angeline" or + this = "Helen" or + this = "Cornelia" or + this = "Harriet" or + this = "Mahala" or + this = "Abby" or + this = "Margaret" or + this = "Deb" or + this = "Minerva" or + this = "Severus" or + this = "Lavina" or + this = "Adeline" or + this = "Cath" or + this = "Elisa" or + this = "Lucretia" or + this = "Anne" or + this = "Eleanor" or + this = "Joanna" or + this = "Adam" or + this = "Agnes" or + this = "Rosanna" or + this = "Clara" or + this = "Melissa" or + this = "Amy" or + this = "Isabel" or + this = "Jemima" or + this = "Cordelia" or + this = "Melinda" or + this = "Delila" or + this = "Jeremiah" or + this = "Elijah" or + this = "Hester" or + this = "Walter" or + this = "Oliver" or + this = "Hugh" or + this = "Aaron" or + this = "Reuben" or + this = "Eli" or + this = "Amos" or + this = "Augustus" or + this = "Theodore" or + this = "Ira" or + this = "Timothy" or + this = "Cyrus" or + this = "Horace" or + this = "Simon" or + this = "Asa" or + this = "Frank" or + this = "Nelson" or + this = "Leonard" or + this = "Harrison" or + this = "Anthony" or + this = "Louis" or + this = "Milton" or + this = "Noah" or + 
this = "Cornelius" or + this = "Abdul" or + this = "Warren" or + this = "Harvey" or + this = "Dennis" or + this = "Wesley" or + this = "Sylvester" or + this = "Gilbert" or + this = "Sullivan" or + this = "Edmund" or + this = "Wilson" or + this = "Perry" or + this = "Matthew" or + this = "Simba" or + this = "Nala" or + this = "Rafiki" or + this = "Shenzi" or + this = "Ernest" or + this = "Gertrude" or + this = "Oscar" or + this = "Lilian" or + this = "Raymond" or + this = "Elgar" or + this = "Elmer" or + this = "Herbert" or + this = "Maude" or + this = "Mae" or + this = "Otto" or + this = "Edwin" or + this = "Ophelia" or + this = "Parsley" or + this = "Sage" or + this = "Rosemary" or + this = "Thyme" or + this = "Garfunkel" or + this = "King Basil" or + this = "Stephen" + } + + /** Gets the hair color of the person. If the person is bald, there is no result. */ + string getHairColor() { + this = "Ronil" and result = "black" + or + this = "Dina" and result = "black" + or + this = "Ravi" and result = "black" + or + this = "Bruce" and result = "brown" + or + this = "Jo" and result = "red" + or + this = "Aida" and result = "blond" + or + this = "Esme" and result = "blond" + or + this = "Fred" and result = "gray" + or + this = "Meera" and result = "brown" + or + this = "Maya" and result = "brown" + or + this = "Chad" and result = "brown" + or + this = "Tiana" and result = "black" + or + this = "Laura" and result = "blond" + or + this = "George" and result = "blond" + or + this = "Will" and result = "blond" + or + this = "Mary" and result = "blond" + or + this = "Almira" and result = "black" + or + this = "Susannah" and result = "blond" + or + this = "Rhoda" and result = "blond" + or + this = "Cynthia" and result = "gray" + or + this = "Eunice" and result = "white" + or + this = "Olive" and result = "brown" + or + this = "Virginia" and result = "brown" + or + this = "Angeline" and result = "red" + or + this = "Helen" and result = "white" + or + this = "Cornelia" and 
result = "gray" + or + this = "Harriet" and result = "white" + or + this = "Mahala" and result = "black" + or + this = "Abby" and result = "red" + or + this = "Margaret" and result = "brown" + or + this = "Deb" and result = "brown" + or + this = "Minerva" and result = "brown" + or + this = "Severus" and result = "black" + or + this = "Lavina" and result = "brown" + or + this = "Adeline" and result = "brown" + or + this = "Cath" and result = "brown" + or + this = "Elisa" and result = "brown" + or + this = "Lucretia" and result = "gray" + or + this = "Anne" and result = "black" + or + this = "Eleanor" and result = "brown" + or + this = "Joanna" and result = "brown" + or + this = "Adam" and result = "black" + or + this = "Agnes" and result = "black" + or + this = "Rosanna" and result = "gray" + or + this = "Clara" and result = "blond" + or + this = "Melissa" and result = "brown" + or + this = "Amy" and result = "brown" + or + this = "Isabel" and result = "black" + or + this = "Jemima" and result = "red" + or + this = "Cordelia" and result = "red" + or + this = "Melinda" and result = "gray" + or + this = "Delila" and result = "white" + or + this = "Jeremiah" and result = "gray" + or + this = "Hester" and result = "black" + or + this = "Walter" and result = "black" + or + this = "Aaron" and result = "gray" + or + this = "Reuben" and result = "gray" + or + this = "Eli" and result = "gray" + or + this = "Amos" and result = "white" + or + this = "Augustus" and result = "white" + or + this = "Theodore" and result = "white" + or + this = "Timothy" and result = "brown" + or + this = "Cyrus" and result = "brown" + or + this = "Horace" and result = "brown" + or + this = "Simon" and result = "brown" + or + this = "Asa" and result = "brown" + or + this = "Frank" and result = "brown" + or + this = "Nelson" and result = "black" + or + this = "Leonard" and result = "black" + or + this = "Harrison" and result = "black" + or + this = "Anthony" and result = "black" + or + this = 
"Louis" and result = "black" + or + this = "Milton" and result = "blond" + or + this = "Noah" and result = "blond" + or + this = "Cornelius" and result = "red" + or + this = "Abdul" and result = "brown" + or + this = "Warren" and result = "red" + or + this = "Harvey" and result = "blond" + or + this = "Dennis" and result = "blond" + or + this = "Wesley" and result = "brown" + or + this = "Sylvester" and result = "brown" + or + this = "Gilbert" and result = "brown" + or + this = "Sullivan" and result = "brown" + or + this = "Edmund" and result = "brown" + or + this = "Wilson" and result = "blond" + or + this = "Perry" and result = "black" + or + this = "Simba" and result = "brown" + or + this = "Nala" and result = "brown" + or + this = "Rafiki" and result = "red" + or + this = "Shenzi" and result = "gray" + or + this = "Ernest" and result = "blond" + or + this = "Gertrude" and result = "brown" + or + this = "Oscar" and result = "blond" + or + this = "Lilian" and result = "brown" + or + this = "Raymond" and result = "brown" + or + this = "Elgar" and result = "brown" + or + this = "Elmer" and result = "brown" + or + this = "Herbert" and result = "brown" + or + this = "Maude" and result = "brown" + or + this = "Mae" and result = "brown" + or + this = "Otto" and result = "black" + or + this = "Edwin" and result = "black" + or + this = "Ophelia" and result = "brown" + or + this = "Parsley" and result = "brown" + or + this = "Sage" and result = "brown" + or + this = "Rosemary" and result = "brown" + or + this = "Thyme" and result = "brown" + or + this = "Garfunkel" and result = "brown" + or + this = "King Basil" and result = "brown" + or + this = "Stephen" and result = "black" + or + this = "Stephen" and result = "gray" + } + + /** Gets the age of the person (in years). If the person is deceased, there is no result. 
*/ + int getAge() { + this = "Ronil" and result = 21 + or + this = "Dina" and result = 53 + or + this = "Ravi" and result = 16 + or + this = "Bruce" and result = 35 + or + this = "Jo" and result = 47 + or + this = "Aida" and result = 26 + or + this = "Esme" and result = 25 + or + this = "Charlie" and result = 31 + or + this = "Fred" and result = 68 + or + this = "Meera" and result = 62 + or + this = "Maya" and result = 29 + or + this = "Chad" and result = 49 + or + this = "Tiana" and result = 18 + or + this = "Laura" and result = 2 + or + this = "George" and result = 3 + or + this = "Will" and result = 41 + or + this = "Mary" and result = 51 + or + this = "Almira" and result = 1 + or + this = "Susannah" and result = 97 + or + this = "Rhoda" and result = 39 + or + this = "Cynthia" and result = 89 + or + this = "Eunice" and result = 83 + or + this = "Olive" and result = 25 + or + this = "Virginia" and result = 52 + or + this = "Angeline" and result = 22 + or + this = "Helen" and result = 79 + or + this = "Cornelia" and result = 59 + or + this = "Harriet" and result = 57 + or + this = "Mahala" and result = 61 + or + this = "Abby" and result = 24 + or + this = "Margaret" and result = 59 + or + this = "Deb" and result = 31 + or + this = "Minerva" and result = 72 + or + this = "Severus" and result = 61 + or + this = "Lavina" and result = 33 + or + this = "Adeline" and result = 17 + or + this = "Cath" and result = 22 + or + this = "Elisa" and result = 9 + or + this = "Lucretia" and result = 56 + or + this = "Anne" and result = 11 + or + this = "Eleanor" and result = 80 + or + this = "Joanna" and result = 43 + or + this = "Adam" and result = 37 + or + this = "Agnes" and result = 47 + or + this = "Rosanna" and result = 61 + or + this = "Clara" and result = 31 + or + this = "Melissa" and result = 37 + or + this = "Amy" and result = 12 + or + this = "Isabel" and result = 6 + or + this = "Jemima" and result = 16 + or + this = "Cordelia" and result = 21 + or + this = "Melinda" 
and result = 55 + or + this = "Delila" and result = 66 + or + this = "Jeremiah" and result = 54 + or + this = "Elijah" and result = 42 + or + this = "Hester" and result = 68 + or + this = "Walter" and result = 66 + or + this = "Oliver" and result = 33 + or + this = "Hugh" and result = 51 + or + this = "Aaron" and result = 49 + or + this = "Reuben" and result = 58 + or + this = "Eli" and result = 70 + or + this = "Amos" and result = 65 + or + this = "Augustus" and result = 56 + or + this = "Theodore" and result = 69 + or + this = "Ira" and result = 1 + or + this = "Timothy" and result = 54 + or + this = "Cyrus" and result = 78 + or + this = "Horace" and result = 34 + or + this = "Simon" and result = 23 + or + this = "Asa" and result = 28 + or + this = "Frank" and result = 59 + or + this = "Nelson" and result = 38 + or + this = "Leonard" and result = 58 + or + this = "Harrison" and result = 7 + or + this = "Anthony" and result = 2 + or + this = "Louis" and result = 34 + or + this = "Milton" and result = 36 + or + this = "Noah" and result = 48 + or + this = "Cornelius" and result = 41 + or + this = "Abdul" and result = 67 + or + this = "Warren" and result = 47 + or + this = "Harvey" and result = 31 + or + this = "Dennis" and result = 39 + or + this = "Wesley" and result = 13 + or + this = "Sylvester" and result = 19 + or + this = "Gilbert" and result = 16 + or + this = "Sullivan" and result = 17 + or + this = "Edmund" and result = 29 + or + this = "Wilson" and result = 27 + or + this = "Perry" and result = 31 + or + this = "Matthew" and result = 55 + or + this = "Simba" and result = 8 + or + this = "Nala" and result = 7 + or + this = "Rafiki" and result = 76 + or + this = "Shenzi" and result = 67 + } + + /** Gets the height of the person (in cm). If the person is deceased, there is no result. 
*/ + float getHeight() { + this = "Ronil" and result = 183.0 + or + this = "Dina" and result = 155.1 + or + this = "Ravi" and result = 175.2 + or + this = "Bruce" and result = 191.3 + or + this = "Jo" and result = 163.4 + or + this = "Aida" and result = 182.6 + or + this = "Esme" and result = 176.9 + or + this = "Charlie" and result = 189.7 + or + this = "Fred" and result = 179.4 + or + this = "Meera" and result = 160.1 + or + this = "Maya" and result = 153.0 + or + this = "Chad" and result = 168.5 + or + this = "Tiana" and result = 149.7 + or + this = "Laura" and result = 87.5 + or + this = "George" and result = 96.4 + or + this = "Will" and result = 167.1 + or + this = "Mary" and result = 159.8 + or + this = "Almira" and result = 62.1 + or + this = "Susannah" and result = 145.8 + or + this = "Rhoda" and result = 180.1 + or + this = "Cynthia" and result = 161.8 + or + this = "Eunice" and result = 153.2 + or + this = "Olive" and result = 179.9 + or + this = "Virginia" and result = 165.1 + or + this = "Angeline" and result = 172.3 + or + this = "Helen" and result = 163.1 + or + this = "Cornelia" and result = 160.8 + or + this = "Harriet" and result = 163.2 + or + this = "Mahala" and result = 157.7 + or + this = "Abby" and result = 174.5 + or + this = "Margaret" and result = 165.6 + or + this = "Deb" and result = 171.6 + or + this = "Minerva" and result = 168.7 + or + this = "Severus" and result = 188.8 + or + this = "Lavina" and result = 155.1 + or + this = "Adeline" and result = 165.5 + or + this = "Cath" and result = 147.8 + or + this = "Elisa" and result = 129.4 + or + this = "Lucretia" and result = 153.6 + or + this = "Anne" and result = 140.4 + or + this = "Eleanor" and result = 151.1 + or + this = "Joanna" and result = 167.2 + or + this = "Adam" and result = 155.5 + or + this = "Agnes" and result = 156.8 + or + this = "Rosanna" and result = 162.4 + or + this = "Clara" and result = 158.6 + or + this = "Melissa" and result = 182.3 + or + this = "Amy" and result 
= 147.1 + or + this = "Isabel" and result = 121.4 + or + this = "Jemima" and result = 149.8 + or + this = "Cordelia" and result = 151.7 + or + this = "Melinda" and result = 154.4 + or + this = "Delila" and result = 163.4 + or + this = "Jeremiah" and result = 167.5 + or + this = "Elijah" and result = 184.5 + or + this = "Hester" and result = 152.7 + or + this = "Walter" and result = 159.6 + or + this = "Oliver" and result = 192.4 + or + this = "Hugh" and result = 173.1 + or + this = "Aaron" and result = 176.6 + or + this = "Reuben" and result = 169.9 + or + this = "Eli" and result = 180.4 + or + this = "Amos" and result = 167.4 + or + this = "Augustus" and result = 156.5 + or + this = "Theodore" and result = 176.6 + or + this = "Ira" and result = 54.1 + or + this = "Timothy" and result = 172.2 + or + this = "Cyrus" and result = 157.9 + or + this = "Horace" and result = 169.3 + or + this = "Simon" and result = 157.1 + or + this = "Asa" and result = 149.4 + or + this = "Frank" and result = 167.2 + or + this = "Nelson" and result = 173.0 + or + this = "Leonard" and result = 172.0 + or + this = "Harrison" and result = 126.0 + or + this = "Anthony" and result = 98.4 + or + this = "Louis" and result = 186.8 + or + this = "Milton" and result = 157.8 + or + this = "Noah" and result = 190.5 + or + this = "Cornelius" and result = 183.1 + or + this = "Abdul" and result = 182.0 + or + this = "Warren" and result = 175.0 + or + this = "Harvey" and result = 169.3 + or + this = "Dennis" and result = 160.4 + or + this = "Wesley" and result = 139.8 + or + this = "Sylvester" and result = 188.2 + or + this = "Gilbert" and result = 177.6 + or + this = "Sullivan" and result = 168.3 + or + this = "Edmund" and result = 159.2 + or + this = "Wilson" and result = 167.6 + or + this = "Perry" and result = 189.1 + or + this = "Matthew" and result = 167.2 + or + this = "Simba" and result = 140.1 + or + this = "Nala" and result = 138.0 + or + this = "Rafiki" and result = 139.3 + or + this = 
"Shenzi" and result = 171.1 + } + + /** Gets the location of the person's home ("north", "south", "east", or "west"). If the person is deceased, there is no result. */ + string getLocation() { + this = "Ronil" and result = "north" + or + this = "Dina" and result = "north" + or + this = "Ravi" and result = "north" + or + this = "Bruce" and result = "south" + or + this = "Jo" and result = "west" + or + this = "Aida" and result = "east" + or + this = "Esme" and result = "east" + or + this = "Charlie" and result = "south" + or + this = "Fred" and result = "west" + or + this = "Meera" and result = "south" + or + this = "Maya" and result = "south" + or + this = "Chad" and result = "south" + or + this = "Tiana" and result = "west" + or + this = "Laura" and result = "south" + or + this = "George" and result = "south" + or + this = "Will" and result = "south" + or + this = "Mary" and result = "south" + or + this = "Almira" and result = "south" + or + this = "Susannah" and result = "north" + or + this = "Rhoda" and result = "north" + or + this = "Cynthia" and result = "north" + or + this = "Eunice" and result = "north" + or + this = "Olive" and result = "west" + or + this = "Virginia" and result = "west" + or + this = "Angeline" and result = "west" + or + this = "Helen" and result = "west" + or + this = "Cornelia" and result = "east" + or + this = "Harriet" and result = "east" + or + this = "Mahala" and result = "east" + or + this = "Abby" and result = "east" + or + this = "Margaret" and result = "east" + or + this = "Deb" and result = "east" + or + this = "Minerva" and result = "south" + or + this = "Severus" and result = "north" + or + this = "Lavina" and result = "east" + or + this = "Adeline" and result = "west" + or + this = "Cath" and result = "east" + or + this = "Elisa" and result = "east" + or + this = "Lucretia" and result = "north" + or + this = "Anne" and result = "north" + or + this = "Eleanor" and result = "south" + or + this = "Joanna" and result = "south" + 
or + this = "Adam" and result = "east" + or + this = "Agnes" and result = "east" + or + this = "Rosanna" and result = "east" + or + this = "Clara" and result = "east" + or + this = "Melissa" and result = "west" + or + this = "Amy" and result = "west" + or + this = "Isabel" and result = "west" + or + this = "Jemima" and result = "west" + or + this = "Cordelia" and result = "west" + or + this = "Melinda" and result = "west" + or + this = "Delila" and result = "south" + or + this = "Jeremiah" and result = "north" + or + this = "Elijah" and result = "north" + or + this = "Hester" and result = "east" + or + this = "Walter" and result = "east" + or + this = "Oliver" and result = "east" + or + this = "Hugh" and result = "south" + or + this = "Aaron" and result = "south" + or + this = "Reuben" and result = "west" + or + this = "Eli" and result = "west" + or + this = "Amos" and result = "east" + or + this = "Augustus" and result = "south" + or + this = "Theodore" and result = "west" + or + this = "Ira" and result = "south" + or + this = "Timothy" and result = "north" + or + this = "Cyrus" and result = "north" + or + this = "Horace" and result = "east" + or + this = "Simon" and result = "east" + or + this = "Asa" and result = "east" + or + this = "Frank" and result = "west" + or + this = "Nelson" and result = "west" + or + this = "Leonard" and result = "west" + or + this = "Harrison" and result = "north" + or + this = "Anthony" and result = "north" + or + this = "Louis" and result = "north" + or + this = "Milton" and result = "south" + or + this = "Noah" and result = "south" + or + this = "Cornelius" and result = "east" + or + this = "Abdul" and result = "east" + or + this = "Warren" and result = "west" + or + this = "Harvey" and result = "west" + or + this = "Dennis" and result = "west" + or + this = "Wesley" and result = "west" + or + this = "Sylvester" and result = "south" + or + this = "Gilbert" and result = "east" + or + this = "Sullivan" and result = "east" + or + this 
= "Edmund" and result = "north" + or + this = "Wilson" and result = "north" + or + this = "Perry" and result = "west" + or + this = "Matthew" and result = "east" + or + this = "Simba" and result = "south" + or + this = "Nala" and result = "south" + or + this = "Rafiki" and result = "north" + or + this = "Shenzi" and result = "west" + } + + /** Holds if the person is deceased. */ + predicate isDeceased() { + this = "Ernest" or + this = "Gertrude" or + this = "Oscar" or + this = "Lilian" or + this = "Edwin" or + this = "Raymond" or + this = "Elgar" or + this = "Elmer" or + this = "Herbert" or + this = "Maude" or + this = "Mae" or + this = "Otto" or + this = "Ophelia" or + this = "Parsley" or + this = "Sage" or + this = "Rosemary" or + this = "Thyme" or + this = "Garfunkel" or + this = "King Basil" + } + + /** Gets a parent of the person (alive or deceased). */ + Person getAParent() { + this = "Stephen" and result = "Edmund" + or + this = "Edmund" and result = "Augustus" + or + this = "Augustus" and result = "Stephen" + or + this = "Abby" and result = "Cornelia" + or + this = "Abby" and result = "Amos" + or + this = "Abdul" and result = "Susannah" + or + this = "Adam" and result = "Amos" + or + this = "Adeline" and result = "Melinda" + or + this = "Adeline" and result = "Frank" + or + this = "Agnes" and result = "Abdul" + or + this = "Aida" and result = "Agnes" + or + this = "Almira" and result = "Sylvester" + or + this = "Amos" and result = "Eunice" + or + this = "Amy" and result = "Noah" + or + this = "Amy" and result = "Chad" + or + this = "Angeline" and result = "Reuben" + or + this = "Angeline" and result = "Lucretia" + or + this = "Anne" and result = "Rhoda" + or + this = "Anne" and result = "Louis" + or + this = "Anthony" and result = "Lavina" + or + this = "Anthony" and result = "Asa" + or + this = "Asa" and result = "Cornelia" + or + this = "Cath" and result = "Harriet" + or + this = "Charlie" and result = "Matthew" + or + this = "Clara" and result = "Ernest" 
+ or + this = "Cornelia" and result = "Cynthia" + or + this = "Cornelius" and result = "Eli" + or + this = "Deb" and result = "Margaret" + or + this = "Dennis" and result = "Fred" + or + this = "Eli" and result = "Susannah" + or + this = "Elijah" and result = "Delila" + or + this = "Elisa" and result = "Deb" + or + this = "Elisa" and result = "Horace" + or + this = "Esme" and result = "Margaret" + or + this = "Frank" and result = "Eleanor" + or + this = "Frank" and result = "Cyrus" + or + this = "George" and result = "Maya" + or + this = "George" and result = "Wilson" + or + this = "Gilbert" and result = "Cornelius" + or + this = "Harriet" and result = "Cynthia" + or + this = "Harrison" and result = "Louis" + or + this = "Harvey" and result = "Fred" + or + this = "Helen" and result = "Susannah" + or + this = "Hester" and result = "Edwin" + or + this = "Hugh" and result = "Cyrus" + or + this = "Hugh" and result = "Helen" + or + this = "Ira" and result = "Maya" + or + this = "Ira" and result = "Wilson" + or + this = "Isabel" and result = "Perry" + or + this = "Isabel" and result = "Harvey" + or + this = "Jemima" and result = "Melinda" + or + this = "Jemima" and result = "Frank" + or + this = "Ernest" and result = "Lilian" + or + this = "Ernest" and result = "Oscar" + or + this = "Gertrude" and result = "Ophelia" + or + this = "Gertrude" and result = "Raymond" + or + this = "Lilian" and result = "Elgar" + or + this = "Lilian" and result = "Mae" + or + this = "Raymond" and result = "Elgar" + or + this = "Raymond" and result = "Mae" + or + this = "Elmer" and result = "Ophelia" + or + this = "Elmer" and result = "Raymond" + or + this = "Herbert" and result = "Ophelia" + or + this = "Herbert" and result = "Raymond" + or + this = "Maude" and result = "Ophelia" + or + this = "Maude" and result = "Raymond" + or + this = "Otto" and result = "Elgar" + or + this = "Otto" and result = "Mae" + or + this = "Edwin" and result = "Otto" + or + this = "Parsley" and result = "Simon" + 
or + this = "Parsley" and result = "Garfunkel" + or + this = "Sage" and result = "Simon" + or + this = "Sage" and result = "Garfunkel" + or + this = "Rosemary" and result = "Simon" + or + this = "Rosemary" and result = "Garfunkel" + or + this = "Thyme" and result = "Simon" + or + this = "Thyme" and result = "Garfunkel" + or + this = "King Basil" and result = "Ophelia" + or + this = "King Basil" and result = "Raymond" + or + this = "Jo" and result = "Theodore" + or + this = "Joanna" and result = "Shenzi" + or + this = "Laura" and result = "Maya" + or + this = "Laura" and result = "Wilson" + or + this = "Lavina" and result = "Mahala" + or + this = "Lavina" and result = "Walter" + or + this = "Leonard" and result = "Cyrus" + or + this = "Leonard" and result = "Helen" + or + this = "Lucretia" and result = "Eleanor" + or + this = "Lucretia" and result = "Cyrus" + or + this = "Mahala" and result = "Eunice" + or + this = "Margaret" and result = "Cynthia" + or + this = "Matthew" and result = "Cyrus" + or + this = "Matthew" and result = "Helen" + or + this = "Maya" and result = "Meera" + or + this = "Melinda" and result = "Rafiki" + or + this = "Melissa" and result = "Mahala" + or + this = "Melissa" and result = "Walter" + or + this = "Nala" and result = "Bruce" + or + this = "Nelson" and result = "Mahala" + or + this = "Nelson" and result = "Walter" + or + this = "Noah" and result = "Eli" + or + this = "Olive" and result = "Reuben" + or + this = "Olive" and result = "Lucretia" + or + this = "Oliver" and result = "Matthew" + or + this = "Perry" and result = "Leonard" + or + this = "Ravi" and result = "Dina" + or + this = "Simba" and result = "Will" + or + this = "Simon" and result = "Margaret" + or + this = "Sullivan" and result = "Cornelius" + or + this = "Sylvester" and result = "Timothy" + or + this = "Theodore" and result = "Susannah" + or + this = "Tiana" and result = "Jo" + or + this = "Virginia" and result = "Helen" + or + this = "Warren" and result = "Shenzi" + or + 
this = "Wesley" and result = "Warren" + or + this = "Wesley" and result = "Jo" + or + this = "Will" and result = "Eli" + } + + /** Holds if the person is allowed in the region. Initially, all villagers are allowed in every region. */ + predicate isAllowedIn(string region) { + region = "north" or + region = "south" or + region = "east" or + region = "west" + } +} + +/** Returns a parent of the person. */ +Person parentOf(Person p) { result = p.getAParent() } diff --git a/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql b/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql new file mode 100644 index 00000000000..b10c4ecbb45 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql @@ -0,0 +1,82 @@ +/** + * @name Alert suppression + * @description Generates information about alert suppressions. + * @kind alert-suppression + * @id rb/alert-suppression + */ + +import ruby +import codeql.ruby.ast.internal.TreeSitter + +/** + * An alert suppression comment. + */ +class SuppressionComment extends Ruby::Comment { + string annotation; + + SuppressionComment() { + // suppression comments must be single-line + this.getLocation().getStartLine() = this.getLocation().getEndLine() and + exists(string text | text = commentText(this) | + // match `lgtm[...]` anywhere in the comment + annotation = text.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _) + or + // match `lgtm` at the start of the comment and after semicolon + annotation = text.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim() + ) + } + + /** + * Gets the text of this suppression comment. + */ + string getText() { result = commentText(this) } + + /** Gets the suppression annotation in this comment. */ + string getAnnotation() { result = annotation } + + /** + * Holds if this comment applies to the range from column `startcolumn` of line `startline` + * to column `endcolumn` of line `endline` in file `filepath`. 
+ */ + predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn) { + this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn) and + startcolumn = 1 + } + + /** Gets the scope of this suppression. */ + SuppressionScope getScope() { this = result.getSuppressionComment() } +} + +private string commentText(Ruby::Comment comment) { result = comment.getValue().suffix(1) } + +/** + * The scope of an alert suppression comment. + */ +class SuppressionScope extends @ruby_token_comment { + SuppressionScope() { this instanceof SuppressionComment } + + /** Gets a suppression comment with this scope. */ + SuppressionComment getSuppressionComment() { result = this } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.(SuppressionComment).covers(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets a textual representation of this element. 
*/ + string toString() { result = "suppression range" } +} + +from SuppressionComment c +select c, // suppression comment + c.getText(), // text of suppression comment (excluding delimiters) + c.getAnnotation(), // text of suppression annotation + c.getScope() // scope of suppression diff --git a/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql b/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql new file mode 100644 index 00000000000..f5fcf6df4fb --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql @@ -0,0 +1,64 @@ +/** + * @name Use detect + * @description Use 'detect' instead of 'select' followed by 'first' or 'last'. + * @kind problem + * @problem.severity warning + * @id rb/use-detect + * @tags performance rubocop + * @precision high + */ + +// This is an implementation of the Rubocop rule +// https://github.com/rubocop/rubocop-performance/blob/master/lib/rubocop/cop/performance/detect.rb +import ruby +import codeql.ruby.dataflow.SSA + +/** A call that extracts the first or last element of a list. 
*/ +class EndCall extends MethodCall { + string detect; + + EndCall() { + detect = "detect" and + ( + this.getMethodName() = "first" and + this.getNumberOfArguments() = 0 + or + this.getNumberOfArguments() = 1 and + this.getArgument(0).(IntegerLiteral).getValueText() = "0" + ) + or + detect = "reverse_detect" and + ( + this.getMethodName() = "last" and + this.getNumberOfArguments() = 0 + or + this.getNumberOfArguments() = 1 and + this.getArgument(0).(UnaryMinusExpr).getOperand().(IntegerLiteral).getValueText() = "1" + ) + } + + string detectCall() { result = detect } +} + +Expr getUniqueRead(Expr e) { + exists(AssignExpr ae | + e = ae.getRightOperand() and + forex(Ssa::WriteDefinition def | def.getWriteAccess() = ae.getLeftOperand() | + strictcount(def.getARead()) = 1 and + not def = any(Ssa::PhiNode phi).getAnInput() and + def.getARead() = result.getAControlFlowNode() + ) + ) +} + +class SelectBlock extends MethodCall { + SelectBlock() { + this.getMethodName() in ["select", "filter", "find_all"] and + exists(this.getBlock()) + } +} + +from EndCall call, SelectBlock selectBlock +where getUniqueRead*(selectBlock) = call.getReceiver() +select call, "Replace this call and $@ with '" + call.detectCall() + "'.", selectBlock, + "'select' call" diff --git a/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql b/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql new file mode 100644 index 00000000000..d194523e09d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql @@ -0,0 +1,20 @@ +/** + * @name Classify files + * @description This query produces a list of all files in a database + * that are classified as generated code or test code. + * + * Used by LGTM. 
+ * @kind file-classifier + * @id rb/file-classifier + */ + +import ruby +import codeql.ruby.filters.GeneratedCode + +predicate classify(File f, string category) { + f instanceof GeneratedCodeFile and category = "generated" +} + +from File f, string category +where classify(f, category) +select f, category diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql new file mode 100644 index 00000000000..81c5e449bb1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql @@ -0,0 +1,20 @@ +/** + * @name Jump-to-definition links + * @description Generates use-definition pairs that provide the data + * for jump-to-definition in the code viewer. + * @kind definitions + * @id ruby/ide-jump-to-definition + * @tags ide-contextual-queries/local-definitions + */ + +import codeql.IDEContextual +import codeql.ruby.AST + +external string selectedSourceFile(); + +from AstNode e, Variable def, string kind +where + e = def.getAnAccess() and + kind = "local variable" and + e.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile()) +select e, def, kind diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql new file mode 100644 index 00000000000..713b363e60f --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql @@ -0,0 +1,21 @@ +/** + * @name Find-references links + * @description Generates use-definition pairs that provide the data + * for find-references in the code viewer. 
+ * @kind definitions + * @id ruby/ide-find-references + * @tags ide-contextual-queries/local-references + */ + +import codeql.IDEContextual +import codeql.ruby.AST +import codeql.ruby.ast.Variable + +external string selectedSourceFile(); + +from AstNode e, Variable def, string kind +where + e = def.getAnAccess() and + kind = "local variable" and + def.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile()) +select e, def, kind diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql new file mode 100644 index 00000000000..cd5b9a4a3b2 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql @@ -0,0 +1,27 @@ +/** + * @name Print AST + * @description Produces a representation of a file's Abstract Syntax Tree. + * This query is used by the VS Code extension. + * @id ruby/print-ast + * @kind graph + * @tags ide-contextual-queries/print-ast + */ + +private import codeql.IDEContextual +private import codeql.ruby.AST +private import codeql.ruby.printAst + +/** + * The source file to generate an AST from. + */ +external string selectedSourceFile(); + +/** + * Overrides the configuration to print only nodes in the selected source file. 
+ */ +class Cfg extends PrintAstConfiguration { + override predicate shouldPrintNode(AstNode n) { + super.shouldPrintNode(n) and + n.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile()) + } +} diff --git a/repo-tests/codeql-ruby/ql/src/qlpack.yml b/repo-tests/codeql-ruby/ql/src/qlpack.yml new file mode 100644 index 00000000000..1c346968c43 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/qlpack.yml @@ -0,0 +1,7 @@ +name: codeql/ruby-queries +version: 0.0.2 +suites: codeql-suites +defaultSuiteFile: codeql-suites/ruby-code-scanning.qls +dependencies: + codeql/ruby-all: "*" + codeql/suite-helpers: "*" diff --git a/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql b/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql new file mode 100644 index 00000000000..aff97bbc345 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql @@ -0,0 +1,81 @@ +/** + * @name Definitions + * @description Jump to definition helper query. + * @kind definitions + * @id rb/jump-to-definition + */ + +/* + * TODO: + * - should `Foo.new` point to `Foo#initialize`? + */ + +import ruby +import codeql.ruby.ast.internal.Module +import codeql.ruby.dataflow.SSA + +from DefLoc loc, Expr src, Expr target, string kind +where + ConstantDefLoc(src, target) = loc and kind = "constant" + or + MethodLoc(src, target) = loc and kind = "method" + or + LocalVariableLoc(src, target) = loc and kind = "variable" + or + InstanceVariableLoc(src, target) = loc and kind = "instance variable" + or + ClassVariableLoc(src, target) = loc and kind = "class variable" +select src, target, kind + +/** + * Definition location info for different identifiers. + * Each branch holds two values that are subclasses of `Expr`. + * The first is the "source" - some usage of an identifier. + * The second is the "target" - the definition of that identifier. + */ +newtype DefLoc = + /** A constant, module or class. 
*/ + ConstantDefLoc(ConstantReadAccess read, ConstantWriteAccess write) { write = definitionOf(read) } or + /** A method call. */ + MethodLoc(MethodCall call, Method meth) { meth = call.getATarget() } or + /** A local variable. */ + LocalVariableLoc(VariableReadAccess read, VariableWriteAccess write) { + exists(Ssa::WriteDefinition w | + write = w.getWriteAccess() and + read = w.getARead().getExpr() and + not read.isSynthesized() + ) + } or + /** An instance variable */ + InstanceVariableLoc(InstanceVariableReadAccess read, InstanceVariableWriteAccess write) { + /* + * We consider instance variables to be "defined" in the initialize method of their enclosing class. + * If that method doesn't exist, we won't provide any jump-to-def information for the instance variable. + */ + + exists(Method m | + m.getAChild+() = write and + m.getName() = "initialize" and + write.getVariable() = read.getVariable() + ) + } or + /** A class variable */ + ClassVariableLoc(ClassVariableReadAccess read, ClassVariableWriteAccess write) { + read.getVariable() = write.getVariable() and + not exists(MethodBase m | m.getAChild+() = write) + } + +/** + * Gets the constant write that defines the given constant. + * Modules often don't have a unique definition, as they are opened multiple times in different + * files. In these cases we arbitrarily pick the definition with the lexicographically least + * location. 
+ */ +ConstantWriteAccess definitionOf(ConstantReadAccess r) { + result = + min(ConstantWriteAccess w | + w.getQualifiedName() = resolveConstant(r) + | + w order by w.getLocation().toString() + ) +} diff --git a/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql new file mode 100644 index 00000000000..5c55d984337 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql @@ -0,0 +1,18 @@ +/** + * @name Extraction errors + * @description List all extraction errors for files in the source code directory. + * @kind diagnostic + * @id rb/diagnostics/extraction-errors + */ + +import ruby +import codeql.ruby.Diagnostics + +/** Gets the SARIF severity to associate an error. */ +int getSeverity() { result = 2 } + +from ExtractionError error, File f +where + f = error.getLocation().getFile() and + exists(f.getRelativePath()) +select error, "Extraction failed in " + f + " with error " + error.getMessage(), getSeverity() diff --git a/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql new file mode 100644 index 00000000000..74f95763d8a --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql @@ -0,0 +1,16 @@ +/** + * @name Successfully extracted files + * @description Lists all files in the source code directory that were extracted + * without encountering an error in the file. 
+ * @kind diagnostic + * @id rb/diagnostics/successfully-extracted-files + */ + +import ruby +import codeql.ruby.Diagnostics + +from File f +where + not exists(ExtractionError e | e.getLocation().getFile() = f) and + exists(f.getRelativePath()) +select f, "" diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql new file mode 100644 index 00000000000..97c319fbf73 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql @@ -0,0 +1,13 @@ +/** + * @name Number of lines + * @kind metric + * @description The number of lines in each file. + * @metricType file + * @id rb/lines-per-file + */ + +import ruby + +from RubyFile f, int n +where n = f.getNumberOfLines() +select f, n order by n desc diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql new file mode 100644 index 00000000000..0c1d15960cc --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql @@ -0,0 +1,14 @@ +/** + * @name Lines of code in files + * @kind metric + * @description Measures the number of lines of code in each file, ignoring lines that + * contain only comments or whitespace. + * @metricType file + * @id rb/lines-of-code-in-files + */ + +import ruby + +from RubyFile f, int n +where n = f.getNumberOfLinesOfCode() +select f, n order by n desc diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql new file mode 100644 index 00000000000..8af882f13d1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql @@ -0,0 +1,13 @@ +/** + * @name Lines of comments in files + * @kind metric + * @description Measures the number of lines of comments in each file. 
+ * @metricType file + * @id rb/lines-of-comments-in-files + */ + +import ruby + +from RubyFile f, int n +where n = f.getNumberOfLinesOfComments() +select f, n order by n desc diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql new file mode 100644 index 00000000000..4c2dda966b9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql @@ -0,0 +1,25 @@ +/** + * @name Uncontrolled command line + * @description Using externally controlled strings in a command line may allow a malicious + * user to change the meaning of the command. + * @kind path-problem + * @problem.severity error + * @security-severity 9.8 + * @precision high + * @id rb/command-line-injection + * @tags correctness + * security + * external/cwe/cwe-078 + * external/cwe/cwe-088 + */ + +import ruby +import codeql.ruby.security.CommandInjectionQuery +import DataFlow::PathGraph + +from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Source sourceNode +where + config.hasFlowPath(source, sink) and + sourceNode = source.getNode() +select sink.getNode(), source, sink, "This command depends on $@.", sourceNode, + sourceNode.getSourceType() diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql new file mode 100644 index 00000000000..5bb02183915 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql @@ -0,0 +1,76 @@ +/** + * @name Use of `Kernel.open` or `IO.read` + * @description Using `Kernel.open` or `IO.read` may allow a malicious + * user to execute arbitrary system commands. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 9.8 + * @precision high + * @id rb/kernel-open + * @tags correctness + * security + * external/cwe/cwe-078 + * external/cwe/cwe-088 + * external/cwe/cwe-073 + */ + +import ruby +import codeql.ruby.ApiGraphs +import codeql.ruby.frameworks.StandardLibrary +import codeql.ruby.TaintTracking +import codeql.ruby.dataflow.BarrierGuards +import codeql.ruby.dataflow.RemoteFlowSources +import DataFlow::PathGraph + +/** + * Method calls that have a suggested replacement. + */ +abstract class Replacement extends DataFlow::CallNode { + abstract string getFrom(); + + abstract string getTo(); +} + +class KernelOpenCall extends KernelMethodCall, Replacement { + KernelOpenCall() { this.getMethodName() = "open" } + + override string getFrom() { result = "Kernel.open" } + + override string getTo() { result = "File.open" } +} + +class IOReadCall extends DataFlow::CallNode, Replacement { + IOReadCall() { this = API::getTopLevelMember("IO").getAMethodCall("read") } + + override string getFrom() { result = "IO.read" } + + override string getTo() { result = "File.read" } +} + +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "KernelOpen" } + + override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } + + override predicate isSink(DataFlow::Node sink) { + exists(KernelOpenCall c | c.getArgument(0) = sink) + or + exists(IOReadCall c | c.getArgument(0) = sink) + } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof StringConstCompare or + guard instanceof StringConstArrayInclusionCall + } +} + +from + Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, + DataFlow::Node sourceNode, DataFlow::CallNode call +where + config.hasFlowPath(source, sink) and + sourceNode = source.getNode() and + call.asExpr().getExpr().(MethodCall).getArgument(0) = sink.getNode().asExpr().getExpr() +select 
sink.getNode(), source, sink, + "This call to " + call.(Replacement).getFrom() + + " depends on a user-provided value. Replace it with " + call.(Replacement).getTo() + "." diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql new file mode 100644 index 00000000000..d3f95f69fea --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql @@ -0,0 +1,24 @@ +/** + * @name Reflected server-side cross-site scripting + * @description Writing user input directly to a web page + * allows for a cross-site scripting vulnerability. + * @kind path-problem + * @problem.severity error + * @security-severity 6.1 + * @sub-severity high + * @precision high + * @id rb/reflected-xss + * @tags security + * external/cwe/cwe-079 + * external/cwe/cwe-116 + */ + +import ruby +import codeql.ruby.security.ReflectedXSSQuery +import codeql.ruby.DataFlow +import DataFlow::PathGraph + +from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.", + source.getNode(), "a user-provided value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql new file mode 100644 index 00000000000..e473d5c31e9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql @@ -0,0 +1,23 @@ +/** + * @name Stored cross-site scripting + * @description Using uncontrolled stored values in HTML allows for + * a stored cross-site scripting vulnerability. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 6.1 + * @precision high + * @id rb/stored-xss + * @tags security + * external/cwe/cwe-079 + * external/cwe/cwe-116 + */ + +import ruby +import codeql.ruby.security.StoredXSSQuery +import codeql.ruby.DataFlow +import DataFlow::PathGraph + +from StoredXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@", + source.getNode(), "stored value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql new file mode 100644 index 00000000000..de795e34e71 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql @@ -0,0 +1,39 @@ +/** + * @name SQL query built from user-controlled sources + * @description Building a SQL query from user-controlled sources is vulnerable to insertion of + * malicious SQL code by the user. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 8.8 + * @precision high + * @id rb/sql-injection + * @tags security + * external/cwe/cwe-089 + * external/owasp/owasp-a1 + */ + +import ruby +import codeql.ruby.Concepts +import codeql.ruby.DataFlow +import codeql.ruby.dataflow.BarrierGuards +import codeql.ruby.dataflow.RemoteFlowSources +import codeql.ruby.TaintTracking +import DataFlow::PathGraph + +class SQLInjectionConfiguration extends TaintTracking::Configuration { + SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" } + + override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } + + override predicate isSink(DataFlow::Node sink) { sink instanceof SqlExecution } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof StringConstCompare or + guard instanceof StringConstArrayInclusionCall + } +} + +from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(), + "a user-provided value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql new file mode 100644 index 00000000000..60e8e32c2f6 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql @@ -0,0 +1,27 @@ +/** + * @name Code injection + * @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary + * code execution. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 9.3 + * @sub-severity high + * @precision high + * @id rb/code-injection + * @tags security + * external/owasp/owasp-a1 + * external/cwe/cwe-094 + * external/cwe/cwe-095 + * external/cwe/cwe-116 + */ + +import ruby +import codeql.ruby.security.CodeInjectionQuery +import DataFlow::PathGraph + +from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Source sourceNode +where + config.hasFlowPath(source, sink) and + sourceNode = source.getNode() +select sink.getNode(), source, sink, "This code execution depends on $@.", sourceNode, + "a user-provided value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql new file mode 100644 index 00000000000..9ee914c3bf0 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql @@ -0,0 +1,31 @@ +/** + * @name Polynomial regular expression used on uncontrolled data + * @description A regular expression that can require polynomial time + * to match may be vulnerable to denial-of-service attacks. 
+ * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision high + * @id rb/polynomial-redos + * @tags security + * external/cwe/cwe-1333 + * external/cwe/cwe-730 + * external/cwe/cwe-400 + */ + +import DataFlow::PathGraph +import codeql.ruby.DataFlow +import codeql.ruby.regexp.PolynomialReDoSQuery +import codeql.ruby.regexp.SuperlinearBackTracking + +from + PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, + PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp +where + config.hasFlowPath(source, sink) and + sinkNode = sink.getNode() and + regexp = sinkNode.getRegExp() +select sinkNode.getHighlight(), source, sink, + "This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() + + "with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression", + source.getNode(), "a user-provided value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql new file mode 100644 index 00000000000..234772240e3 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql @@ -0,0 +1,25 @@ +/** + * @name Inefficient regular expression + * @description A regular expression that requires exponential time to match certain inputs + * can be a performance bottleneck, and may be vulnerable to denial-of-service + * attacks. 
+ * @kind problem + * @problem.severity error + * @security-severity 7.5 + * @precision high + * @id rb/redos + * @tags security + * external/cwe/cwe-1333 + * external/cwe/cwe-730 + * external/cwe/cwe-400 + */ + +import codeql.ruby.regexp.ExponentialBackTracking +import codeql.ruby.regexp.ReDoSUtil +import codeql.ruby.regexp.RegExpTreeView + +from RegExpTerm t, string pump, State s, string prefixMsg +where hasReDoSResult(t, pump, s, prefixMsg) +select t, + "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg + + "containing many repetitions of '" + pump + "'." diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql new file mode 100644 index 00000000000..e9b236897bc --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql @@ -0,0 +1,20 @@ +/** + * @name Request without certificate validation + * @description Making a request without certificate validation can allow + * man-in-the-middle attacks. 
+ * @kind problem + * @problem.severity warning + * @security-severity 7.5 + * @precision medium + * @id rb/request-without-cert-validation + * @tags security + * external/cwe/cwe-295 + */ + +import ruby +import codeql.ruby.Concepts +import codeql.ruby.DataFlow + +from HTTP::Client::Request request, DataFlow::Node disablingNode +where request.disablesCertificateValidation(disablingNode) +select request, "This request may run with $@.", disablingNode, "certificate validation disabled" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql new file mode 100644 index 00000000000..0df3b7c8d67 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql @@ -0,0 +1,21 @@ +/** + * @name Deserialization of user-controlled data + * @description Deserializing user-controlled data may allow attackers to + * execute arbitrary code. + * @kind path-problem + * @problem.severity warning + * @security-severity 9.8 + * @precision high + * @id rb/unsafe-deserialization + * @tags security + * external/cwe/cwe-502 + */ + +import ruby +import DataFlow::PathGraph +import codeql.ruby.DataFlow +import codeql.ruby.security.UnsafeDeserializationQuery + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Unsafe deserialization of $@.", source.getNode(), "user input" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql new file mode 100644 index 00000000000..aeaa4c29dc5 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql @@ -0,0 +1,22 @@ +/** + * @name URL redirection from remote source + * @description URL redirection based on unvalidated user input + * may cause redirection to malicious web sites. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 6.1 + * @sub-severity low + * @id rb/url-redirection + * @tags security + * external/cwe/cwe-601 + * @precision high + */ + +import ruby +import codeql.ruby.security.UrlRedirectQuery +import codeql.ruby.DataFlow::DataFlow::PathGraph + +from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(), + "a user-provided value" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql new file mode 100644 index 00000000000..c7eae21333e --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql @@ -0,0 +1,43 @@ +/** + * @name XML external entity expansion + * @description Parsing user input as an XML document with external + * entity expansion is vulnerable to XXE attacks. 
+ * @kind path-problem + * @problem.severity error + * @security-severity 9.1 + * @precision high + * @id rb/xxe + * @tags security + * external/cwe/cwe-611 + * external/cwe/cwe-776 + * external/cwe/cwe-827 + */ + +import ruby +import codeql.ruby.dataflow.RemoteFlowSources +import codeql.ruby.TaintTracking +import codeql.ruby.Concepts +import codeql.ruby.DataFlow +import DataFlow::PathGraph + +class UnsafeXxeSink extends DataFlow::ExprNode { + UnsafeXxeSink() { + exists(XmlParserCall parse | + parse.getInput() = this and + parse.externalEntitiesEnabled() + ) + } +} + +class XxeConfig extends TaintTracking::Configuration { + XxeConfig() { this = "XXE.ql::XxeConfig" } + + override predicate isSource(DataFlow::Node src) { src instanceof RemoteFlowSource } + + override predicate isSink(DataFlow::Node sink) { sink instanceof UnsafeXxeSink } +} + +from DataFlow::PathNode source, DataFlow::PathNode sink, XxeConfig conf +where conf.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Unsafe parsing of XML file from $@.", source.getNode(), + "user input" diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql new file mode 100644 index 00000000000..793eafe04bd --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql @@ -0,0 +1,64 @@ +/** + * @name Overly permissive file permissions + * @description Allowing files to be readable or writable by users other than the owner may allow sensitive information to be accessed. 
+ * @kind path-problem + * @problem.severity warning + * @security-severity 7.8 + * @id rb/overly-permissive-file + * @tags external/cwe/cwe-732 + * security + * @precision low + */ + +import ruby +import codeql.ruby.Concepts +import codeql.ruby.DataFlow +import DataFlow::PathGraph +import codeql.ruby.ApiGraphs + +bindingset[p] +int world_permission(int p) { result = p.bitAnd(7) } /* Gets the three lowest bits of `p`, i.e. the permission triple for other users. */ + +// 70 oct = 56 dec; the masked group bits are shifted down by 3 so that `access` can test them with the same 2 (write) and 4 (read) masks it applies to the world bits. Without the shift those two bits are always zero and group access is never reported. +bindingset[p] +int group_permission(int p) { result = p.bitAnd(56).bitShiftRight(3) } + +bindingset[p] +string access(int p) { /* Gets "writable" if bit value 2 of `p` is set and "readable" if bit value 4 is set; callers pass a single permission triple in the range 0 to 7. */ + p.bitAnd(2) != 0 and result = "writable" + or + p.bitAnd(4) != 0 and result = "readable" +} + +/** An expression specifying a file permission that allows group/others read or write access */ +class PermissivePermissionsExpr extends Expr { + // TODO: non-literal expressions? + PermissivePermissionsExpr() { + exists(int perm, string acc | + perm = this.(IntegerLiteral).getValue() and + (acc = access(world_permission(perm)) or acc = access(group_permission(perm))) + ) + or + // adding/setting read or write permissions for all/group/other + this.(StringLiteral).getValueText().regexpMatch(".*[ago][^-=+]*[+=][xXst]*[rw].*") + } +} + +class PermissivePermissionsConfig extends DataFlow::Configuration { /* Data-flow configuration from a `PermissivePermissionsExpr` to a permission node of a `FileSystemPermissionModification`. */ + PermissivePermissionsConfig() { this = "PermissivePermissionsConfig" } + + override predicate isSource(DataFlow::Node source) { + exists(PermissivePermissionsExpr ppe | source.asExpr().getExpr() = ppe) + } + + override predicate isSink(DataFlow::Node sink) { + exists(FileSystemPermissionModification mod | mod.getAPermissionNode() = sink) + } +} + +from + DataFlow::PathNode source, DataFlow::PathNode sink, PermissivePermissionsConfig conf, + FileSystemPermissionModification mod +where conf.hasFlowPath(source, sink) and mod.getAPermissionNode() = sink.getNode() /* `mod` is the modification whose permission node is the sink of the path */ +select source.getNode(), source, sink, "Overly permissive mask in $@ sets file to $@.", mod, + mod.toString(), source.getNode(), source.getNode().toString() diff --git
a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql new file mode 100644 index 00000000000..c887793031d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql @@ -0,0 +1,155 @@ +/** + * @name Hard-coded credentials + * @description Credentials are hard coded in the source code of the application. + * @kind path-problem + * @problem.severity error + * @security-severity 9.8 + * @precision high + * @id rb/hardcoded-credentials + * @tags security + * external/cwe/cwe-259 + * external/cwe/cwe-321 + * external/cwe/cwe-798 + */ + +import ruby +import codeql.ruby.DataFlow +import DataFlow::PathGraph +import codeql.ruby.TaintTracking +import codeql.ruby.controlflow.CfgNodes + +bindingset[char, fraction] +predicate fewer_characters_than(StringLiteral str, string char, float fraction) { /* Holds if `char` occurs exactly once in the text of `str`, or fewer than `fraction` times the length of that text. */ + exists(string text, int chars | + text = str.getValueText() and + chars = count(int i | text.charAt(i) = char) + | + /* Allow one character */ + chars = 1 or + chars < text.length() * fraction + ) +} + +predicate possible_reflective_name(string name) { /* Placeholder: the body is `none()`, so this currently never holds. */ + // TODO: implement this? + none() +} + +int char_count(StringLiteral str) { result = count(string c | c = str.getValueText().charAt(_)) } /* Gets the number of distinct characters in the text of `str`. */ + +predicate capitalized_word(StringLiteral str) { str.getValueText().regexpMatch("[A-Z][a-z]+") } /* Holds if the whole text of `str` is one capital letter followed by one or more lower-case letters. */ + +predicate format_string(StringLiteral str) { str.getValueText().matches("%{%}%") } /* Holds if the text of `str` contains a `{` that is followed somewhere later by a `}`. */ + +predicate maybeCredential(Expr e) { + /* A string that is not too short and unlikely to be text or an identifier.
*/ + exists(StringLiteral str | str = e | + /* At least 10 characters */ + str.getValueText().length() > 9 and + /* Not too much whitespace */ + fewer_characters_than(str, " ", 0.05) and + /* or underscores */ + fewer_characters_than(str, "_", 0.2) and + /* Not too repetitive: more than 15 distinct characters, or the distinct characters exceed two thirds of the length */ + exists(int chars | chars = char_count(str) | + chars > 15 or + chars * 3 > str.getValueText().length() * 2 + ) and + not possible_reflective_name(str.getValueText()) and + not capitalized_word(str) and + not format_string(str) + ) + or + /* Or, an integer with over 32 bits (detected as a literal for which `getValue()` has no result; presumably because the value does not fit an `int`, to be confirmed against `IntegerLiteral`) */ + exists(IntegerLiteral lit | lit = e | + not exists(lit.getValue()) and + /* Not a set of flags or round number: the literal text must not contain `00` */ + not lit.getValueText().matches("%00%") + ) +} + +class HardcodedValueSource extends DataFlow::Node { /* A data-flow node whose underlying expression satisfies `maybeCredential`. */ + HardcodedValueSource() { maybeCredential(this.asExpr().getExpr()) } +} + +/** + * Gets a regular expression for matching names of locations (variables, parameters, keys) that + * indicate the value being held is a credential.
+ */ +private string getACredentialRegExp() { + result = "(?i).*pass(wd|word|code|phrase)(?!.*question).*" or + result = "(?i).*(puid|username|userid).*" or + result = "(?i).*(cert)(?!.*(format|name)).*" +} + +bindingset[name] +private predicate maybeCredentialName(string name) { /* Holds if `name` matches one of the credential regular expressions and its last four characters are not "file". */ + name.regexpMatch(getACredentialRegExp()) and + not name.suffix(name.length() - 4) = "file" +} + +// Positional parameter: gets a data-flow node for a named parameter of some method whose name looks like a credential name +private DataFlow::Node credentialParameter() { + exists(Method m, NamedParameter p, int idx | + result.asParameter() = p and + p = m.getParameter(idx) and + maybeCredentialName(p.getName()) + ) +} + +// Keyword argument: gets the value passed in a method call for a keyword whose key looks like a credential name +private Expr credentialKeywordArgument() { + exists(MethodCall mc, string argKey | + result = mc.getKeywordArgument(argKey) and + maybeCredentialName(argKey) + ) +} + +// An equality check against a credential value: gets the operand that is compared with a read of a variable whose name looks like a credential name (either operand order) +private Expr credentialComparison() { + exists(EqualityOperation op, VariableReadAccess vra | + maybeCredentialName(vra.getVariable().getName()) and + ( + op.getLeftOperand() = result and + op.getRightOperand() = vra + or + op.getLeftOperand() = vra and op.getRightOperand() = result + ) + ) +} + +private predicate isCredentialSink(DataFlow::Node node) { /* Holds if `node` is a credential parameter, a credential keyword argument, or an operand compared against a credential-named variable. */ + node = credentialParameter() + or + node.asExpr().getExpr() = credentialKeywordArgument() + or + node.asExpr().getExpr() = credentialComparison() +} + +class CredentialSink extends DataFlow::Node { /* A data-flow node for which `isCredentialSink` holds. */ + CredentialSink() { isCredentialSink(this) } +} + +class HardcodedCredentialsConfiguration extends DataFlow::Configuration { /* Data-flow configuration from `HardcodedValueSource` nodes to `CredentialSink` nodes. */ + HardcodedCredentialsConfiguration() { this = "HardcodedCredentialsConfiguration" } + + override predicate isSource(DataFlow::Node source) { source instanceof HardcodedValueSource } + + override predicate isSink(DataFlow::Node sink) { sink instanceof CredentialSink } + + override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + exists(ExprNodes::BinaryOperationCfgNode binop | + ( + binop.getLeftOperand() = node1.asExpr() or +
binop.getRightOperand() = node1.asExpr() + ) and + binop = node2.asExpr() and + // string concatenation: only binary operations whose expression is an `AddExpr` carry flow from an operand to the result + binop.getExpr() instanceof AddExpr + ) + } +} + +from DataFlow::PathNode source, DataFlow::PathNode sink, HardcodedCredentialsConfiguration conf +where conf.hasFlowPath(source, sink) /* one result per flow path; the alert is placed on the source node */ +select source.getNode(), source, sink, "Use of $@.", source.getNode(), "hardcoded credentials" diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql new file mode 100644 index 00000000000..f727cf504d9 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql @@ -0,0 +1,15 @@ +/** + * @id rb/summary/lines-of-code + * @name Total lines of Ruby code in the database + * @description The total number of lines of Ruby code from the source code + * directory, including external libraries and auto-generated files. This is a + * useful metric of the size of a database. This query counts the lines of + * code, excluding whitespace or comments. + * @kind metric + * @tags summary + * lines-of-code + */ + +import ruby + +select sum(RubyFile f | exists(f.getRelativePath()) | f.getNumberOfLinesOfCode()) /* sums the line counts of every `RubyFile` that has a relative path */ diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql new file mode 100644 index 00000000000..19f4f46fb8d --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql @@ -0,0 +1,19 @@ +/** + * @id rb/summary/lines-of-user-code + * @name Total Lines of user written Ruby code in the database + * @description The total number of lines of Ruby code from the source code + * directory, excluding external library and auto-generated files. This + * query counts the lines of code, excluding whitespace or comments.
+ * @kind metric + * @tags summary + */ + +import ruby + +select sum(RubyFile f | + f.fromSource() and + exists(f.getRelativePath()) and + not f.getAbsolutePath().matches("%/vendor/%") /* skip any file whose absolute path contains a `/vendor/` component */ + | + f.getNumberOfLinesOfCode() + ) diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql new file mode 100644 index 00000000000..1a68d2c57e6 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql @@ -0,0 +1,15 @@ +/** + * @id rb/summary/number-of-files-extracted-with-errors + * @name Total number of files that were extracted with errors + * @description The total number of Ruby code files that we extracted, but where + * at least one extraction error occurred in the process. + * @kind metric + * @tags summary + */ + +import ruby +import codeql.ruby.Diagnostics + +select count(File f | + exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath()) /* files that have a relative path and contain the location of at least one `ExtractionError` */ + ) diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql new file mode 100644 index 00000000000..356989935e1 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql @@ -0,0 +1,15 @@ +/** + * @id rb/summary/number-of-successfully-extracted-files + * @name Total number of files that were extracted without error + * @description The total number of Ruby code files that we extracted without + * encountering any extraction errors + * @kind metric + * @tags summary + */ + +import ruby +import codeql.ruby.Diagnostics + +select count(File f | + not exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath()) /* files that have a relative path and contain no `ExtractionError` location; NOTE(review): ranges over every `File`, not only `RubyFile` */ + ) diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql
b/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql new file mode 100644 index 00000000000..5ce06a0c182 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql @@ -0,0 +1,28 @@ +/** + * @name Useless assignment to local variable + * @description An assignment to a local variable that is not used later on, or whose value is always + * overwritten, has no effect. + * @kind problem + * @problem.severity warning + * @id rb/useless-assignment-to-local + * @tags maintainability + * external/cwe/cwe-563 + * @precision low + */ + +import ruby +import codeql.ruby.dataflow.SSA + +class RelevantLocalVariableWriteAccess extends LocalVariableWriteAccess { /* A write to a local variable whose name does not start with `_` and that is not the defining access of a parameter variable. */ + RelevantLocalVariableWriteAccess() { + not this.getVariable().getName().charAt(0) = "_" and + not this = any(Parameter p).getAVariable().getDefiningAccess() + } +} + +from RelevantLocalVariableWriteAccess write, LocalVariable v +where + v = write.getVariable() and + exists(write.getAControlFlowNode()) and /* the write has at least one control-flow node */ + not exists(Ssa::WriteDefinition def | def.getWriteAccess() = write) /* no SSA write definition exists for this write; presumably SSA only creates one when the value can be read, confirm in the SSA library */ +select write, "This assignment to $@ is useless, since its value is never read.", v, v.getName() diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql b/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql new file mode 100644 index 00000000000..ef134eddd70 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql @@ -0,0 +1,32 @@ +/** + * @name Potentially uninitialized local variable + * @description Using a local variable before it is initialized gives the variable a default + * 'nil' value.
+ * @kind problem + * @problem.severity error + * @id rb/uninitialized-local-variable + * @tags reliability + * correctness + * @precision low + */ + +import ruby +import codeql.ruby.dataflow.SSA + +class RelevantLocalVariableReadAccess extends LocalVariableReadAccess { /* A read of a local variable that is not the receiver of a call to `nil?`. */ + RelevantLocalVariableReadAccess() { + not exists(MethodCall c | + c.getReceiver() = this and + c.getMethodName() = "nil?" + ) + } +} + +from RelevantLocalVariableReadAccess read, LocalVariable v +where + v = read.getVariable() and + exists(Ssa::Definition def | + def.getAnUltimateDefinition() instanceof Ssa::UninitializedDefinition and /* some ultimate definition of `def` is the uninitialized one */ + read = def.getARead().getExpr() + ) +select read, "Local variable $@ may be used before it is initialized.", v, v.getName() diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql b/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql new file mode 100644 index 00000000000..1aa1a6bc462 --- /dev/null +++ b/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql @@ -0,0 +1,27 @@ +/** + * @name Unused parameter. + * @description A parameter that is not used later on, or whose value is always overwritten, + * can be removed. + * @kind problem + * @problem.severity warning + * @id rb/unused-parameter + * @tags maintainability + * external/cwe/cwe-563 + * @precision low + */ + +import ruby +import codeql.ruby.dataflow.SSA + +class RelevantParameterVariable extends LocalVariable { /* A local variable that belongs to some parameter and whose name does not start with `_`. */ + RelevantParameterVariable() { + exists(Parameter p | + this = p.getAVariable() and + not this.getName().charAt(0) = "_" + ) + } +} + +from RelevantParameterVariable v +where not exists(Ssa::WriteDefinition def | def.getWriteAccess() = v.getDefiningAccess()) /* no SSA write definition exists for the defining access of the parameter variable */ +select v, "Unused parameter."