diff --git a/.codeqlmanifest.json b/.codeqlmanifest.json index 24e20e77278..2c39a11f9ae 100644 --- a/.codeqlmanifest.json +++ b/.codeqlmanifest.json @@ -6,6 +6,8 @@ "*/ql/examples/qlpack.yml", "*/ql/consistency-queries/qlpack.yml", "cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml", + "go/ql/config/legacy-support/qlpack.yml", + "go/build/codeql-extractor-go/codeql-extractor.yml", "javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml", "javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml", "javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml", @@ -15,6 +17,7 @@ "misc/legacy-support/*/qlpack.yml", "misc/suite-helpers/qlpack.yml", "ruby/extractor-pack/codeql-extractor.yml", + "swift/extractor-pack/codeql-extractor.yml", "ql/extractor-pack/codeql-extractor.yml" ], "versionPolicies": { diff --git a/.gitattributes b/.gitattributes index 5953177325f..352b2eafe4b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -39,6 +39,7 @@ *.py text *.lua text *.expected text +*.go text # Explicitly set a bunch of known extensions to binary, because Git < 2.10 will treat # `* text=auto eol=lf` as `* text eol=lf` @@ -52,6 +53,14 @@ java/ql/test/stubs/**/*.java linguist-generated=true java/ql/test/experimental/stubs/**/*.java linguist-generated=true +# Force git not to modify line endings for go or html files under the go/ql directory +go/ql/**/*.go -text +go/ql/**/*.html -text +# Force git not to modify line endings for go dbschemes +go/*.dbscheme -text +# Preserve unusual line ending from codeql-go merge +go/extractor/opencsv/CSVReader.java -text + # For some languages, upgrade script testing references really old dbscheme # files from legacy upgrades that have CRLF line endings. 
Since upgrade # resolution relies on object hashes, we must suppress line ending conversion diff --git a/.github/problem-matchers/codeql-query-format.json b/.github/problem-matchers/codeql-query-format.json new file mode 100644 index 00000000000..35f9c1a6f64 --- /dev/null +++ b/.github/problem-matchers/codeql-query-format.json @@ -0,0 +1,14 @@ +{ + "problemMatcher": [ + { + "owner": "codeql-query-format", + "pattern": [ + { + "regexp": "^((.*) would change by autoformatting\\.)$", + "file": 2, + "message": 1 + } + ] + } + ] +} diff --git a/.github/problem-matchers/codeql-syntax-check.json b/.github/problem-matchers/codeql-syntax-check.json new file mode 100644 index 00000000000..d285ffd77d8 --- /dev/null +++ b/.github/problem-matchers/codeql-syntax-check.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "codeql-syntax-check", + "pattern": [ + { + "regexp": "^((ERROR|WARNING): .* \\((.*):(\\d+),(\\d+)-\\d+\\))$", + "message": 1, + "file": 3, + "line": 4, + "col": 5, + "severity": 2 + } + ] + } + ] +} diff --git a/.github/problem-matchers/codeql-test-run.json b/.github/problem-matchers/codeql-test-run.json new file mode 100644 index 00000000000..918758b3390 --- /dev/null +++ b/.github/problem-matchers/codeql-test-run.json @@ -0,0 +1,14 @@ +{ + "problemMatcher": [ + { + "owner": "codeql-test-run", + "pattern": [ + { + "regexp": "(\\[.*\\] FAILED\\((RESULT|COMPILATION)\\) (.*))$", + "file": 3, + "message": 1 + } + ] + } + ] +} diff --git a/.github/problem-matchers/make.json b/.github/problem-matchers/make.json new file mode 100644 index 00000000000..8275f4851ab --- /dev/null +++ b/.github/problem-matchers/make.json @@ -0,0 +1,13 @@ +{ + "problemMatcher": [ + { + "owner": "make", + "pattern": [ + { + "regexp": "^(make: \\*\\*\\* .*)$", + "message": 1 + } + ] + } + ] +} diff --git a/.github/workflows/go-tests.yml b/.github/workflows/go-tests.yml new file mode 100644 index 00000000000..12e162adf29 --- /dev/null +++ b/.github/workflows/go-tests.yml @@ -0,0 
+1,161 @@ +name: "Go: Run Tests" +on: + pull_request: + paths: + - "go/**" + - .github/workflows/go-tests.yml +jobs: + + test-linux: + name: Test Linux (Ubuntu) + runs-on: ubuntu-latest + steps: + + - name: Set up Go 1.18.1 + uses: actions/setup-go@v3 + with: + go-version: 1.18.1 + id: go + + - name: Set up CodeQL CLI + run: | + echo "Removing old CodeQL Directory..." + rm -rf $HOME/codeql + echo "Done" + cd $HOME + echo "Downloading CodeQL CLI..." + LATEST=$(gh release list --repo https://github.com/github/codeql-cli-binaries | cut -f 1 | sort --version-sort | grep -v beta | tail -1) + gh release download --repo https://github.com/github/codeql-cli-binaries --pattern codeql-linux64.zip "$LATEST" + echo "Done" + echo "Unpacking CodeQL CLI..." + unzip -q codeql-linux64.zip + rm -f codeql-linux64.zip + echo "Done" + env: + GITHUB_TOKEN: ${{ github.token }} + + - name: Check out code + uses: actions/checkout@v2 + + - name: Enable problem matchers in repository + shell: bash + run: 'find .github/problem-matchers -name \*.json -exec echo "::add-matcher::{}" \;' + + - name: Build + run: | + cd go + env PATH=$PATH:$HOME/codeql make + + - name: Check that all QL and Go code is autoformatted + run: | + cd go + env PATH=$PATH:$HOME/codeql make check-formatting + + - name: Compile qhelp files to markdown + run: | + cd go + env PATH=$PATH:$HOME/codeql QHELP_OUT_DIR=qhelp-out make qhelp-to-markdown + + - name: Upload qhelp markdown + uses: actions/upload-artifact@v2 + with: + name: qhelp-markdown + path: go/qhelp-out/**/*.md + + - name: Test + run: | + cd go + env PATH=$PATH:$HOME/codeql make test + + test-mac: + name: Test MacOS + runs-on: macOS-latest + steps: + - name: Set up Go 1.18.1 + uses: actions/setup-go@v3 + with: + go-version: 1.18.1 + id: go + + - name: Set up CodeQL CLI + run: | + echo "Removing old CodeQL Directory..." + rm -rf $HOME/codeql + echo "Done" + cd $HOME + echo "Downloading CodeQL CLI..." 
+ LATEST=$(gh release list --repo https://github.com/github/codeql-cli-binaries | cut -f 1 | sort --version-sort | grep -v beta | tail -1) + gh release download --repo https://github.com/github/codeql-cli-binaries --pattern codeql-osx64.zip "$LATEST" + echo "Done" + echo "Unpacking CodeQL CLI..." + unzip -q codeql-osx64.zip + rm -f codeql-osx64.zip + echo "Done" + env: + GITHUB_TOKEN: ${{ github.token }} + + - name: Check out code + uses: actions/checkout@v2 + + - name: Enable problem matchers in repository + shell: bash + run: 'find .github/problem-matchers -name \*.json -exec echo "::add-matcher::{}" \;' + + - name: Build + run: | + cd go + env PATH=$PATH:$HOME/codeql make + + - name: Test + run: | + cd go + env PATH=$PATH:$HOME/codeql make test + + test-win: + name: Test Windows + runs-on: windows-2019 + steps: + - name: Set up Go 1.18.1 + uses: actions/setup-go@v3 + with: + go-version: 1.18.1 + id: go + + - name: Set up CodeQL CLI + run: | + echo "Removing old CodeQL Directory..." + rm -rf $HOME/codeql + echo "Done" + cd "$HOME" + echo "Downloading CodeQL CLI..." + LATEST=$(gh release list --repo https://github.com/github/codeql-cli-binaries | cut -f 1 | sort --version-sort | grep -v beta | tail -1) + gh release download --repo https://github.com/github/codeql-cli-binaries --pattern codeql-win64.zip "$LATEST" + echo "Done" + echo "Unpacking CodeQL CLI..." 
+ unzip -q -o codeql-win64.zip + unzip -q -o codeql-win64.zip codeql/codeql.exe + rm -f codeql-win64.zip + echo "Done" + env: + GITHUB_TOKEN: ${{ github.token }} + shell: + bash + + - name: Check out code + uses: actions/checkout@v2 + + - name: Enable problem matchers in repository + shell: bash + run: 'find .github/problem-matchers -name \*.json -exec echo "::add-matcher::{}" \;' + + - name: Build + run: | + $Env:Path += ";$HOME\codeql" + cd go + make + + - name: Test + run: | + $Env:Path += ";$HOME\codeql" + cd go + make test diff --git a/.github/workflows/ql-for-ql-build.yml b/.github/workflows/ql-for-ql-build.yml index 84d0e2af101..6b4f6a0abee 100644 --- a/.github/workflows/ql-for-ql-build.yml +++ b/.github/workflows/ql-for-ql-build.yml @@ -140,7 +140,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - folder: [cpp, csharp, java, javascript, python, ql, ruby, swift] + folder: [cpp, csharp, java, javascript, python, ql, ruby, swift, go] needs: - package diff --git a/.github/workflows/ql-for-ql-dataset_measure.yml b/.github/workflows/ql-for-ql-dataset_measure.yml index 24702d76ac4..cf3b696f3b8 100644 --- a/.github/workflows/ql-for-ql-dataset_measure.yml +++ b/.github/workflows/ql-for-ql-dataset_measure.yml @@ -19,7 +19,6 @@ jobs: matrix: repo: - github/codeql - - github/codeql-go runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -77,7 +76,7 @@ jobs: path: stats - run: | python -m pip install --user lxml - find stats -name 'stats.xml' -print0 | sort -z | xargs -0 python ql/scripts/merge_stats.py --output ql/ql/src/ql.dbscheme.stats --normalise ql_tokeninfo + find stats -name 'stats.xml' -print0 | sort -z | xargs -0 python ruby/scripts/merge_stats.py --output ql/ql/src/ql.dbscheme.stats --normalise ql_tokeninfo - uses: actions/upload-artifact@v3 with: name: ql.dbscheme.stats diff --git a/.github/workflows/query-list.yml b/.github/workflows/query-list.yml index f8f2d451adb..7484cc4a7a4 100644 --- a/.github/workflows/query-list.yml +++ 
b/.github/workflows/query-list.yml @@ -20,11 +20,6 @@ jobs: uses: actions/checkout@v3 with: path: codeql - - name: Clone github/codeql-go - uses: actions/checkout@v3 - with: - repository: 'github/codeql-go' - path: codeql-go - name: Set up Python 3.8 uses: actions/setup-python@v3 with: diff --git a/.github/workflows/swift-codegen.yml b/.github/workflows/swift-codegen.yml index ded763f8dbb..b0415606415 100644 --- a/.github/workflows/swift-codegen.yml +++ b/.github/workflows/swift-codegen.yml @@ -25,8 +25,7 @@ jobs: git diff --exit-code --stat HEAD - name: Generate C++ files run: | - bazel run //swift/codegen:trapgen -- --cpp-output=$PWD/swift-generated-headers - bazel run //swift/codegen:cppgen -- --cpp-output=$PWD/swift-generated-headers + bazel run //swift/codegen:cppcodegen -- --cpp-output=$PWD/swift-generated-headers - uses: actions/upload-artifact@v3 with: name: swift-generated-headers diff --git a/.gitignore b/.gitignore index 9dd2effe951..fd9e5b6a07e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ # qltest projects and artifacts */ql/test/**/*.testproj */ql/test/**/*.actual +*/ql/test/**/go.sum # Visual studio temporaries, except a file used by QL4VS .vs/* @@ -42,3 +43,15 @@ csharp/extractor/Semmle.Extraction.CSharp.Driver/Properties/launchSettings.json # CLion project files /.clwb + +# Go build artifacts +go/build/* + +# Go binaries +go/tools/bin +go/tools/linux64 +go/tools/osx64 +go/tools/win64 +go/tools/tokenizer.jar +go/main + diff --git a/.lgtm.yml b/.lgtm.yml index ca0d8fa4eeb..b544d59f520 100755 --- a/.lgtm.yml +++ b/.lgtm.yml @@ -6,6 +6,7 @@ path_classifiers: test: - csharp/ql/src - csharp/ql/test + - go/ql/test - javascript/extractor/parser-tests - javascript/extractor/tests - javascript/ql/src @@ -13,6 +14,9 @@ path_classifiers: - python/ql/src - python/ql/test + example: + - go/ql/src + queries: - include: "*" diff --git a/CODEOWNERS b/CODEOWNERS index 5ee67c52fbc..0b185e40d45 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,5 +1,6 @@ /cpp/ 
@github/codeql-c-analysis /csharp/ @github/codeql-csharp +/go/ @github/codeql-go /java/ @github/codeql-java /javascript/ @github/codeql-javascript /python/ @github/codeql-python @@ -37,6 +38,7 @@ # Workflows /.github/workflows/ @github/codeql-ci-reviewers +/.github/workflows/go-* @github/codeql-go /.github/workflows/js-ml-tests.yml @github/codeql-ml-powered-queries-reviewers /.github/workflows/ql-for-ql-* @github/codeql-ql-for-ql-reviewers /.github/workflows/ruby-* @github/codeql-ruby diff --git a/README.md b/README.md index a27ba833474..0817a1de0ea 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # CodeQL -This open source repository contains the standard CodeQL libraries and queries that power [GitHub Advanced Security](https://github.com/features/security/code) and the other application security products that [GitHub](https://github.com/features/security/) makes available to its customers worldwide. For the queries, libraries, and extractor that power Go analysis, visit the [CodeQL for Go repository](https://github.com/github/codeql-go). +This open source repository contains the standard CodeQL libraries and queries that power [GitHub Advanced Security](https://github.com/features/security/code) and the other application security products that [GitHub](https://github.com/features/security/) makes available to its customers worldwide. ## How do I learn CodeQL and run queries? 
diff --git a/config/identical-files.json b/config/identical-files.json index f73a333b828..d971a482b90 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -22,6 +22,7 @@ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl3.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl4.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl5.qll", + "csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplForContentDataFlow.qll", "python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll", "python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll", "python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll", @@ -500,7 +501,8 @@ ], "CFG": [ "csharp/ql/lib/semmle/code/csharp/controlflow/internal/ControlFlowGraphImplShared.qll", - "ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll" + "ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll", + "swift/ql/lib/codeql/swift/controlflow/internal/ControlFlowGraphImplShared.qll" ], "TypeTracker": [ "python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll", @@ -558,4 +560,4 @@ "javascript/ql/src/Expressions/TypoDatabase.qll", "ql/ql/src/codeql_ql/style/TypoDatabase.qll" ] -} +} \ No newline at end of file diff --git a/cpp/ql/lib/semmle/code/cpp/commons/Printf.qll b/cpp/ql/lib/semmle/code/cpp/commons/Printf.qll index 703c4dd2879..71a31d03aac 100644 --- a/cpp/ql/lib/semmle/code/cpp/commons/Printf.qll +++ b/cpp/ql/lib/semmle/code/cpp/commons/Printf.qll @@ -872,7 +872,7 @@ class FormatLiteral extends Literal { private Type getConversionType1(int n) { exists(string cnv | cnv = this.getConversionChar(n) | - cnv.regexpMatch("d|i") and + cnv = ["d", "i"] and result = this.getIntegralConversion(n) and not result.getUnderlyingType().(IntegralType).isExplicitlySigned() and not result.getUnderlyingType().(IntegralType).isExplicitlyUnsigned() @@ -912,7 +912,7 @@ class FormatLiteral 
extends Literal { private Type getConversionType2(int n) { exists(string cnv | cnv = this.getConversionChar(n) | - cnv.regexpMatch("o|u|x|X") and + cnv = ["o", "u", "x", "X"] and result = this.getIntegralConversion(n) and result.getUnderlyingType().(IntegralType).isUnsigned() ) @@ -920,7 +920,7 @@ class FormatLiteral extends Literal { private Type getConversionType3(int n) { exists(string cnv | cnv = this.getConversionChar(n) | - cnv.regexpMatch("a|A|e|E|f|F|g|G") and result = this.getFloatingPointConversion(n) + cnv = ["a", "A", "e", "E", "f", "F", "g", "G"] and result = this.getFloatingPointConversion(n) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll index bb56e1b54fa..51e4faaeaef 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll @@ -216,10 +216,9 @@ private module LambdaFlow { or // jump step exists(Node mid, DataFlowType t0 | - revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, lastCall) and toReturn = false and - toJump = true and - lastCall = TDataFlowCallNone() + toJump = true | jumpStepCached(node, mid) and t = t0 @@ -789,24 +788,31 @@ private module Cached { cached predicate readSet(Node node1, ContentSet c, Node node2) { readStep(node1, c, node2) } + cached + predicate storeSet( + Node node1, ContentSet c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + contentType = getNodeDataFlowType(node1) and + containerType = getNodeDataFlowType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + readSet(n2, c, n1) and + contentType = getNodeDataFlowType(n1) and + 
containerType = getNodeDataFlowType(n2) + ) + } + private predicate store( Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType ) { - exists(ContentSet cs | c = cs.getAStoreContent() | - storeStep(node1, cs, node2) and - contentType = getNodeDataFlowType(node1) and - containerType = getNodeDataFlowType(node2) - or - exists(Node n1, Node n2 | - n1 = node1.(PostUpdateNode).getPreUpdateNode() and - n2 = node2.(PostUpdateNode).getPreUpdateNode() - | - argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, cs, contentType), n1) - or - readSet(n2, cs, n1) and - contentType = getNodeDataFlowType(n1) and - containerType = getNodeDataFlowType(n2) - ) + exists(ContentSet cs | + c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowVar.qll index 6a2b087c2e1..34b2ef5aaf9 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -549,7 +549,7 @@ module FlowVar_internal { bb = this.(Loop).getStmt() and v = this.getARelevantVariable() or - this.reachesWithoutAssignment(bb.getAPredecessor(), v) and + this.reachesWithoutAssignment(pragma[only_bind_out](bb.getAPredecessor()), v) and this.bbInLoop(bb) ) and not assignsToVar(bb, v) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll index bb56e1b54fa..51e4faaeaef 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll @@ -216,10 +216,9 @@ private module LambdaFlow { or // jump step exists(Node mid, DataFlowType t0 | - revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, lastCall) and toReturn = false and - 
toJump = true and - lastCall = TDataFlowCallNone() + toJump = true | jumpStepCached(node, mid) and t = t0 @@ -789,24 +788,31 @@ private module Cached { cached predicate readSet(Node node1, ContentSet c, Node node2) { readStep(node1, c, node2) } + cached + predicate storeSet( + Node node1, ContentSet c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + contentType = getNodeDataFlowType(node1) and + containerType = getNodeDataFlowType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + readSet(n2, c, n1) and + contentType = getNodeDataFlowType(n1) and + containerType = getNodeDataFlowType(n2) + ) + } + private predicate store( Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType ) { - exists(ContentSet cs | c = cs.getAStoreContent() | - storeStep(node1, cs, node2) and - contentType = getNodeDataFlowType(node1) and - containerType = getNodeDataFlowType(node2) - or - exists(Node n1, Node n2 | - n1 = node1.(PostUpdateNode).getPreUpdateNode() and - n2 = node2.(PostUpdateNode).getPreUpdateNode() - | - argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, cs, contentType), n1) - or - readSet(n2, cs, n1) and - contentType = getNodeDataFlowType(n1) and - containerType = getNodeDataFlowType(n2) - ) + exists(ContentSet cs | + c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType) ) } diff --git a/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql b/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql index 2a8aba6a961..dee723e2686 100644 --- a/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql +++ b/cpp/ql/src/Likely Bugs/Conversion/LossyFunctionResultCast.ql @@ -19,7 +19,7 @@ predicate whitelist(Function f) { "nearbyintl", "rint", 
"rintf", "rintl", "round", "roundf", "roundl", "trunc", "truncf", "truncl" ] or - f.getName().matches("__builtin_%") + f.getName().matches("\\_\\_builtin\\_%") } predicate whitelistPow(FunctionCall fc) { diff --git a/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql b/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql index 23cf7e8364b..40ed53609e8 100644 --- a/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql +++ b/cpp/ql/src/Likely Bugs/Memory Management/PotentialBufferOverflow.ql @@ -13,7 +13,7 @@ * @deprecated This query is deprecated, use * Potentially overrunning write (`cpp/overrunning-write`) and * Potentially overrunning write with float to string conversion - * (`cpp/overrunning-write-with-float) instead. + * (`cpp/overrunning-write-with-float`) instead. */ import cpp diff --git a/cpp/ql/src/experimental/Best Practices/WrongUintAccess.cpp b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.cpp new file mode 100644 index 00000000000..f0704299ffc --- /dev/null +++ b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.cpp @@ -0,0 +1,7 @@ +void test() +{ + uint16_t j = 256; + char testSubject[122]; + + testSubject[j] = 12; // You can use a uint8 here +} diff --git a/cpp/ql/src/experimental/Best Practices/WrongUintAccess.qhelp b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.qhelp new file mode 100644 index 00000000000..d298db04ed7 --- /dev/null +++ b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.qhelp @@ -0,0 +1,18 @@ + + + + +Find access to an array with a Uint16 when the array has a size lower than 256. + + + +Use a int with a lower bit size instead. For instance in this example use a 8 bit int. 
+ + + + + + + diff --git a/cpp/ql/src/experimental/Best Practices/WrongUintAccess.ql b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.ql new file mode 100644 index 00000000000..ee1cca9b6e9 --- /dev/null +++ b/cpp/ql/src/experimental/Best Practices/WrongUintAccess.ql @@ -0,0 +1,25 @@ +/** + * @id cpp/wrong-uint-access + * @name Wrong Uint + * @description Access an array of size lower than 256 with a uint16. + * @kind problem + * @problem.severity recommendation + * @tags efficiency + */ + +import cpp + +from Variable var, ArrayExpr useExpr, ArrayType defLine, VariableAccess use +where + var.getUnspecifiedType() = defLine and + use = useExpr.getArrayBase() and + var = use.getTarget() and + ( + useExpr.getArrayOffset().getType() instanceof UInt16_t or + useExpr.getArrayOffset().getType() instanceof UInt32_t or + useExpr.getArrayOffset().getType() instanceof UInt64_t + ) and + defLine.getArraySize() <= 256 +select useExpr, + "Using a " + useExpr.getArrayOffset().getType() + " to acess the array $@ of size " + + defLine.getArraySize() + ".", var, var.getName() diff --git a/cpp/ql/src/experimental/Security/CWE/CWE-266/IncorrectPrivilegeAssignment.ql b/cpp/ql/src/experimental/Security/CWE/CWE-266/IncorrectPrivilegeAssignment.ql index 8411d0ba2e2..5bdd5a21fe5 100644 --- a/cpp/ql/src/experimental/Security/CWE/CWE-266/IncorrectPrivilegeAssignment.ql +++ b/cpp/ql/src/experimental/Security/CWE/CWE-266/IncorrectPrivilegeAssignment.ql @@ -58,7 +58,7 @@ where // unfortunately cannot use numeric value here because // O_CREAT is defined differently on different OSes: // https://github.com/red/red/blob/92feb0c0d5f91e087ab35fface6906afbf99b603/runtime/definitions.reds#L477-L491 // this may introduce false negatives - fctmp.getArgument(1).(BitwiseOrExpr).getAChild*().getValueText().matches("O_CREAT") or + fctmp.getArgument(1).(BitwiseOrExpr).getAChild*().getValueText() = "O_CREAT" or fctmp.getArgument(1).getValueText().matches("%O_CREAT%") ) and fctmp.getNumberOfArguments() = 2 
and diff --git a/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql b/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql index 9bb63cb6b29..ff4b1baec76 100644 --- a/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql +++ b/cpp/ql/src/jsf/4.05 Libraries/AV Rule 23.ql @@ -13,7 +13,7 @@ import cpp from Function f where - f.getName().regexpMatch("atof|atoi|atol") and + f.getName() = ["atof", "atoi", "atol"] and f.getFile().getAbsolutePath().matches("%stdlib.h") select f.getACallToThisFunction(), "AV Rule 23: The library functions atof, atoi and atol from library shall not be used." diff --git a/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql b/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql index 9e0fd1d5de8..3bf27d8121c 100644 --- a/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql +++ b/cpp/ql/src/jsf/4.05 Libraries/AV Rule 24.ql @@ -13,7 +13,7 @@ import cpp from Function f where - f.getName().regexpMatch("abort|exit|getenv|system") and + f.getName() = ["abort", "exit", "getenv", "system"] and f.getFile().getAbsolutePath().matches("%stdlib.h") select f.getACallToThisFunction(), "The library functions abort, exit, getenv and system from library should not be used." diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll index 2cb0f411b41..b2390c7b14e 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll @@ -515,11 +515,7 @@ Element interpretElement( /** * Holds if `c` has a `generated` summary. 
*/ -predicate hasSummary(Callable c, boolean generated) { - exists(DataFlowCallable dc | - dc.asSummarizedCallable() = c and summaryElement(dc, _, _, _, generated) - ) -} +predicate hasSummary(Callable c, boolean generated) { summaryElement(c, _, _, _, generated) } cached private module Cached { diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll index dd98abc3448..08a9a8a5421 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll @@ -114,69 +114,7 @@ module SummaryComponentStack { SummaryComponentStack jump(Callable c) { result = singleton(SummaryComponent::jump(c)) } } -/** - * A class for synthesized callables given by a summary. - */ -abstract class SummarizedCallable extends DotNet::Callable { - SummarizedCallable() { this.isUnboundDeclaration() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. 
- */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if values stored inside `content` are cleared on objects passed as - * arguments at position `pos` to this callable. - */ - pragma[nomagic] - predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { none() } - - /** - * Holds if the summary is auto generated. - */ - predicate isAutoGenerated() { none() } -} - -private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable { - private SummarizedCallable sc; - - SummarizedCallableAdapter() { this = DataFlowDispatch::TSummarizedCallable(sc) } - - final override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - sc.propagatesFlow(input, output, preservesValue) - } - - final override predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { - sc.clearsContent(pos, content) - } - - final override predicate isAutoGenerated() { sc.isAutoGenerated() } -} +class SummarizedCallable = Impl::Public::SummarizedCallable; private predicate recordConstructorFlow(Constructor c, int i, Property p) { c = any(RecordType r).getAMember() and @@ -201,28 +139,6 @@ private class RecordConstructorFlow extends SummarizedCallable { } } -private class SummarizedCallableDefaultClearsContent extends Impl::Public::SummarizedCallable { - SummarizedCallableDefaultClearsContent() { - this instanceof Impl::Public::SummarizedCallable or none() - } - - // By default, we assume that all stores into arguments are definite - override predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { - exists(SummaryComponentStack output, SummaryComponent target | - this.propagatesFlow(_, output, _) and - output.drop(_) = - SummaryComponentStack::push(SummaryComponent::content(content), - SummaryComponentStack::singleton(target)) and - not content 
instanceof DataFlow::ElementContent - | - target = SummaryComponent::argument(pos.getPosition()) - or - target = SummaryComponent::qualifier() and - pos.isThisParameter() - ) - } -} - class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; private class RecordConstructorFlowRequiredSummaryComponentStack extends RequiredSummaryComponentStack { diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ContentDataFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ContentDataFlow.qll new file mode 100644 index 00000000000..2bdb56b2aa6 --- /dev/null +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ContentDataFlow.qll @@ -0,0 +1,529 @@ +/** + * Provides classes for performing global (inter-procedural) + * content-sensitive data flow analyses. + */ + +private import DataFlowImplCommon + +module ContentDataFlow { + private import DataFlowImplSpecific::Private + private import DataFlowImplSpecific::Private as DataFlowPrivate + private import DataFlowImplForContentDataFlow as DF + + class Node = DF::Node; + + class FlowFeature = DF::FlowFeature; + + class ContentSet = DF::ContentSet; + + predicate stageStats = DF::stageStats/8; + + /** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends ContentDataFlowConfiguration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * // Optionally override `getAFeature`. + * // Optionally override `accessPathLimit`. 
+ * // Optionally override `isRelevantContent`. + * } + * ``` + * + * Unlike `DataFlow::Configuration` (on which this class is based), we allow + * for data to be stored (possibly nested) inside contents of sources and sinks. + * We track flow paths of the form + * + * ``` + * source --value-->* node + * (--read--> node --value-->* node)* + * --(non-value|value)-->* node + * (--store--> node --value-->* node)* + * --value-->* sink + * ``` + * + * where `--value-->` is a value-preserving flow step, `--read-->` is a read + * step, `--store-->` is a store step, and `--(non-value)-->` is a + * non-value-preserving flow step. + * + * That is, first a sequence of 0 or more reads, followed by 0 or more additional + * steps, followed by 0 or more stores, with value-preserving steps allowed in + * between all other steps. + */ + abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrier(Node node) { none() } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. 
+ * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** Gets a limit on the number of reads out of sources and number of stores into sinks. */ + int accessPathLimit() { result = DataFlowPrivate::accessPathLimit() } + + /** Holds if `c` is relevant for reads out of sources or stores into sinks. */ + predicate isRelevantContent(ContentSet c) { any() } + + /** + * Holds if data stored inside `sourceAp` on `source` flows to `sinkAp` inside `sink` + * for this configuration. `preservesValue` is `false` if any of the additional + * flow steps defined by `isAdditionalFlowStep` are needed, and `true` otherwise. + * + * For the source access path, `sourceAp`, the top of the stack represents the content + * that was last read from. That is, if `sourceAp` is `Field1.Field2` (with `Field1` + * being the top of the stack), then there is flow from `source.Field2.Field1`. + * + * For the sink access path, `sinkAp`, the top of the stack represents the content + * that was last stored into. That is, if `sinkAp` is `Field1.Field2` (with `Field1` + * being the top of the stack), then there is flow into `sink.Field1.Field2`. + */ + final predicate hasFlow( + Node source, AccessPath sourceAp, Node sink, AccessPath sinkAp, boolean preservesValue + ) { + exists(DF::PathNode pathSource, DF::PathNode pathSink | + this.(ConfigurationAdapter).hasFlowPath(pathSource, pathSink) and + nodeReaches(pathSource, TAccessPathNil(), TAccessPathNil(), pathSink, sourceAp, sinkAp) and + source = pathSource.getNode() and + sink = pathSink.getNode() + | + pathSink.getState().(InitState).decode(preservesValue) + or + pathSink.getState().(ReadState).decode(_, preservesValue) + or + pathSink.getState().(StoreState).decode(_, preservesValue) + ) + } + } + + /** A flow state representing no reads or stores. 
*/ + private class InitState extends DF::FlowState { + private boolean preservesValue_; + + InitState() { this = "Init(" + preservesValue_ + ")" and preservesValue_ in [false, true] } + + predicate decode(boolean preservesValue) { preservesValue = preservesValue_ } + } + + /** A flow state representing that content has been stored into. */ + private class StoreState extends DF::FlowState { + private boolean preservesValue_; + private int size_; + + StoreState() { + preservesValue_ in [false, true] and + size_ in [1 .. any(Configuration c).accessPathLimit()] and + this = "StoreState(" + size_ + "," + preservesValue_ + ")" + } + + predicate decode(int size, boolean preservesValue) { + size = size_ and preservesValue = preservesValue_ + } + } + + /** A flow state representing that content has been read from. */ + private class ReadState extends DF::FlowState { + private boolean preservesValue_; + private int size_; + + ReadState() { + preservesValue_ in [false, true] and + size_ in [1 .. any(Configuration c).accessPathLimit()] and + this = "ReadState(" + size_ + "," + preservesValue_ + ")" + } + + predicate decode(int size, boolean preservesValue) { + size = size_ and preservesValue = preservesValue_ + } + } + + private predicate storeStep( + Node node1, DF::FlowState state1, ContentSet c, Node node2, StoreState state2, + Configuration config + ) { + exists(boolean preservesValue, int size | + storeSet(node1, c, node2, _, _) and + config.isRelevantContent(c) and + state2.decode(size + 1, preservesValue) + | + state1.(InitState).decode(preservesValue) and size = 0 + or + state1.(ReadState).decode(_, preservesValue) and size = 0 + or + state1.(StoreState).decode(size, preservesValue) + ) + } + + private predicate readStep( + Node node1, DF::FlowState state1, ContentSet c, Node node2, ReadState state2, + Configuration config + ) { + exists(int size | + readSet(node1, c, node2) and + config.isRelevantContent(c) and + state2.decode(size + 1, true) + | + 
state1.(InitState).decode(true) and + size = 0 + or + state1.(ReadState).decode(size, true) + ) + } + + private predicate additionalStep( + Node node1, DF::FlowState state1, Node node2, DF::FlowState state2, Configuration config + ) { + config.isAdditionalFlowStep(node1, node2) and + ( + state1 instanceof InitState and + state2.(InitState).decode(false) + or + exists(int size | + state1.(ReadState).decode(size, _) and + state2.(ReadState).decode(size, false) + ) + ) + } + + private class ConfigurationAdapter extends DF::Configuration { + private Configuration c; + + ConfigurationAdapter() { this = c } + + final override predicate isSource(Node source, DF::FlowState state) { + c.isSource(source) and + state.(InitState).decode(true) + } + + final override predicate isSink(Node sink, DF::FlowState state) { + c.isSink(sink) and + ( + state instanceof InitState or + state instanceof StoreState or + state instanceof ReadState + ) + } + + final override predicate isAdditionalFlowStep( + Node node1, DF::FlowState state1, Node node2, DF::FlowState state2 + ) { + storeStep(node1, state1, _, node2, state2, this) or + readStep(node1, state1, _, node2, state2, this) or + additionalStep(node1, state1, node2, state2, this) + } + + final override predicate isBarrier(Node node) { c.isBarrier(node) } + + final override FlowFeature getAFeature() { result = c.getAFeature() } + + // needed to record reads/stores inside summarized callables + final override predicate includeHiddenNodes() { any() } + } + + private newtype TAccessPath = + TAccessPathNil() or + TAccessPathCons(ContentSet head, AccessPath tail) { + nodeReachesStore(_, _, _, _, head, _, tail) + or + nodeReachesRead(_, _, _, _, head, tail, _) + } + + /** An access path. */ + class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + ContentSet getHead() { this = TAccessPathCons(result, _) } + + /** Gets the tail of this access path, if any. 
*/ + AccessPath getTail() { this = TAccessPathCons(_, result) } + + /** + * Gets a textual representation of this access path. + * + * Elements are dot-separated, and the head of the stack is + * rendered first. + */ + string toString() { + this = TAccessPathNil() and + result = "" + or + exists(ContentSet head, AccessPath tail | + this = TAccessPathCons(head, tail) and + result = head + "." + tail + ) + } + } + + // important to use `edges` and not `PathNode::getASuccessor()`, as the latter + // is not pruned for reachability + private predicate pathSucc = DF::PathGraph::edges/2; + + /** + * Provides a big-step flow relation, where flow stops at read/store steps that + * must be recorded, and flow via `subpaths` such that reads/stores inside + * summarized callables can be recorded as well. + */ + private module BigStepFlow { + private predicate reachesSink(DF::PathNode node) { + any(ConfigurationAdapter config).isSink(node.getNode(), node.getState()) + or + exists(DF::PathNode mid | + pathSucc(node, mid) and + reachesSink(mid) + ) + } + + /** + * Holds if the flow step `pred -> succ` should not be allowed to be included + * in the big-step relation. 
+ */ + pragma[nomagic] + private predicate excludeStep(DF::PathNode pred, DF::PathNode succ) { + pathSucc(pred, succ) and + ( + // we need to record reads/stores inside summarized callables + DF::PathGraph::subpaths(pred, _, _, succ) + or + // only allow flow into a summarized callable, as part of the big-step + // relation, when flow can reach a sink without going back out + DF::PathGraph::subpaths(pred, succ, _, _) and + not reachesSink(succ) + or + // needed to record store steps + storeStep(pred.getNode(), pred.getState(), _, succ.getNode(), succ.getState(), + pred.getConfiguration()) + or + // needed to record read steps + readStep(pred.getNode(), pred.getState(), _, succ.getNode(), succ.getState(), + pred.getConfiguration()) + ) + } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallableImpl(DF::PathNode node) { + result = getNodeEnclosingCallable(node.getNode()) + } + + pragma[inline] + private DataFlowCallable getEnclosingCallable(DF::PathNode node) { + pragma[only_bind_into](result) = getEnclosingCallableImpl(pragma[only_bind_out](node)) + } + + pragma[nomagic] + private predicate bigStepEntry(DF::PathNode node) { + node.getConfiguration() instanceof Configuration and + ( + any(ConfigurationAdapter config).isSource(node.getNode(), node.getState()) + or + excludeStep(_, node) + or + DF::PathGraph::subpaths(_, node, _, _) + ) + } + + pragma[nomagic] + private predicate bigStepExit(DF::PathNode node) { + node.getConfiguration() instanceof Configuration and + ( + bigStepEntry(node) + or + any(ConfigurationAdapter config).isSink(node.getNode(), node.getState()) + or + excludeStep(node, _) + or + DF::PathGraph::subpaths(_, _, node, _) + ) + } + + pragma[nomagic] + private predicate step(DF::PathNode pred, DF::PathNode succ) { + pathSucc(pred, succ) and + not excludeStep(pred, succ) + } + + pragma[nomagic] + private predicate stepRec(DF::PathNode pred, DF::PathNode succ) { + step(pred, succ) and + not bigStepEntry(pred) + } + + private predicate 
stepRecPlus(DF::PathNode n1, DF::PathNode n2) = fastTC(stepRec/2)(n1, n2) + + /** + * Holds if there is flow `pathSucc+(pred) = succ`, and such a flow path does + * not go through any reads/stores that need to be recorded, or summarized + * steps. + */ + pragma[nomagic] + private predicate bigStep(DF::PathNode pred, DF::PathNode succ) { + exists(DF::PathNode mid | + bigStepEntry(pred) and + step(pred, mid) + | + succ = mid + or + stepRecPlus(mid, succ) + ) and + bigStepExit(succ) + } + + pragma[nomagic] + predicate bigStepNotLocal(DF::PathNode pred, DF::PathNode succ) { + bigStep(pred, succ) and + not getEnclosingCallable(pred) = getEnclosingCallable(succ) + } + + pragma[nomagic] + predicate bigStepMaybeLocal(DF::PathNode pred, DF::PathNode succ) { + bigStep(pred, succ) and + getEnclosingCallable(pred) = getEnclosingCallable(succ) + } + } + + /** + * Holds if `source` can reach `node`, having read `reads` from the source and + * written `stores` into `node`. + * + * `source` is either a source from a configuration, in which case `scReads` and + * `scStores` are always empty, or it is the parameter of a summarized callable, + * in which case `scReads` and `scStores` record the reads/stores for a summary + * context, that is, the reads/stores for an argument that can reach the parameter. 
+ */ + pragma[nomagic] + private predicate nodeReaches( + DF::PathNode source, AccessPath scReads, AccessPath scStores, DF::PathNode node, + AccessPath reads, AccessPath stores + ) { + exists(ConfigurationAdapter config | + node = source and + reads = scReads and + stores = scStores + | + config.hasFlowPath(source, _) and + scReads = TAccessPathNil() and + scStores = TAccessPathNil() + or + // the argument in a sub path can be reached, so we start flow from the sub path + // parameter, while recording the read/store summary context + exists(DF::PathNode arg | + nodeReachesSubpathArg(_, _, _, arg, scReads, scStores) and + DF::PathGraph::subpaths(arg, source, _, _) + ) + ) + or + exists(DF::PathNode mid | + nodeReaches(source, scReads, scStores, mid, reads, stores) and + BigStepFlow::bigStepMaybeLocal(mid, node) + ) + or + exists(DF::PathNode mid | + nodeReaches(source, scReads, scStores, mid, reads, stores) and + BigStepFlow::bigStepNotLocal(mid, node) and + // when flow is not local, we cannot flow back out, so we may stop + // flow early when computing summary flow + any(ConfigurationAdapter config).hasFlowPath(source, _) and + scReads = TAccessPathNil() and + scStores = TAccessPathNil() + ) + or + // store step + exists(AccessPath storesMid, ContentSet c | + nodeReachesStore(source, scReads, scStores, node, c, reads, storesMid) and + stores = TAccessPathCons(c, storesMid) + ) + or + // read step + exists(AccessPath readsMid, ContentSet c | + nodeReachesRead(source, scReads, scStores, node, c, readsMid, stores) and + reads = TAccessPathCons(c, readsMid) + ) + or + // flow-through step; match outer stores/reads with inner store/read summary contexts + exists(DF::PathNode mid, AccessPath innerScReads, AccessPath innerScStores | + nodeReachesSubpathArg(source, scReads, scStores, mid, innerScReads, innerScStores) and + subpathArgReachesOut(mid, innerScReads, innerScStores, node, reads, stores) + ) + } + + pragma[nomagic] + private predicate nodeReachesStore( + 
DF::PathNode source, AccessPath scReads, AccessPath scStores, DF::PathNode node, ContentSet c, + AccessPath reads, AccessPath stores + ) { + exists(DF::PathNode mid | + nodeReaches(source, scReads, scStores, mid, reads, stores) and + storeStep(mid.getNode(), mid.getState(), c, node.getNode(), node.getState(), + node.getConfiguration()) and + pathSucc(mid, node) + ) + } + + pragma[nomagic] + private predicate nodeReachesRead( + DF::PathNode source, AccessPath scReads, AccessPath scStores, DF::PathNode node, ContentSet c, + AccessPath reads, AccessPath stores + ) { + exists(DF::PathNode mid | + nodeReaches(source, scReads, scStores, mid, reads, stores) and + readStep(mid.getNode(), mid.getState(), c, node.getNode(), node.getState(), + node.getConfiguration()) and + pathSucc(mid, node) + ) + } + + pragma[nomagic] + private predicate nodeReachesSubpathArg( + DF::PathNode source, AccessPath scReads, AccessPath scStores, DF::PathNode arg, + AccessPath reads, AccessPath stores + ) { + nodeReaches(source, scReads, scStores, arg, reads, stores) and + DF::PathGraph::subpaths(arg, _, _, _) + } + + pragma[nomagic] + private predicate subpathArgReachesOut( + DF::PathNode arg, AccessPath scReads, AccessPath scStores, DF::PathNode out, AccessPath reads, + AccessPath stores + ) { + exists(DF::PathNode source, DF::PathNode ret | + nodeReaches(source, scReads, scStores, ret, reads, stores) and + DF::PathGraph::subpaths(arg, source, ret, out) + ) + } +} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll index 2310ce1ed44..d35b741fb0c 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll @@ -83,13 +83,32 @@ newtype TReturnKind = ) } +/** + * A summarized callable where the summary should be used for dataflow analysis. 
+ */ +class DataFlowSummarizedCallable instanceof FlowSummary::SummarizedCallable { + DataFlowSummarizedCallable() { + not this.fromSource() + or + this.fromSource() and not this.isAutoGenerated() + } + + string toString() { result = super.toString() } +} + private module Cached { + /** + * The following heuristic is used to rank when to use source code or when to use summaries for DataFlowCallables. + * 1. Use hand written summaries. + * 2. Use source code. + * 3. Use auto generated summaries. + */ cached newtype TDataFlowCallable = TDotNetCallable(DotNet::Callable c) { - c.isUnboundDeclaration() and not c instanceof FlowSummary::SummarizedCallable + c.isUnboundDeclaration() and not c instanceof DataFlowSummarizedCallable } or - TSummarizedCallable(FlowSummary::SummarizedCallable c) + TSummarizedCallable(DataFlowSummarizedCallable sc) cached newtype TDataFlowCall = @@ -457,7 +476,7 @@ class SummaryCall extends DelegateDataFlowCall, TSummaryCall { override DataFlow::Node getNode() { none() } - override DataFlowCallable getEnclosingCallable() { result = c } + override DataFlowCallable getEnclosingCallable() { result.asSummarizedCallable() = c } override string toString() { result = "[summary] call to " + receiver + " in " + c } diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll index bb56e1b54fa..51e4faaeaef 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll @@ -216,10 +216,9 @@ private module LambdaFlow { or // jump step exists(Node mid, DataFlowType t0 | - revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, lastCall) and toReturn = false and - toJump = true and - lastCall = TDataFlowCallNone() + toJump = true | jumpStepCached(node, mid) and t = t0 @@ -789,24 +788,31 @@ private module Cached 
{ cached predicate readSet(Node node1, ContentSet c, Node node2) { readStep(node1, c, node2) } + cached + predicate storeSet( + Node node1, ContentSet c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + contentType = getNodeDataFlowType(node1) and + containerType = getNodeDataFlowType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + readSet(n2, c, n1) and + contentType = getNodeDataFlowType(n1) and + containerType = getNodeDataFlowType(n2) + ) + } + private predicate store( Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType ) { - exists(ContentSet cs | c = cs.getAStoreContent() | - storeStep(node1, cs, node2) and - contentType = getNodeDataFlowType(node1) and - containerType = getNodeDataFlowType(node2) - or - exists(Node n1, Node n2 | - n1 = node1.(PostUpdateNode).getPreUpdateNode() and - n2 = node2.(PostUpdateNode).getPreUpdateNode() - | - argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, cs, contentType), n1) - or - readSet(n2, cs, n1) and - contentType = getNodeDataFlowType(n1) and - containerType = getNodeDataFlowType(n2) - ) + exists(ContentSet cs | + c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType) ) } diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplForContentDataFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplForContentDataFlow.qll new file mode 100644 index 00000000000..fb773ea89f8 --- /dev/null +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplForContentDataFlow.qll @@ -0,0 +1,5389 @@ +/** + * Provides an implementation of global (interprocedural) data flow. 
This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. 
+ * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + predicate isSource(Node source) { none() } + + /** + * Holds if `source` is a relevant data flow source with the given initial + * `state`. + */ + predicate isSource(Node source, FlowState state) { none() } + + /** + * Holds if `sink` is a relevant data flow sink. + */ + predicate isSink(Node sink) { none() } + + /** + * Holds if `sink` is a relevant data flow sink accepting `state`. + */ + predicate isSink(Node source, FlowState state) { none() } + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** + * Holds if data flow through `node` is prohibited when the flow state is + * `state`. + */ + predicate isBarrier(Node node, FlowState state) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. 
*/ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if data flow through nodes guarded by `guard` is prohibited when + * the flow state is `state` + */ + predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) { + none() + } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, ContentSet c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. 
+ */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if hidden nodes should be included in the data flow graph. + * + * This feature should only be used for debugging or when the data flow graph + * is not visualized (for example in a `path-problem` query). + */ + predicate includeHiddenNodes() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. 
+ */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. 
+ */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSource(n, _)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n | this.isSink(n, _)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx 
extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.asNode().(ParamNode).isParameterOf(c, pos) + } + + ParameterPosition getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) + | + config.isSource(n) or config.isSource(n, _) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) + | + config.isSink(n) or config.isSink(n, _) + ) +} + +pragma[nomagic] +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) and + not config.isSource(n, _) + or + config.isBarrierOut(n) and + not config.isSink(n) and + not config.isSink(n, _) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + ) +} + +pragma[nomagic] +private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n, state) + or + exists(BarrierGuard g | + config.isBarrierGuard(g, state) and + n = g.getAGuardedNode() + ) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSource(node.asNode()) and state instanceof FlowStateEmpty + or 
+ config.isSource(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +pragma[nomagic] +private predicate sinkNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSink(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSink(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(n1, n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. 
+ */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n and + not fullBarrier(node2, config) + ) +} + +/** Holds if the additional step from `node1` in state `s1` to `node2` in state `s2` stays within one enclosing callable and is not blocked by a barrier or a state barrier. */ +private predicate additionalLocalStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, s1, n2, s2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(n1, n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, n2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** Holds if the additional step from `node1` in state `s1` to `node2` in state `s2` jumps between callables, is not blocked by a barrier or a state barrier, and `config` does not use `FeatureEqualSourceSinkCallContext`. */ +private predicate additionalJumpStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(n1, s1, n2, s2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** Holds if there is a read step of content set `c` from `node1` to `node2` that passes `stepFilter`, or if the step leaves an implicit read node for a read of `c` that `config` allows implicitly. */ +pragma[nomagic] +private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) { + readSet(node1.asNode(), c, node2.asNode()) and + stepFilter(node1, node2, config) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(ContentSet cs | + readSet(node1, cs, node2, config) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate clearsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + clearsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate expectsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | +
expectsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +/** Holds if `expectsContentCached` does not hold for the underlying node of `n` with any content set. */ +pragma[nomagic] +private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) } + +pragma[nomagic] +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(node1.asNode(), tc, node2.asNode(), contentType) and + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** Holds if `config` has the feature `FeatureHasSourceCallContext` or `FeatureEqualSourceSinkCallContext`. */ +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** Holds if `config` has the feature `FeatureHasSinkCallContext` or `FeatureEqualSourceSinkCallContext`. */ +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private module Stage1 { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call.
+ */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + sourceNode(node, _, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | fwdFlow(mid, cc, config) | + localFlowStep(mid, node, config) or + additionalLocalFlowStep(mid, node, config) or + additionalLocalStateStep(mid, _, node, _, config) + ) + or + exists(NodeEx mid | fwdFlow(mid, _, config) and cc = false | + jumpStep(mid, node, config) or + additionalJumpStep(mid, node, config) or + additionalJumpStateStep(mid, _, node, _, config) + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(ContentSet c | + fwdFlowReadSet(c, node, cc, config) and + fwdFlowConsCandSet(c, _, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + } + + /** Holds if `node` is covered by `fwdFlow` with any call context. */ + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + /** Holds if `node` is the target of a read of content set `c` from a node covered by `fwdFlow` with call context `cc`. */ + pragma[nomagic] + private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + readSet(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `cs` may be interpreted in a read as the target of some store + * into `c`, in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) { + fwdFlowConsCand(c, config) and + c = cs.getAReadContent() + } + + /** Holds if some return node with return position `pos` is covered by `fwdFlow` with call context `cc`. */ + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + /** Holds if `out` is a viable out node of `call` for a return position covered by `fwdFlow` with call context `cc`, and `out` is not a full barrier. */ + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+ */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + /** Holds if an additional local or jump state step from `state1` to `state2` starts at a node covered by `fwdFlow`. */ + private predicate stateStepFwd(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1 | + additionalLocalStateStep(node1, state1, _, state2, config) or + additionalJumpStateStep(node1, state1, _, state2, config) + | + fwdFlow(node1, config) + ) + } + + /** Holds if `state` is the state of some source node, or is reachable from such a state through `stateStepFwd`. */ + private predicate fwdFlowState(FlowState state, Configuration config) { + sourceNode(_, state, config) + or + exists(FlowState state0 | + fwdFlowState(state0, config) and + stateStepFwd(state0, state, config) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + exists(FlowState state | + fwdFlow(node, pragma[only_bind_into](config)) and + sinkNode(node, state, config) and + fwdFlowState(state, pragma[only_bind_into](config)) and + if hasSinkCallCtx(config) then toReturn = true else toReturn = false + ) + or + exists(NodeEx mid | revFlow(mid, toReturn, config) | + localFlowStep(node, mid, config) or + additionalLocalFlowStep(node, mid, config) or + additionalLocalStateStep(node, _, mid, _, config) + ) + or + exists(NodeEx mid | revFlow(mid, _, config) and toReturn = false | + jumpStep(node, mid, config) or + additionalJumpStep(node, mid, config) or + additionalJumpStateStep(node, _, mid, _, config) + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read
+ exists(NodeEx mid, ContentSet c | + readSet(node, c, mid, config) and + fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, ContentSet cs | + fwdFlow(node, pragma[only_bind_into](config)) and + readSet(node, cs, mid, config) and + fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + /** Holds if `node` stores the content `c`, which is a forward store candidate, into a node covered by `revFlow` with `toReturn`. */ + pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`.
+ */ + pragma[nomagic] + predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + /** Holds if a value returned at position `pos` has a viable out node that is covered by `revFlow`. */ + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + /** Holds if `arg` is an argument of `call` covered by `fwdFlow` whose viable parameter is covered by `revFlow` with `toReturn`. */ + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`.
+ */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + /** Holds if an additional local or jump state step from `state1` to `state2` connects two nodes covered by `revFlow`, and both states are covered by `fwdFlowState`. */ + private predicate stateStepRev(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + additionalLocalStateStep(node1, state1, node2, state2, config) or + additionalJumpStateStep(node1, state1, node2, state2, config) + | + revFlow(node1, _, pragma[only_bind_into](config)) and + revFlow(node2, _, pragma[only_bind_into](config)) and + fwdFlowState(state1, pragma[only_bind_into](config)) and + fwdFlowState(state2, pragma[only_bind_into](config)) + ) + } + + /** Holds if `state` is the state of a sink node covered by `revFlow`, or can step to such a state through `stateStepRev`. */ + predicate revFlowState(FlowState state, Configuration config) { + exists(NodeEx node | + sinkNode(node, state, config) and + revFlow(node, _, pragma[only_bind_into](config)) and + fwdFlowState(state, pragma[only_bind_into](config)) + ) + or + exists(FlowState state0 | + revFlowState(state0, config) and + stateStepRev(state, state0, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + bindingset[node, state, config] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration
config + ) { + revFlow(node, toReturn, pragma[only_bind_into](config)) and + exists(state) and + exists(returnAp) and + exists(ap) + } + + /** Holds if `node` is covered by `revFlow` with `toReturn = true` and by `fwdFlow` with `cc = true`, and is neither an in-barrier nor an out-barrier. */ + private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + pragma[nomagic] + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | fwdFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) +
or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | revFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +/** Holds if `localFlowStep(node1, node2, config)` holds and `node2` is covered by `Stage1::revFlow`. */ +pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +/** Holds if `additionalLocalFlowStep(node1, node2, config)` holds and `node2` is covered by `Stage1::revFlow`. */ +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink.
+ */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + /* Field flow is allowed only when the smaller of the branch and join counts is within the configured limit. */ + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration.
+ */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + /* Field flow is allowed only when the smaller of the branch and join counts is within the configured limit. */ + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private module Stage2 { + module PrevStage = Stage1; + + class ApApprox = PrevStage::Ap; + + class Ap = boolean; + + /** The `Ap` value `false`, which this stage uses as its nil access path. */ + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + private ApApprox getApprox(Ap ap) { any() } + + private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + private Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + private class LocalCc = Unit; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[node, cc] + private LocalCc getLocalCc(NodeEx node, Cc cc) {
any() } + + /** Holds if there is a local step from `node1` in `state1` to `node2` in `state2`: a value-preserving step or an additional step with unchanged state, or an additional state step. `preservesValue` is `true` only for the first kind. */ + bindingset[node1, state1, config] + bindingset[node2, state2, config] + private predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalStateStep(node1, state1, node2, state2, config) + ) and + exists(ap) and + exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + private predicate flowIntoCall = flowIntoCallNodeCand1/5; + + /** Holds if `node` is covered by `PrevStage::revFlow` and expects some content that is both read and stored in the previous stage. */ + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + expectsContentEx(node, c) + ) + } + + bindingset[node, state, ap, config] + private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + PrevStage::revFlowState(state, pragma[only_bind_into](config)) and + exists(ap) and + not stateBarrier(node, state, config) and + ( + notExpectsContent(node) + or + ap = true and + expectsContentCand(node, config) + ) + } + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 2 logic.
*/ + /** Holds if `node` in `state` is covered by `PrevStage::revFlow` with the approximation `apa`. */ + bindingset[node, state, config] + private predicate flowCand(NodeEx node, FlowState state, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, state, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + ccc.matchesCall(call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument.
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, state, cc, argAp, ap, config) and + flowCand(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + /** Holds if `node` is a source, or is reached from a node covered by `fwdFlow` through a local step, a jump step, a store, a read, or flow into or out of a callable. */ + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa =
getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + /** Holds if `node1` is covered by `fwdFlow` with access path `ap1`, and `PrevStage::storeStepCand` provides a store of `tc` from `node1` into `node2`. */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + /** Holds if `node1` is covered by `fwdFlow` with access path `ap` whose head content is `c`, and `PrevStage::readStepCand` provides a read of `c` from `node1` to `node2`. */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private
predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** Holds if `out` is reached with access path `ap` by returning from `call` out of a return node whose call context matches `call`, where `argAp` is the access path recorded for the argument through which the callable was entered. */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`.
+ */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** Holds if the forward store of `tc` from `node1` with access path `ap1` into `node2` yields the access path `ap2`, and some forward read consumes `ap2` with the content of `tc`. */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + /** Holds if the forward read of `c` from `n1` with access path `ap1` to `n2` yields `ap2`, where `ap2` is the tail of a forward store of `c` that resulted in `ap1`. */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** Holds if the return node `ret` is covered by `fwdFlow` with access path `ap`, a call context of type `CcCall`, and some recorded argument access path. */ + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + /** + * Holds for the candidate tuples of `revFlow`, with one disjunct per kind of + * step. `revFlow` additionally requires `fwdFlow` to hold for `node`, `state`, + * and `ap`. + */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, 
pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + /** + * Holds if `mid` is in `revFlow` with access path `ap0`, and `storeStepFwd` + * holds for a store of `tc` (whose content is `c`) from `node` with access + * path `ap` to `mid` with access path `ap0`. + */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + /** + * Holds if `ret` flows out of `call` (per `flowOutOfCall`) to a node that is + * in `revFlow` with the given `state`, `toReturn`, `returnAp`, and `ap`. If + * the call edge does not allow field flow, `ap` must be an `ApNil`. + */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if `arg` flows into a parameter (per `flowIntoCall`) that is in + * `revFlow` with `toReturn = false`. If the call edge does not allow field + * flow, `ap` must be an `ApNil`. + */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if `arg` flows into a parameter at `call` (per `flowThroughIntoCall`) + * that is in `revFlow` with `toReturn = true` and return access path + * `returnAp`. If the call edge does not allow field flow, `ap` must be an + * `ApNil`. + */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + /** + * Holds if `PrevStage::storeStepCand` holds for the store of `tc` from `node1` + * to `node2`, and reverse flow in this stage passes through that store + * (`revFlowStore`) with access path `ap1` before the store and a matching + * `revFlowConsCand` tuple. + */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + /** + * Holds if the read of `c` from `node1` to `node2` is in `readStepFwd`, `node2` + * is in `revFlow` with the access path after the read, and some `revFlowStore` + * tuple for content `c` uses the same pair of access paths. + */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + /** Holds if `node` is in `revFlow` with flow state `state`, for any values of the other columns. */ + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow(node, state, toReturn, returnAp, ap, config) + } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + private predicate revConsCand(TypedContent tc, Ap ap, 
Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + /** + * Holds if `ap` is an `ApNil` that occurs in `revFlow`, or `ap` is the + * `apCons` of a head and tail for which `consCand` holds. + */ + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + /** + * Holds if some `storeStepCand` tuple stores `tc` onto access path `ap` and + * `ap` is a `validAp`. + */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + /** + * Holds if parameter `p` of callable `c` is in `revFlow` with access path `ap` + * and `toReturn = true`, and some return node of `c` is in `revFlow` with the + * returned access path and is reached by `fwdFlow` in a `CcCall` context with + * argument access path `ap`. A `ParamUpdateReturnKind` return at the position + * of `p` itself is only accepted if `p.allowParameterReturnInSelf()` holds. + */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + /** + * Holds if reverse flow reaches an argument of `call` via `revFlowInToReturn` + * and the corresponding return is covered by `revFlowIsReturned`. + */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + /** + * Holds if the remaining columns are counts over this stage's `fwdFlow` + * relation (`fwd = true`) or `revFlow` relation (`fwd = false`): distinct + * nodes, typed contents, (typed content, access path) pairs, flow states, and + * complete tuples. + */ + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, 
config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage 2 logic. */ +} +/** + * Holds if `flowOutOfCallNodeCand1` holds for the edge from `node1` to `node2` + * at `call` and both nodes are in `Stage2::revFlow`. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} +/** + * Holds if `flowIntoCallNodeCand1` holds for the edge from `node1` to `node2` + * at `call` and both nodes are in `Stage2::revFlow`. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. 
+ */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) or + expectsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + * + * Besides being in `Stage2::revFlow`, `node` must be a source, the target of + * a jump, store, or read step, a `ParamNodeEx`, an `OutNodeExt`, a + * `FlowCheckNode`, or the target of an additional local step that changes the + * flow state. + */ + private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) { + Stage2::revFlow(node, state, config) and + ( + sourceNode(node, state, config) + or + jumpStep(_, node, config) + or + additionalJumpStep(_, node, config) + or + additionalJumpStateStep(_, _, node, state, config) + or + node instanceof ParamNodeEx + or + node.asNode() instanceof OutNodeExt + or + Stage2::storeStepCand(_, _, _, node, _, config) + or + Stage2::readStepCand(_, _, node, config) + or + node instanceof FlowCheckNode + or + exists(FlowState s | + additionalLocalStateStep(_, s, node, state, config) and + s != state + ) + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. 
+ */ + private predicate localFlowExit(NodeEx node, FlowState state, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, state, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + Stage2::storeStepCand(node, _, _, next, _, config) or + Stage2::readStepCand(node, _, next, config) + ) + or + exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) | + additionalJumpStateStep(node, state, next, s, config) + or + additionalLocalStateStep(node, state, next, s, config) and + s != state + ) + or + Stage2::revFlow(node, state, config) and + node instanceof FlowCheckNode + or + sinkNode(node, state, config) + } + /** + * Holds if there is an additional local step from `node1` in `state1` to + * `node2` in `state2`: either `additionalLocalFlowStepNodeCand1` with + * `state1 = state2`, or `additionalLocalStateStep`. In both cases the two + * nodes must be in `Stage2::revFlow` with `false` in the access path column. + */ + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 and + Stage2::revFlow(node1, pragma[only_bind_into](state1), _, _, false, + pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), _, _, false, + pragma[only_bind_into](config)) + or + additionalLocalStateStep(node1, state1, node2, state2, config) and + Stage2::revFlow(node1, state1, _, _, false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, state2, _, _, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. 
+ */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t, + Configuration config, LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( /* base case: a single step starting at a `localFlowEntry` node */ + localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() and // irrelevant dummy value + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + or + additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) + or /* extend a path by a `localFlowStepNodeCand1` step, unless `mid` is a `FlowCheckNode` */ + exists(NodeEx mid | + localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t, + pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + ) + or /* extend a path by an additional step; `preservesValue` becomes `false` and `t` is the type of `node2` */ + exists(NodeEx mid | + localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. 
+ */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + AccessPathFrontNil apf, Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, state1, config) and + state1 = state2 + or /* a single additional step that changes the flow state is a big step on its own */ + additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and + state1 != state2 and + preservesValue = false and + apf = TFrontNil(node2.getDataFlowType()) and + callContext.relevantFor(node1.getEnclosingCallable()) and + not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() | + isUnreachableInCallCached(node1.asNode(), call) or + isUnreachableInCallCached(node2.asNode(), call) + ) + } +} + +private import LocalFlowBigStep + +private module Stage3 { + module PrevStage = Stage2; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + /** + * Gets the nil access path front carrying the data-flow type of `node`, + * provided `node` is in `PrevStage::revFlow`. + */ + private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + /** Gets an access path front whose head is `tc`; `tail` only needs to exist. */ + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + /* Call contexts in this stage are plain Booleans. */ + class Cc = boolean; + + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + CcCall ccSomeCall() { result = true } + + private class LocalCc = Unit; + /** Gets a `CcCall` call context; the result does not depend on the arguments. */ + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + /** Gets a `CcNoCall` call context; the result does not depend on the arguments. */ + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } + + bindingset[node, cc] + private LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + /** + * Holds if `localFlowBigStep` holds for the step from `node1` to `node2` in + * any local call context; `lcc` only needs to exist. + */ + private predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap, config, _) and exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + private predicate flowIntoCall = flowIntoCallNodeCand2/5; + + pragma[nomagic] + private predicate clearSet(NodeEx node, ContentSet c, Configuration config) { + PrevStage::revFlow(node, config) and + clearsContentCached(node.asNode(), c) + } + /** + * Holds if `clearSet` holds for `node` and a content set that has `c` as a + * read content, where `c` also occurs in some `PrevStage::readStepCand`. + */ + pragma[nomagic] + private predicate clearContent(NodeEx node, Content c, Configuration config) { + exists(ContentSet cs | + PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and + c = cs.getAReadContent() and + clearSet(node, cs, pragma[only_bind_into](config)) + ) + } + /** Holds if `clearContent` holds for `node` and the content at the head of `ap`. */ + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap, Configuration config) { + clearContent(node, ap.getHead().getContent(), config) + } + /** + * Holds if `node` is in `PrevStage::revFlow` and `expectsContentEx` holds for + * `node` and the content at the head of `ap`, where that content also occurs + * in some `PrevStage::readStepCand`. + */ + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and + expectsContentEx(node, c) and + c = ap.getHead().getContent() + ) + } + + pragma[nomagic] + private 
predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + /** + * Holds if access path `ap` is allowed at `node`: `clear` does not hold for + * `node` and `ap`, the type of `ap` is compatible with the type of `node` + * when `node` is a casting node, and either `notExpectsContent(node)` or + * `expectsContentCand(node, ap, config)` holds. + */ + bindingset[node, state, ap, config] + private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + exists(state) and + exists(config) and + not clear(node, ap, config) and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and + ( + notExpectsContent(node) + or + expectsContentCand(node, ap, config) + ) + } + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } + + /* Begin: Stage 3 logic. */ + bindingset[node, state, config] + private predicate flowCand(NodeEx node, FlowState state, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, state, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + /** + * Holds if `flowOutOfCall` holds for the edge from `ret` to `out` at `call`, + * `call` satisfies `PrevStage::callMayFlowThroughRev`, some parameter of the + * enclosing callable of `ret` satisfies `PrevStage::parameterMayFlowThrough`, + * and `ccc` matches `call`. + */ + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + ccc.matchesCall(call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, state, cc, argAp, ap, config) and + flowCand(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + /** + * Holds for the candidate tuples of `fwdFlow`, with one disjunct per kind of + * step. `fwdFlow` additionally requires `flowCand` and `filter` to hold. + */ + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = 
getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + /** + * Holds if `node1` is in `fwdFlow` with access path `ap1`, the store of `tc` + * from `node1` to `node2` is a `PrevStage::storeStepCand` for the approximation + * of `ap1`, and `typecheckStore` accepts `ap1` for the stored content type. + */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + /** + * Holds if `node1` is in `fwdFlow` with access path `ap`, the read of `c` from + * `node1` to `node2` is a `PrevStage::readStepCand`, and the head content of + * `ap` is `c`. + */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + /** + * Holds if some argument of `call` is in `fwdFlow` with call context `outercc` + * and access path `ap`, and flows into parameter `p` (per `flowIntoCall`) with + * inner call context `innercc`. If the call edge does not allow field flow, + * `ap` must be an `ApNil`. + */ + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if a return node that is in `fwdFlow` with a `CcNoCall` call context + * flows out of some call to `out` (per `flowOutOfCall`), giving call context + * `ccOut`. If the call edge does not allow field flow, `ap` must be an `ApNil`. + */ + pragma[nomagic] + private 
predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if a return node that is in `fwdFlow` with a `CcCall` call context and + * recorded argument access path `argAp` flows out of `call` to `out` (per + * `flowThroughOutOfCall`). If the call edge does not allow field flow, `ap` + * must be an `ApNil`. + */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. 
+ */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + /** + * Holds if forward flow takes a store step of `tc` from `node1` with access + * path `ap1` to `node2`, giving `ap2 = apCons(tc, ap1)`, and some forward-flow + * read of `tc.getContent()` starts from access path `ap2`. + */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + /** + * Holds if `fwdFlowRead` holds for a read of `c` from `n1` to `n2` with access + * path `ap1`, and `fwdFlowConsCand(ap1, c, ap2, config)` relates `ap1` to `ap2`. + */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + /** + * Holds if forward flow enters `call` (per `fwdFlowIsEntered`) with some + * argument access path and reaches an out node of `call` (per + * `fwdFlowOutFromArg`) with that same argument access path. + */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + /** + * Holds if `flowIntoCall` holds for the edge from `arg` to `p` at `call`, `arg` + * is in `fwdFlow`, `p` satisfies `PrevStage::parameterMayFlowThrough`, and + * `call` satisfies `callMayFlowThroughFwd`. + */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + /** + * Holds if forward flow reaches `ret` with access path `ap` in a `CcCall` call + * context with some recorded argument access path (`apSome(_)`). + */ + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** 
+ * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + /** + * Holds for the candidate tuples of `revFlow`, with one disjunct per kind of + * step. `revFlow` additionally requires `fwdFlow` to hold for `node`, `state`, + * and `ap`. + */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, 
pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + /** + * Holds if `mid` is in `revFlow` with access path `ap0`, and `storeStepFwd` + * holds for a store of `tc` (whose content is `c`) from `node` with access + * path `ap` to `mid` with access path `ap0`. + */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + /** + * Holds if `ret` flows out of `call` (per `flowOutOfCall`) to a node that is + * in `revFlow` with the given `state`, `toReturn`, `returnAp`, and `ap`. If + * the call edge does not allow field flow, `ap` must be an `ApNil`. + */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if `arg` flows into a parameter (per `flowIntoCall`) that is in + * `revFlow` with `toReturn = false`. If the call edge does not allow field + * flow, `ap` must be an `ApNil`. + */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + /** + * Holds if `arg` flows into a parameter at `call` (per `flowThroughIntoCall`) + * that is in `revFlow` with `toReturn = true` and return access path + * `returnAp`. If the call edge does not allow field flow, `ap` must be an + * `ApNil`. + */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + /** Holds if the previous stage's store step candidate of `tc` from `node1` to `node2` is also matched in this stage by `revFlowStore` and `revFlowConsCand`, with `ap1` the access path at `node1`. */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + /** Holds if `readStepFwd` records a read of `c` from `node1` to `node2`, reverse flow reaches `node2`, and `revFlowStore` records a matching store of `c`. */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `revFlow` reaches `node` in `state`, for any return context and access path. */ + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + /** Holds if `revFlow` reaches `node`, for any flow state, return context and access path. */ + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow(node, state, toReturn, returnAp, ap, config) + } + + /** Holds if `storeStepFwd` records a store of `tc` onto access path `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + /** Holds if `storeStepCand` records a store of `tc` onto access path `ap`. */ + private predicate revConsCand(TypedContent tc, Ap ap, 
Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + /** Holds if `ap` is a nil access path reached by `revFlow`, or is `apCons(head, tail)` for some `consCand(head, tail, config)`. */ + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + /** Holds if `revConsCand(tc, ap, config)` holds and `ap` is a valid access path per `validAp`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + /** Holds if reverse flow reaches parameter `p` of callable `c` with access path `ap`, `toReturn = true` and return access path `ap0`. */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + /** Holds if parameter `p` of `c` with access path `ap` may flow to a return node of `c` that is in both reverse flow (to-return) and forward flow (in a call context with argument access path `ap`). A return that updates the parameter at `p`'s own position is excluded unless `p.allowParameterReturnInSelf()` holds. */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + /** Holds if reverse flow reaches an argument of `call` through `revFlowInToReturn` with a matching `revFlowIsReturned`, i.e. by passing through the callee. */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + /** Holds if `nodes`, `fields`, `conscand`, `states` and `tuples` are the counts of nodes, stored contents, (content, access path) candidates, flow states and full tuples in this stage's forward flow (`fwd = true`) or reverse flow (`fwd = false`) for `config`. */ + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, 
config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + // the same counts, taken over reverse flow (`revFlow` and `consCand`) + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage 3 logic. */ +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx( + NodeEx node, FlowState state, AccessPathFront argApf, Configuration config +) { + exists(AccessPathFront apf | + Stage3::revFlow(node, state, true, _, apf, config) and + Stage3::fwdFlow(node, state, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. 
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n, FlowState state | + Stage3::revFlow(n, state, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +/** + * The representations of `AccessPathApprox`: `TNil` holds only a type, + * `TConsNil` one content followed by a type, `TConsCons` the first two + * contents plus a length, and `TCons1` only the first content plus a length. + * `TCons1` is used for heads where `expensiveLen2unfolding` holds; the + * `TConsNil` and `TConsCons` branches exclude such heads. + */ +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + /** Gets a textual representation of this access path approximation. */ + abstract string toString(); + + /** Gets the first `TypedContent` of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the length of this access path. */ + abstract int len(); + + /** Gets the type associated with this access path. */ + abstract DataFlowType getType(); + + /** Gets the `AccessPathFront` corresponding to this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +/** An empty access path approximation, consisting only of the type `t`. */ +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +/** A non-empty access path approximation. */ +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +/** An access path approximation of length 1: the content `tc` followed by the type `t`. */ +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +/** An access path approximation of length `len` whose first two contents are `tc1` and `tc2`. */ +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +/** An access path approximation of length `len` for which only the first content `tc` is tracked. */ +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + // the second content is not tracked here, so it is recovered from `Stage3::consCand` + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `apa`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `apa`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +/** The representations of `AccessPathApproxOption`: either no access path approximation or a specific one. */ +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +/** An optional `AccessPathApprox`; the absent value is printed as the empty string. */ +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + /** Gets the previous stage's approximation of `ap`, which is its front. */ + private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + /** Gets the nil access path with the type of `node`, provided `node` is in the previous stage's reverse flow. */ + private ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + /** Gets the access path obtained by pushing `tc` onto `tail`. */ + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + /** Gets the content of the head of `ap`. */ + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + private class LocalCc = LocalCallContext; + + /** Gets the call context for entering `c` through `call` from `outercc`: `TSpecificCall(call)` if `recordDataFlowCallSite(call, c)` holds, otherwise `TSomeCall()`. Requires `checkCallContextCall(outercc, call, c)`. */ + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + /** Gets the call context for returning from `c` through `call` with `innercc`: `TReturn(c, call)` if `reducedViableImplInReturn(c, call)` holds, otherwise `ccNone()`. Requires `checkCallContextReturn(innercc, c, call)`. */ + bindingset[call, c, innercc] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + 
/** Gets the local call context obtained from `cc` and the enclosing callable of `node`. */ + bindingset[node, cc] + private LocalCc getLocalCc(NodeEx node, Cc cc) { + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + /** Holds if `localFlowBigStep` holds from `node1` to `node2` with the front of the nil access path `ap`. */ + private predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getFront(), config, lcc) + } + + /** Holds if `flowOutOfCallNodeCand2` holds and both `node1` and `node2` are in the previous stage's reverse flow with a common flow state. */ + pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + exists(FlowState state | + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, _, _, + pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `flowIntoCallNodeCand2` holds and both `node1` and `node2` are in the previous stage's reverse flow with a common flow state. */ + pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + exists(FlowState state | + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, _, _, + pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Always holds: this stage applies no additional filter on forward flow. */ + bindingset[node, state, ap, config] + private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. 
*/ + /** Holds if `node` in `state` is in the previous stage's reverse flow with access path `apa`. */ + bindingset[node, state, config] + private predicate flowCand(NodeEx node, FlowState state, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, state, _, _, apa, config) + } + + /** Gets a value equal to `apa`; both sides of the equality are wrapped in `pragma[only_bind_out]`. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + /** Holds if `ret` flows out of `call` to `out`, the previous stage found that `call` may be flowed through in reverse flow and that some parameter of `ret`'s enclosing callable may flow through, and `ccc` matches `call`. */ + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + ccc.matchesCall(call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, state, cc, argAp, ap, config) and + flowCand(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + /** Holds if `node` is a source or is reached by one step from a node in `fwdFlow`, before `fwdFlow` applies the `flowCand` and `filter` restrictions. */ + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + // source + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + // local step + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + // jump step; the call context and argument access path are reset + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + // additional jump step; only from a nil access path + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // additional jump step that changes the flow state; only from a nil access path + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = 
getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + /** Holds if forward flow reaches `node1` with access path `ap1` and the previous stage has a store step candidate of `tc` from `node1` to `node2`. */ + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + /** Holds if forward flow reaches `node1` with access path `ap` whose head content is `c`, and the previous stage has a read step candidate of `c` from `node1` to `node2`. */ + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + /** Holds if forward flow reaches an argument of `call` in context `outercc` with access path `ap` and flows into parameter `p`, giving call context `innercc`; `ap` must be nil when that step does not allow field flow. */ + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** Holds if forward flow reaches a return node in a no-call context and flows out of a call to `out`, with `ccOut` the call context obtained by `getCallContextReturn`. */ + pragma[nomagic] + private 
predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** Holds if forward flow reaches a return node in a call context with argument access path `argAp` and flows out of `call` to `out` via `flowThroughOutOfCall`; `ap` must be nil when that step does not allow field flow. */ + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. 
+ */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** Holds if forward flow stores `tc` from `node1` (access path `ap1`) into `node2`, giving `ap2 = apCons(tc, ap1)`, and forward flow also reads the content of `tc` from `ap2` somewhere. */ + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + /** Holds if forward flow reads `c` from `n1` (access path `ap1`) into `n2`, and `ap1` results from a forward store of `c` onto `ap2`. */ + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** Holds if forward flow enters `call` through an argument (`fwdFlowIsEntered`) and flows back out of it (`fwdFlowOutFromArg`) with a matching argument access path. */ + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + /** Holds if `arg` flows into `p` at `call`, `arg` is in forward flow, `p` may flow through according to the previous stage, and `call` may be flowed through in forward flow. */ + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + /** Holds if forward flow reaches `ret` with access path `ap` in a call context (`CcCall`) that has a recorded argument access path. */ + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** 
+ * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + + /** Holds if `node` is a sink or reaches a node in `revFlow` by one step, before `revFlow` restricts the result to nodes in `fwdFlow`. */ + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + // sink + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + // value-preserving local step + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + // non-value-preserving local step; only for nil access paths + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + // jump step; the return context is reset + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + // additional jump step; only for nil access paths + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // additional jump step that changes the flow state; only for nil access paths + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, 
pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + /** Holds if reverse flow reaches `mid` with access path `ap0`, and `storeStepFwd` records a store of `tc` (whose content is `c`) from `node` with access path `ap` into `mid`. */ + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + /** Holds if reverse flow reaches a node that `ret` flows to out of `call` (per `flowOutOfCall`) with access path `ap`; `ap` must be nil when that step does not allow field flow. */ + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** Holds if `arg` flows (per `flowIntoCall`) into a parameter that reverse flow reaches with `toReturn = false` and access path `ap`; `ap` must be nil when that step does not allow field flow. */ + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** Holds if `arg` flows (per `flowThroughIntoCall`) into a parameter at `call` that reverse flow reaches with `toReturn = true`, return access path `returnAp` and access path `ap`; `ap` must be nil when that step does not allow field flow. */ + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + /** Holds if the previous stage's store step candidate of `tc` from `node1` to `node2` is also matched in this stage by `revFlowStore` and `revFlowConsCand`, with `ap1` the access path at `node1`. */ + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + /** Holds if `readStepFwd` records a read of `c` from `node1` to `node2`, reverse flow reaches `node2`, and `revFlowStore` records a matching store of `c`. */ + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + /** Holds if `revFlow` reaches `node` in `state`, for any return context and access path. */ + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + /** Holds if `revFlow` reaches `node`, for any flow state, return context and access path. */ + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow(node, state, toReturn, returnAp, ap, config) + } + + /** Holds if `storeStepFwd` records a store of `tc` onto access path `ap`. */ + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + /** Holds if `storeStepCand` records a store of `tc` onto access path `ap`. */ + private predicate revConsCand(TypedContent tc, Ap ap, 
Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + /** Holds if `ap` is a nil access path reached by `revFlow`, or is `apCons(head, tail)` for some `consCand(head, tail, config)`. */ + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + /** Holds if `revConsCand(tc, ap, config)` holds and `ap` is a valid access path per `validAp`. */ + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + /** Holds if reverse flow reaches parameter `p` of callable `c` with access path `ap`, `toReturn = true` and return access path `ap0`. */ + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + /** Holds if parameter `p` of `c` with access path `ap` may flow to a return node of `c` that is in both reverse flow (to-return) and forward flow (in a call context with argument access path `ap`). A return that updates the parameter at `p`'s own position is excluded unless `p.allowParameterReturnInSelf()` holds. */ + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + /** Holds if reverse flow reaches an argument of `call` through `revFlowInToReturn` with a matching `revFlowIsReturned`, i.e. by passing through the callee. */ + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + /** Holds if `nodes`, `fields`, `conscand`, `states` and `tuples` are the counts of nodes, stored contents, (content, access path) candidates, flow states and full tuples in this stage's forward flow (`fwd = true`) or reverse flow (`fwd = false`) for `config`. */ + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, 
config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + // the same counts, taken over reverse flow (`revFlow` and `consCand`) + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage 4 logic. */ +} + +/** Gets a configuration equal to `conf`, related through an intermediate variable wrapped in `pragma[only_bind_into]`. */ +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +/** + * Holds if `n` in callable `c` is in Stage 4 reverse flow with `toReturn = true` + * and in forward flow in a call context with argument access path `apa`, and + * some parameter of `c` may flow through. Note that the flow-through check is + * not restricted to `config` or `apa` here; `nodeMayUseSummary` adds that. + */ +pragma[nomagic] +private predicate nodeMayUseSummary0( + NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, _, _) and + Stage4::revFlow(n, state, true, _, apa0, config) and + Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +/** Holds if `nodeMayUseSummary0` holds for `n` in a callable that has a parameter which may flow through with access path `apa` in `config`. */ +pragma[nomagic] +private predicate nodeMayUseSummary( + NodeEx n, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(DataFlowCallable c | + Stage4::parameterMayFlowThrough(_, c, apa, config) and + nodeMayUseSummary0(n, c, state, apa, config) + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) { + exists(Configuration config | + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), config) and + Stage4::revFlow(p, state, _, _, _, config) + ) + } + +/** 
+ * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + /** Gets a textual representation of this summary context. */ abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private FlowState s; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, s, ap) } + + /** Gets a position at which the parameter of this context is a parameter of some callable. */ ParameterPosition getParameterPos() { p.isParameterOf(_, result) } + + /** Gets the parameter node of this summary context. */ ParamNodeEx getParamNode() { result = p } + + override string toString() { result = p + ": " + ap } + + /** Holds if the parameter node of this summary context is at the specified location. */ predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +/** Gets the number of (node, state) pairs that either have Stage 4 reverse flow with `apa` or may use a summary with `apa` under `config`. */ private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n, FlowState state | + Stage4::revFlow(n, state, _, _, apa, config) or nodeMayUseSummary(n, state, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. 
+ */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + /* expensive only if both limits are exceeded */ apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +/** Gets a tail of `apa`: an approximation obtained by popping the head of `apa` such that the head and the tail form a Stage 4 cons-candidate under `config`. */ private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + * + * If the head of `apa` has `forceHighPrecision()`, this always holds with + * `unfold = true`, without consulting the cost limits. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + * + * Without unfolding this is 1, unless `apa` is an `AccessPathApproxCons1` whose + * length 2 unfolding is not expensive, in which case it is `count1to2unfold`. + * With unfolding it is `countPotentialAps`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. 
+ */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +/** The representations of access paths: the empty path with a type (`TAccessPathNil`), an explicit head and tail (`TAccessPathCons`), two leading contents plus a length (`TAccessPathCons2`), or one leading content plus a length (`TAccessPathCons1`). Which forms exist for an approximation is decided by `evalUnfold` and `expensiveLen1to2unfolding`. */ private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +/** The path graph nodes: intermediate nodes (`TPathNodeMid`) and sink nodes (`TPathNodeSink`). */ private newtype TPathNode = + TPathNodeMid( + NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config + ) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, state, config) and + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... or a step from an existing PathNode to another node. 
+ exists(PathNodeMid mid | + pathStep(mid, node, state, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, state, _, _, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + /* A sink node exists for each intermediate node that is at a sink; it keeps only the node, the state and the configuration. */ TPathNodeSink(NodeEx node, FlowState state, Configuration config) { + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + state = sink.getState() and + config = sink.getConfiguration() + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +/** The empty access path, which only carries the type `t`. */ private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + /** Gets the type carried by this empty access path. */ DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +/** An access path that is represented explicitly by its head and its tail. */ private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + /** Gets the textual representation of this access path without the opening bracket. `needsSuffix` is `true` if the result ends in an unfinished length annotation that `toString` completes with the length and the closing brackets, and `false` if the result is already complete. */ private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +/** An access path that is represented by its two leading contents and its length only. */ private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** An access path that is represented by its leading content and its length only. */ private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a flow state, a call context and an access path (both except for sinks), + * and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the `FlowState` of this node. */ + FlowState getState() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, provided that this node is hidden. */ private PathNode getASuccessorIfHidden() { + this.(PathNodeImpl).isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.(PathNodeImpl).isHidden() and + not result.(PathNodeImpl).isHidden() + } + + /** Holds if this node is a source. 
*/ + predicate isSource() { none() } +} + +/** The internal base class of path nodes. It exposes the underlying `NodeEx`, the successor relation before hidden nodes are skipped, and whether a node is hidden. */ abstract private class PathNodeImpl extends PathNode { + /** Gets a successor of this node, including hidden nodes. */ abstract PathNode getASuccessorImpl(); + + /** Gets the underlying `NodeEx`. */ abstract NodeEx getNodeEx(); + + /** Holds if this node is hidden: the configuration does not include hidden nodes, and the node is either an implicit read or a `hiddenNode` that is neither a source nor a sink node. */ predicate isHidden() { + not this.getConfiguration().includeHiddenNodes() and + ( + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + ) + } + + /** Gets the access path of this node formatted for display: the empty string for sinks and for access paths whose string form is empty, and otherwise the string form preceded by a space. */ private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + /** Gets the call context of this node in angle brackets preceded by a space, or the empty string for sinks. */ private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + override string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNode n) { + n instanceof PathNodeSink or directReach(n.getASuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and directReach(n2) } + +/** Holds if `n2` can be reached from `n1` by one or more `pathSucc` steps. */ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(a) and reach(b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + Subpaths::subpaths(arg, par, ret, out) and + reach(arg) and + reach(par) and + reach(ret) and + reach(out) + } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`, + * a `FlowState`, a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + FlowState state; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, state, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + /** Gets the call context of this node. */ CallContext getCallContext() { result = cc } + + /** Gets the summary context of this node. */ SummaryCtx getSummaryCtx() { result = sc } + + /** Gets the access path of this node. */ AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + /** Gets an intermediate node that is reached from this node by one `pathStep` and has the same configuration. */ private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = this.getSuccMid() + or + // a final step to a sink + result = this.getSuccMid().projectToSink() + } + + override predicate isSource() { + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc 
instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } + + /** Holds if this node is at a sink node with an empty access path and, when `hasSinkCallCtx` holds for the configuration, has no summary context or a `CallContextNoCall` call context. */ predicate isAtSink() { + sinkNode(node, state, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + /** Gets the sink node with the same underlying node, state and configuration as this node, provided that this node is at a sink. */ PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getState() = state and + result.getConfiguration() = unbindConf(config) + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. 
+ */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + FlowState state; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, state, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + override Configuration getConfiguration() { result = config } + + /* A sink node has no successors. */ override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, state, config) } +} + +/** Holds if `mid` has underlying node `midnode`, state `state`, call context `cc`, summary context `sc`, access path `ap` and configuration `conf`, and `localCC` is the local call context obtained from `cc` and the enclosing callable of `midnode`. */ private predicate pathNode( + PathNodeMid mid, NodeEx midnode, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, + Configuration conf, LocalCallContext localCC +) { + midnode = mid.getNodeEx() and + state = mid.getState() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap = mid.getAp() +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. 
+ */ +pragma[nomagic] +private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap +) { + /* Local big step with its boolean flag `true`: `cc`, `sc` and `ap` are those of `mid`. */ exists(NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC | + pathNode(mid, midnode, state0, cc, sc, ap, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, true, _, conf, localCC) + ) + or + /* Local big step with its boolean flag `false`: `mid` must have an empty access path, and `ap` has the front given by the step. */ exists( + AccessPath ap0, NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC + | + pathNode(mid, midnode, state0, cc, sc, ap0, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + /* Jump steps: the call context becomes `CallContextAny` and there is no summary context. */ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + /* Additional jump steps are only taken from an empty access path. */ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + /* Store step: popping `tc` from the new access path `ap` gives the access path of `mid`. */ exists(TypedContent tc | pathStoreStep(mid, node, state, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + /* Read step: pushing `tc` onto the new access path `ap` gives the access path of `mid`. */ exists(TypedContent tc | pathReadStep(mid, node, state, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + /* Flow into, out of, or through a callable. */ pathIntoCallable(mid, node, state, _, cc, sc, _, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, state, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, state, cc, ap) and sc = mid.getSummaryCtx() +} + +/** Holds if `mid` has access path `ap0` with head `tc`, and there is a Stage 4 read step candidate for the content of `tc` from the node of `mid` to `node`. The state `state` and the call context `cc` are those of `mid`. */ pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, FlowState state, 
AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +/** Holds if `mid` has access path `ap0`, and there is a Stage 4 store step candidate with typed content `tc` from the node of `mid` to `node`. The state `state` and the call context `cc` are those of `mid`. */ pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +/** Holds if `mid` is at a return node with return position `pos` in a `CallContextNoCall` call context `innercc`, with state `state`, access path approximation `apa` and configuration `config`. */ private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, FlowState state, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +/** Holds if `mid` may return to `call` with return kind `kind`. The resulting call context `cc` is `TReturn(c, call)` if `reducedViableImplInReturn` holds for the returning callable `c` and `call`, and `TAnyCallContext()` otherwise. */ pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPathApprox apa, Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, state, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +/** Gets an out node of `call` for return kind `kind` that has Stage 4 reverse flow with `apa` under `config`. */ pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, _, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. 
+ */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, FlowState state, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, state, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if `mid` is at an argument of `call` in call context `cc`, and the + * position of that argument matches the parameter position `ppos`. The access + * path of `mid` is `ap`, and `apa` is its approximation. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, ParameterPosition ppos, FlowState state, CallContext cc, DataFlowCall call, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(ArgNodeEx arg, ArgumentPosition apos | + pathNode(mid, arg, state, cc, _, ap, config, _) and + arg.asNode().(ArgNode).argumentOf(call, apos) and + apa = ap.getApprox() and + parameterMatch(ppos, apos) + ) +} + +/** Holds if `callable` has a parameter at position `pos` that has Stage 4 reverse flow with `apa` under `config`. */ pragma[nomagic] +private predicate parameterCand( + DataFlowCallable callable, ParameterPosition pos, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, _, _, apa, config) and + p.isParameterOf(callable, pos) + ) +} + +/** Holds if `mid` is at an argument of `call` that matches parameter position `pos`, `call` resolves to `callable` in `outercc`, and `callable` has a candidate parameter at `pos` for the approximation of `ap`. */ pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, AccessPath ap, Configuration config +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, ap, + pragma[only_bind_into](apa), pragma[only_bind_into](config)) and + callable = resolveCall(call, outercc) and + parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. 
+ * The inner call context is `TSpecificCall(call)` if `recordDataFlowCallSite` + * holds for the call and the callable, and `TSomeCall()` otherwise. + */ +pragma[nomagic] +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, FlowState state, CallContext outercc, CallContextCall innercc, + SummaryCtx sc, DataFlowCall call, Configuration config +) { + exists(ParameterPosition pos, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + ( + /* Use the summary context for `p`, `state` and `ap` if it exists, and no summary context otherwise. */ sc = TSummaryCtxSome(p, state, ap) + or + not exists(TSummaryCtxSome(p, state, ap)) and + sc = TSummaryCtxNone() and + // When the call contexts of source and sink need to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, FlowState state, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, + AccessPathApprox apa, Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, ParameterPosition pos | + pathNode(mid, ret, state, cc, sc, ap, config, _) and + kind = ret.getKind() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) + ) +} + +/** Holds if `mid` enters a callable through `call` with some summary context, and the parameter given by that summary context flows through to a return of kind `kind` with state `state` and access path `ap`, whose approximation is `apa`. The call context before entering the callable is `cc`. */ pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, state, innercc, sc, ap, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, NodeEx out, FlowState state, CallContext cc, AccessPath ap +) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, state, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** Provides the predicates that relate summarized flow through a callable to the path nodes inside that callable. */ private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. 
+ */ + pragma[nomagic] + private predicate subpaths01( + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + exists(Configuration config | + pathThroughCallable(arg, out, pragma[only_bind_into](sout), _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, _, innercc, sc, _, config) and + paramFlowsThrough(kind, pragma[only_bind_into](sout), innercc, sc, + pragma[only_bind_into](apout), _, unbindConf(config)) and + /* hidden argument nodes do not give rise to subpath-tuples */ not arg.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. + * + * In addition to `subpaths01`, this requires `out` to be an out node of some + * call for the return kind `kind`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + /** Gets the configuration of `n`. */ pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. 
+ * Here `ret` is an intermediate path node at a return node, with state `sout` + * and access path `apout`, in the configuration of `arg`. + */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and + pathNode(ret, retnode, sout, innercc, sc, apout, unbindConf(getPathNodeConf(arg)), _) and + kind = retnode.getKind() + ) + } + + /** Gets a hidden successor of `n` whose underlying node is related to the underlying node of `n` by `localFlowBigStep`, `store`, or `readSet`. */ private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, _, n2, _, _, _, _, _) or + store(n1, _, n2, _, _) or + readSet(n1, _, n2, _) + ) + } + + /** Holds if `succ` is a successor of `pred` and has underlying node `succNode`. */ pragma[nomagic] + private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) { + succ = pred.getASuccessor() and + succNode = succ.getNodeEx() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) { + exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 | + pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and + subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and + hasSuccessor(pragma[only_bind_into](arg), par, p) and + not ret.isHidden() and + pathNode(out0, o, sout, _, _, apout, _, _) + | + out = out0 or out = out0.projectToSink() + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath that can reach a sink. 
+ */ + predicate retReach(PathNode n) { + exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out)) + or + exists(PathNode mid | + retReach(mid) and + n.getASuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + * + * `flowsource` and `flowsink` are the path nodes for `source` and `sink`. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +/** Holds if the path graph has the given counts of underlying nodes (`nodes`), access path heads (`fields`), access paths (`conscand`), flow states (`states`) and path nodes (`tuples`). With `fwd = true` all path nodes are counted, and with `fwd = false` only those for which `reach` holds. */ private predicate finalStats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples +) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and 
reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. + * + * Stages 1 to 4 report the `stats` of the corresponding stage module, and + * stage 5 reports `finalStats`. `n` is a number that increases with the stage + * and is larger for the `Rev` row of a stage than for its `Fwd` row. + */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + Configuration config +) { + stage = "1 Fwd" and + n = 10 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "1 Rev" and + n = 15 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Fwd" and + n = 20 and + Stage2::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Rev" and + n = 25 and + Stage2::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Fwd" and + n = 30 and + Stage3::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Rev" and + n = 35 and + Stage3::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Fwd" and + n = 40 and + Stage4::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Rev" and + n = 45 and + Stage4::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, states, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, states, tuples) +} + +private module FlowExploration { + /** Holds if there is a jump step, an additional jump step, flow into a callable, or flow out of a callable, from a node enclosed in `c1` to a node enclosed in a different callable `c2`. */ private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + additionalJumpStateStep(node1, _, node2, _, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + /** Holds if `c` encloses a source of `config`, or can be reached by `callableStep` from a callable for which this predicate holds. */ private predicate 
interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) or config.isSource(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + /** Holds if `c` encloses a sink of `config`, or has a `callableStep` to a callable for which this predicate holds. */ private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSink(n) or config.isSink(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + /** The callables that are interesting for some configuration, extended with one artificial source element and one artificial sink element. */ private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + /** Holds if there is a `callableStep` between the callables of `ce1` and `ce2` under a shared configuration, or `ce1` is the artificial source element and `ce2` encloses a source, or `ce2` is the artificial sink element and `ce1` encloses a sink. */ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + (config.isSource(n) or config.isSource(n, _)) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + (config.isSink(n) or config.isSink(n, _)) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + /** Holds if `callableExtStepFwd(ce2, ce1)` holds, that is, the forward step relation reversed. */ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + /** Gets the shortest distance, in `callableExtStepFwd` steps, from the artificial source element to `c`. */ private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + /** Gets the shortest distance, in `callableExtStepRev` steps, from the artificial sink element to `c`. */ private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, 
Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. 
+ */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private predicate relevantState(FlowState state) { + sourceNode(_, state, _) or + sinkNode(_, state, _) or + additionalLocalStateStep(_, state, _, _, _) or + additionalLocalStateStep(_, _, _, state, _) or + additionalJumpStateStep(_, state, _, _, _) or + additionalJumpStateStep(_, _, _, state, _) + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TSummaryCtx3 = + TSummaryCtx3None() or + TSummaryCtx3Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TRevSummaryCtx3 = + TRevSummaryCtx3None() or + TRevSummaryCtx3Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() 
and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = TPartialNil(node.getDataFlowType()) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, state, cc, sc1, sc2, sc3, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, TRevSummaryCtx3 sc3, + RevPartialAccessPath ap, Configuration config + ) { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and + not clearsContentEx(node, ap.getHead()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead()) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + not clearsContentEx(node, ap.getHead().getContent()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead().getContent()) + ) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. 
*/ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + FlowState getState() { none() } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. 
+ */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + FlowState state; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + TSummaryCtx3 sc3; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, state, cc, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TSummaryCtx3 getSummaryCtx3() { result = sc3 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx1(), result.getSummaryCtx2(), result.getSummaryCtx3(), result.getAp(), + 
result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + FlowState state; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + TRevSummaryCtx3 sc3; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, state, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TRevSummaryCtx3 getSummaryCtx3() { result = sc3 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getState(), this.getSummaryCtx1(), + this.getSummaryCtx2(), this.getSummaryCtx3(), this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() + } + } + + private predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, + TSummaryCtx2 sc2, TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + 
additionalLocalFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalLocalStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, 
ap0, tc, node, cc, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, state, _, cc, sc1, sc2, sc3, _, ap, config) + or + partialPathOutOfCallable(mid, node, state, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() + or + partialPathThroughCallable(mid, node, state, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + bindingset[result, i] + private int unbindInt(int i) { pragma[only_bind_out](i) = pragma[only_bind_out](result) } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd 
mid, ReturnPosition pos, FlowState state, CallContext innercc, + PartialAccessPath ap, Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, state, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, state, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, ParameterPosition ppos, FlowState state, CallContext cc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ArgNode arg, ArgumentPosition apos | + arg = mid.getNodeEx().asNode() and + state = mid.getState() and + cc = mid.getCallContext() and + arg.argumentOf(call, apos) and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + 
partialPathIntoArg(mid, pos, state, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, FlowState state, CallContext outercc, + CallContextCall innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ParameterPosition pos, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(state) and + sc3 = TSummaryCtx3Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, FlowState state, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3 | + partialPathIntoCallable(mid, _, _, cc, innercc, sc1, sc2, sc3, call, _, config) and + paramFlowsThroughInPartialPath(kind, state, innercc, sc1, sc2, sc3, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration 
config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, state, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[nomagic] + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalLocalStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalJumpStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() 
and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, state, sc1, sc2, sc3, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, state, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private 
predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, FlowState state, TRevSummaryCtx1Some sc1, + TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3, DataFlowCall call, RevPartialAccessPath ap, + Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + mid.getState() = state and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(state) and + sc3 = TRevSummaryCtx3Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + ArgumentPosition apos, FlowState state, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + TRevSummaryCtx3Some sc3, RevPartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p, ParameterPosition ppos | + mid.getNodeEx() = p and + mid.getState() = state and + p.getPosition() = ppos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, ArgumentPosition pos, FlowState state, + RevPartialAccessPath ap, Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3 | + revPartialPathIntoReturn(mid, _, _, sc1, sc2, sc3, call, _, config) and + revPartialPathFlowsThrough(pos, state, sc1, sc2, sc3, ap, config) + ) + } + + 
pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, FlowState state, RevPartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ArgumentPosition pos | + revPartialPathThroughCallable0(call, mid, pos, state, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index 919c710f5fa..3d013a504c5 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -421,7 +421,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { or exists(Ssa::Definition def | LocalFlow::localSsaFlowStepUseUse(def, nodeFrom, nodeTo) and - not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom) and + not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, + any(DataFlowSummarizedCallable sc)) and not LocalFlow::usesInstanceField(def) ) or @@ -739,13 +740,10 @@ private module Cached { ) ) } or - TSummaryNode( - FlowSummaryImpl::Public::SummarizedCallable c, - FlowSummaryImpl::Private::SummaryNodeState state - ) { + TSummaryNode(DataFlowSummarizedCallable c, FlowSummaryImpl::Private::SummaryNodeState state) { FlowSummaryImpl::Private::summaryNodeRange(c, state) } or - TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, 
ParameterPosition pos) { + TSummaryParameterNode(DataFlowSummarizedCallable c, ParameterPosition pos) { FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos) } or TParamsArgumentNode(ControlFlow::Node callCfn) { @@ -769,7 +767,8 @@ private module Cached { or // Simple flow through library code is included in the exposed local // step relation, even though flow is technically inter-procedural - FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true) + FlowSummaryImpl::Private::Steps::summaryThroughStepValue(nodeFrom, nodeTo, + any(DataFlowSummarizedCallable sc)) } cached @@ -976,17 +975,15 @@ private module ParameterNodes { SummaryParameterNode() { this = TSummaryParameterNode(sc, pos_) } override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { - sc = c and pos = pos_ + sc = c.asSummarizedCallable() and pos = pos_ } - override DataFlowCallable getEnclosingCallableImpl() { result = sc } + override DataFlowCallable getEnclosingCallableImpl() { result.asSummarizedCallable() = sc } override Type getTypeImpl() { - exists(int i | - pos_.getPosition() = i and result = sc.asSummarizedCallable().getParameter(i).getType() - ) + exists(int i | pos_.getPosition() = i and result = sc.getParameter(i).getType()) or - pos_.isThisParameter() and result = sc.asSummarizedCallable().getDeclaringType() + pos_.isThisParameter() and result = sc.getDeclaringType() } override ControlFlow::Node getControlFlowNodeImpl() { none() } @@ -1464,7 +1461,7 @@ class SummaryNode extends NodeImpl, TSummaryNode { SummaryNode() { this = TSummaryNode(c, state) } - override DataFlowCallable getEnclosingCallableImpl() { result = c } + override DataFlowCallable getEnclosingCallableImpl() { result.asSummarizedCallable() = c } override DataFlowType getDataFlowType() { result = FlowSummaryImpl::Private::summaryNodeType(this) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll 
b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll index 439d70175e2..d907032547d 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll @@ -195,7 +195,10 @@ module Public { } /** A callable with a flow summary. */ - abstract class SummarizedCallable extends DataFlowCallable { + abstract class SummarizedCallable extends SummarizedCallableBase { + bindingset[this] + SummarizedCallable() { any() } + /** * Holds if data may flow from `input` to `output` through this callable. * @@ -493,7 +496,7 @@ module Private { or exists(ParameterPosition pos | parameterReadState(c, state, pos) and - result.(ParamNode).isParameterOf(c, pos) + result.(ParamNode).isParameterOf(inject(c), pos) ) ) } @@ -621,7 +624,7 @@ module Private { predicate summaryPostUpdateNode(Node post, Node pre) { exists(SummarizedCallable c, ParameterPosition pos | isParameterPostUpdate(post, c, pos) and - pre.(ParamNode).isParameterOf(c, pos) + pre.(ParamNode).isParameterOf(inject(c), pos) ) or exists(SummarizedCallable callable, SummaryComponentStack s | @@ -644,7 +647,7 @@ module Private { * node, and back out to `p`. 
*/ predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | p.isParameterOf(c, ppos) | + exists(SummarizedCallable c, ParameterPosition ppos | p.isParameterOf(inject(c), ppos) | exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | summary(c, inputContents, outputContents, _) and inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and @@ -748,13 +751,16 @@ module Private { private predicate viableParam( DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, ParamNode p ) { - p.isParameterOf(sc, ppos) and - sc = viableCallable(call) + exists(DataFlowCallable c | + c = inject(sc) and + p.isParameterOf(c, ppos) and + c = viableCallable(call) + ) } pragma[nomagic] - private ParamNode summaryArgParam0(DataFlowCall call, ArgNode arg) { - exists(ParameterPosition ppos, SummarizedCallable sc | + private ParamNode summaryArgParam0(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { + exists(ParameterPosition ppos | argumentPositionMatch(call, arg, ppos) and viableParam(call, sc, ppos, result) ) @@ -768,9 +774,9 @@ module Private { * or expects contents. 
*/ pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg) { + predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { exists(ParamNode p, Node mid, ParameterPosition ppos, Node ret | - p = summaryArgParam0(_, arg) and + p = summaryArgParam0(_, arg, sc) and p.isParameterOf(_, ppos) and summaryLocalStep(p, mid, true) and summaryLocalStep(mid, ret, true) and @@ -782,27 +788,42 @@ module Private { } bindingset[ret] - private ParamNode summaryArgParam(ArgNode arg, ReturnNodeExt ret, OutNodeExt out) { + private ParamNode summaryArgParam( + ArgNode arg, ReturnNodeExt ret, OutNodeExt out, SummarizedCallable sc + ) { exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam0(call, arg) and - pragma[only_bind_out](ret).getKind() = pragma[only_bind_into](rk) and + result = summaryArgParam0(call, arg, sc) and + ret.getKind() = pragma[only_bind_into](rk) and out = pragma[only_bind_into](rk).getAnOutNode(call) ) } /** - * Holds if `arg` flows to `out` using a simple flow summary, that is, a flow - * summary without reads and stores. + * Holds if `arg` flows to `out` using a simple value-preserving flow + * summary, that is, a flow summary without reads and stores. * * NOTE: This step should not be used in global data-flow/taint-tracking, but may * be useful to include in the exposed local data-flow/taint-tracking relations. 
*/ - predicate summaryThroughStep(ArgNode arg, Node out, boolean preservesValue) { - exists(ReturnNodeExt ret | - summaryLocalStep(summaryArgParam(arg, ret, out), ret, preservesValue) + predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { + exists(ReturnKind rk, ReturnNode ret, DataFlowCall call | + summaryLocalStep(summaryArgParam0(call, arg, sc), ret, true) and + ret.getKind() = pragma[only_bind_into](rk) and + out = getAnOutNode(call, pragma[only_bind_into](rk)) ) } + /** + * Holds if `arg` flows to `out` using a simple flow summary involving taint + * step, that is, a flow summary without reads and stores. + * + * NOTE: This step should not be used in global data-flow/taint-tracking, but may + * be useful to include in the exposed local data-flow/taint-tracking relations. + */ + predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { + exists(ReturnNodeExt ret | summaryLocalStep(summaryArgParam(arg, ret, out, sc), ret, false)) + } + /** * Holds if there is a read(+taint) of `c` from `arg` to `out` using a * flow summary. @@ -810,9 +831,9 @@ module Private { * NOTE: This step should not be used in global data-flow/taint-tracking, but may * be useful to include in the exposed local data-flow/taint-tracking relations. */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out) { + predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { exists(Node mid, ReturnNodeExt ret | - summaryReadStep(summaryArgParam(arg, ret, out), c, mid) and + summaryReadStep(summaryArgParam(arg, ret, out, sc), c, mid) and summaryLocalStep(mid, ret, _) ) } @@ -824,9 +845,9 @@ module Private { * NOTE: This step should not be used in global data-flow/taint-tracking, but may * be useful to include in the exposed local data-flow/taint-tracking relations. 
*/ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out) { + predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { exists(Node mid, ReturnNodeExt ret | - summaryLocalStep(summaryArgParam(arg, ret, out), mid, _) and + summaryLocalStep(summaryArgParam(arg, ret, out, sc), mid, _) and summaryStoreStep(mid, c, ret) ) } @@ -910,11 +931,18 @@ module Private { private class SummarizedCallableExternal extends SummarizedCallable { SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } + private predicate relevantSummaryElementGenerated( + AccessPath inSpec, AccessPath outSpec, string kind + ) { + summaryElement(this, inSpec, outSpec, kind, true) and + not summaryElement(this, _, _, _, false) and + not this.clearsContent(_, _) + } + private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { summaryElement(this, inSpec, outSpec, kind, false) or - summaryElement(this, inSpec, outSpec, kind, true) and - not summaryElement(this, _, _, _, false) + this.relevantSummaryElementGenerated(inSpec, outSpec, kind) } override predicate propagatesFlow( @@ -931,7 +959,7 @@ module Private { ) } - override predicate isAutoGenerated() { summaryElement(this, _, _, _, true) } + override predicate isAutoGenerated() { this.relevantSummaryElementGenerated(_, _, _) } } /** Holds if component `c` of specification `spec` cannot be parsed. */ @@ -1067,7 +1095,7 @@ module Private { /** Provides a query predicate for outputting a set of relevant flow summaries. */ module TestOutput { /** A flow summary to include in the `summary/3` query predicate. */ - abstract class RelevantSummarizedCallable extends SummarizedCallable { + abstract class RelevantSummarizedCallable instanceof SummarizedCallable { /** Gets the string representation of this callable used by `summary/1`. 
*/ abstract string getCallableCsv(); @@ -1075,8 +1103,10 @@ module Private { predicate relevantSummary( SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue ) { - this.propagatesFlow(input, output, preservesValue) + super.propagatesFlow(input, output, preservesValue) } + + string toString() { result = super.toString() } } /** Render the kind in the format used in flow summaries. */ @@ -1087,7 +1117,7 @@ module Private { } private string renderGenerated(RelevantSummarizedCallable c) { - if c.isAutoGenerated() then result = "generated:" else result = "" + if c.(SummarizedCallable).isAutoGenerated() then result = "generated:" else result = "" } /** @@ -1117,19 +1147,21 @@ module Private { */ module RenderSummarizedCallable { /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable extends SummarizedCallable { } + abstract class RelevantSummarizedCallable instanceof SummarizedCallable { + string toString() { result = super.toString() } + } private newtype TNodeOrCall = MkNode(Node n) { exists(RelevantSummarizedCallable c | n = summaryNode(c, _) or - n.(ParamNode).isParameterOf(c, _) + n.(ParamNode).isParameterOf(inject(c), _) ) } or MkCall(DataFlowCall call) { call = summaryDataFlowCall(_) and - call.getEnclosingCallable() instanceof RelevantSummarizedCallable + call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) } private class NodeOrCall extends TNodeOrCall { diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll index 89e83626a33..7fa9df72ba2 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll @@ -15,6 +15,12 @@ private import semmle.code.csharp.Unification private import semmle.code.csharp.dataflow.ExternalFlow private import 
semmle.code.csharp.dataflow.FlowSummary as FlowSummary +class SummarizedCallableBase extends Callable { + SummarizedCallableBase() { this.isUnboundDeclaration() } +} + +DataFlowCallable inject(SummarizedCallable c) { result.asSummarizedCallable() = c } + /** Gets the parameter position of the instance parameter. */ ArgumentPosition instanceParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks @@ -55,7 +61,7 @@ private DataFlowType getReturnTypeBase(DotNet::Callable c, ReturnKind rk) { /** Gets the return type of kind `rk` for callable `c`. */ bindingset[c] DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { - result = getReturnTypeBase(c.asSummarizedCallable(), rk) + result = getReturnTypeBase(c, rk) or rk = any(JumpReturnKind jrk | result = getReturnTypeBase(jrk.getTarget(), jrk.getTargetReturnKind())) @@ -85,9 +91,12 @@ DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { ) } -private predicate summaryElement0( - DotNet::Callable c, string input, string output, string kind, boolean generated -) { +/** + * Holds if an external flow summary exists for `c` with input specification + * `input`, output specification `output`, kind `kind`, and a flag `generated` + * stating whether the summary is autogenerated. + */ +predicate summaryElement(Callable c, string input, string output, string kind, boolean generated) { exists( string namespace, string type, boolean subtypes, string name, string signature, string ext | @@ -96,21 +105,6 @@ private predicate summaryElement0( ) } -private class SummarizedCallableExternal extends FlowSummary::SummarizedCallable { - SummarizedCallableExternal() { summaryElement0(this, _, _, _, _) } -} - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and a flag `generated` - * stating whether the summary is autogenerated. 
- */ -predicate summaryElement( - DataFlowCallable c, string input, string output, string kind, boolean generated -) { - summaryElement0(c.asSummarizedCallable(), input, output, kind, generated) -} - /** * Holds if an external source specification exists for `e` with output specification * `output`, kind `kind`, and a flag `generated` stating whether the source specification is diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/TaintTrackingPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/TaintTrackingPrivate.qll index 2df41e00299..268c5cd49df 100755 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/TaintTrackingPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/TaintTrackingPrivate.qll @@ -2,6 +2,7 @@ private import csharp private import TaintTrackingPublic private import FlowSummaryImpl as FlowSummaryImpl private import semmle.code.csharp.Caching +private import semmle.code.csharp.dataflow.internal.DataFlowDispatch private import semmle.code.csharp.dataflow.internal.DataFlowPrivate private import semmle.code.csharp.dataflow.internal.ControlFlowReachability private import semmle.code.csharp.dispatch.Dispatch @@ -117,19 +118,22 @@ private module Cached { ( // Simple flow through library code is included in the exposed local // step relation, even though flow is technically inter-procedural - FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false) + FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, + any(DataFlowSummarizedCallable sc)) or // Taint collection by adding a tainted element exists(DataFlow::ElementContent c | storeStep(nodeFrom, c, nodeTo) or - FlowSummaryImpl::Private::Steps::summarySetterStep(nodeFrom, c, nodeTo) + FlowSummaryImpl::Private::Steps::summarySetterStep(nodeFrom, c, nodeTo, + any(DataFlowSummarizedCallable sc)) ) or exists(DataFlow::Content c | readStep(nodeFrom, c, nodeTo) or - FlowSummaryImpl::Private::Steps::summaryGetterStep(nodeFrom, c, 
nodeTo) + FlowSummaryImpl::Private::Steps::summaryGetterStep(nodeFrom, c, nodeTo, + any(DataFlowSummarizedCallable sc)) | // Taint members c = any(TaintedMember m).(FieldOrProperty).getContent() diff --git a/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll b/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll index 30930f6de18..e62c6c3ee94 100644 --- a/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll +++ b/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll @@ -88,7 +88,10 @@ module EntityFramework { } /** A flow summary for EntityFramework. */ - abstract class EFSummarizedCallable extends SummarizedCallable { } + abstract class EFSummarizedCallable extends SummarizedCallable { + bindingset[this] + EFSummarizedCallable() { any() } + } private class DbSetAddOrUpdateRequiredSummaryComponentStack extends RequiredSummaryComponentStack { override predicate required(SummaryComponent head, SummaryComponentStack tail) { diff --git a/csharp/ql/src/Language Abuse/ForeachCapture.ql b/csharp/ql/src/Language Abuse/ForeachCapture.ql index 7bef3bc3405..b3597418390 100644 --- a/csharp/ql/src/Language Abuse/ForeachCapture.ql +++ b/csharp/ql/src/Language Abuse/ForeachCapture.ql @@ -13,6 +13,7 @@ import csharp import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate import semmle.code.csharp.frameworks.system.Collections import semmle.code.csharp.frameworks.system.collections.Generic @@ -76,7 +77,8 @@ Element getAssignmentTarget(Expr e) { Element getCollectionAssignmentTarget(Expr e) { // Store into collection via method exists(DataFlowPrivate::PostUpdateNode postNode | - FlowSummaryImpl::Private::Steps::summarySetterStep(DataFlow::exprNode(e), _, postNode) and + FlowSummaryImpl::Private::Steps::summarySetterStep(DataFlow::exprNode(e), _, postNode, + 
any(DataFlowDispatch::DataFlowSummarizedCallable sc)) and result.(Variable).getAnAccess() = postNode.getPreUpdateNode().asExpr() ) or diff --git a/csharp/ql/src/Likely Bugs/LeapYear/UnsafeYearConstruction.ql b/csharp/ql/src/Likely Bugs/LeapYear/UnsafeYearConstruction.ql index fe0c5707742..838406ab82e 100644 --- a/csharp/ql/src/Likely Bugs/LeapYear/UnsafeYearConstruction.ql +++ b/csharp/ql/src/Likely Bugs/LeapYear/UnsafeYearConstruction.ql @@ -20,7 +20,7 @@ class UnsafeYearCreationFromArithmeticConfiguration extends TaintTracking::Confi override predicate isSource(DataFlow::Node source) { exists(ArithmeticOperation ao, PropertyAccess pa | ao = source.asExpr() | pa = ao.getAChild*() and - pa.getProperty().getQualifiedName().matches("System.DateTime.Year") + pa.getProperty().hasQualifiedName("System.DateTime.Year") ) } diff --git a/csharp/ql/test/library-tests/dataflow/content/ContentFlow.cs b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.cs new file mode 100644 index 00000000000..e3db962193f --- /dev/null +++ b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.cs @@ -0,0 +1,51 @@ +using System; + +public class ContentFlow +{ + public class A + { + public A FieldA; + public B FieldB; + } + public class B + { + public A FieldA; + public B FieldB; + } + + public void M(A a, B b) + { + var a1 = new A(); + Sink(Through(a1.FieldA.FieldB)); + + a.FieldA.FieldB = new B(); + Sink(Through(a)); + + var a2 = new A(); + b.FieldB.FieldA = a2.FieldB.FieldA; + Sink(Through(b)); + + Sink(Through(Out())); + + In(new A().FieldA.FieldB); + } + + public static void Sink(T t) { } + + public T Through(T t) + { + Sink(t); + return t; + } + + public void In(T t) + { + Sink(t); + } + + public B Out() + { + var a = new A(); + return a.FieldA.FieldB; + } +} diff --git a/csharp/ql/test/library-tests/dataflow/content/ContentFlow.expected b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.expected new file mode 100644 index 00000000000..ce55c9da51f --- /dev/null +++ 
b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.expected @@ -0,0 +1,9 @@ +| ContentFlow.cs:18:18:18:24 | object creation of type A | field FieldB.field FieldA. | ContentFlow.cs:19:14:19:38 | call to method Through | | true | +| ContentFlow.cs:18:18:18:24 | object creation of type A | field FieldB.field FieldA. | ContentFlow.cs:37:14:37:14 | access to parameter t | | true | +| ContentFlow.cs:21:27:21:33 | object creation of type B | | ContentFlow.cs:22:14:22:23 | call to method Through | field FieldA.field FieldB. | true | +| ContentFlow.cs:21:27:21:33 | object creation of type B | | ContentFlow.cs:37:14:37:14 | access to parameter t | field FieldA.field FieldB. | true | +| ContentFlow.cs:24:18:24:24 | object creation of type A | field FieldA.field FieldB. | ContentFlow.cs:26:14:26:23 | call to method Through | field FieldB.field FieldA. | true | +| ContentFlow.cs:24:18:24:24 | object creation of type A | field FieldA.field FieldB. | ContentFlow.cs:37:14:37:14 | access to parameter t | field FieldB.field FieldA. | true | +| ContentFlow.cs:30:12:30:18 | object creation of type A | field FieldB.field FieldA. | ContentFlow.cs:43:14:43:14 | access to parameter t | | true | +| ContentFlow.cs:48:17:48:23 | object creation of type A | field FieldB.field FieldA. | ContentFlow.cs:28:14:28:27 | call to method Through | | true | +| ContentFlow.cs:48:17:48:23 | object creation of type A | field FieldB.field FieldA. 
| ContentFlow.cs:37:14:37:14 | access to parameter t | | true | diff --git a/csharp/ql/test/library-tests/dataflow/content/ContentFlow.ql b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.ql new file mode 100644 index 00000000000..07a510a62ce --- /dev/null +++ b/csharp/ql/test/library-tests/dataflow/content/ContentFlow.ql @@ -0,0 +1,23 @@ +import csharp +import semmle.code.csharp.dataflow.internal.ContentDataFlow + +class Conf extends ContentDataFlow::Configuration { + Conf() { this = "ContentFlowConf" } + + override predicate isSource(DataFlow::Node src) { src.asExpr() instanceof ObjectCreation } + + override predicate isSink(DataFlow::Node sink) { + exists(MethodCall mc | + mc.getTarget().hasUndecoratedName("Sink") and + mc.getAnArgument() = sink.asExpr() + ) + } + + override int accessPathLimit() { result = 2 } +} + +from + Conf conf, ContentDataFlow::Node source, ContentDataFlow::AccessPath sourceAp, + ContentDataFlow::Node sink, ContentDataFlow::AccessPath sinkAp, boolean preservesValue +where conf.hasFlow(source, sourceAp, sink, sinkAp, preservesValue) +select source, sourceAp, sink, sinkAp, preservesValue diff --git a/csharp/ql/test/library-tests/dataflow/external-models/ExternalFlow.ql b/csharp/ql/test/library-tests/dataflow/external-models/ExternalFlow.ql index 316cd8ef0b7..9c570d3534b 100644 --- a/csharp/ql/test/library-tests/dataflow/external-models/ExternalFlow.ql +++ b/csharp/ql/test/library-tests/dataflow/external-models/ExternalFlow.ql @@ -53,6 +53,14 @@ class Conf extends TaintTracking::Configuration { } } +/** + * Simulate that methods with summaries are not included in the source code. + * This is relevant for dataflow analysis using summaries tagged as generated. 
+ */ +private class MyMethod extends Method { + override predicate fromSource() { none() } +} + from DataFlow::PathNode source, DataFlow::PathNode sink, Conf conf where conf.hasFlowPath(source, sink) select sink, source, sink, "$@", source, source.toString() diff --git a/csharp/ql/test/library-tests/dataflow/external-models/steps.expected b/csharp/ql/test/library-tests/dataflow/external-models/steps.expected index fb2e2c9b110..af0e9cdfe4b 100644 --- a/csharp/ql/test/library-tests/dataflow/external-models/steps.expected +++ b/csharp/ql/test/library-tests/dataflow/external-models/steps.expected @@ -20,5 +20,5 @@ summarySetterStep | Steps.cs:34:37:34:37 | 0 | Steps.cs:34:13:34:16 | [post] this access | Steps.cs:63:13:63:20 | property Property | | Steps.cs:38:36:38:36 | 0 | Steps.cs:38:13:38:16 | [post] this access | file://:0:0:0:0 | element | clearsContent -| Steps.cs:61:14:61:28 | StepFieldSetter | Steps.cs:57:13:57:17 | field Field | this | -| Steps.cs:67:14:67:31 | StepPropertySetter | Steps.cs:63:13:63:20 | property Property | this | +| Steps.cs:61:14:61:28 | StepFieldSetter | file://:0:0:0:0 | element | this | +| Steps.cs:67:14:67:31 | StepPropertySetter | file://:0:0:0:0 | element | this | diff --git a/csharp/ql/test/library-tests/dataflow/external-models/steps.ql b/csharp/ql/test/library-tests/dataflow/external-models/steps.ql index 5ebab3be26d..a73c0c8c7c0 100644 --- a/csharp/ql/test/library-tests/dataflow/external-models/steps.ql +++ b/csharp/ql/test/library-tests/dataflow/external-models/steps.ql @@ -2,10 +2,11 @@ import csharp import DataFlow import semmle.code.csharp.dataflow.ExternalFlow import semmle.code.csharp.dataflow.FlowSummary +import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl import CsvValidation -class SummaryModelTest extends SummaryModelCsv { +private class SummaryModelTest extends SummaryModelCsv { override predicate row(string row) { 
row = [ @@ -28,23 +29,41 @@ class SummaryModelTest extends SummaryModelCsv { } } +private class SummarizedCallableClear extends SummarizedCallable { + SummarizedCallableClear() { + this.getName() = ["StepPropertySetter", "StepFieldSetter"] and + this.getFile().getBaseName() = "Steps.cs" + } + + override predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { + pos.isThisParameter() and + content instanceof DataFlow::ElementContent + } +} + query predicate summaryThroughStep( DataFlow::Node node1, DataFlow::Node node2, boolean preservesValue ) { - FlowSummaryImpl::Private::Steps::summaryThroughStep(node1, node2, preservesValue) + FlowSummaryImpl::Private::Steps::summaryThroughStepValue(node1, node2, + any(DataFlowDispatch::DataFlowSummarizedCallable sc)) and + preservesValue = true + or + FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(node1, node2, + any(DataFlowDispatch::DataFlowSummarizedCallable sc)) and + preservesValue = false } query predicate summaryGetterStep(DataFlow::Node arg, DataFlow::Node out, Content c) { - FlowSummaryImpl::Private::Steps::summaryGetterStep(arg, c, out) + FlowSummaryImpl::Private::Steps::summaryGetterStep(arg, c, out, + any(DataFlowDispatch::DataFlowSummarizedCallable sc)) } query predicate summarySetterStep(DataFlow::Node arg, DataFlow::Node out, Content c) { - FlowSummaryImpl::Private::Steps::summarySetterStep(arg, c, out) + FlowSummaryImpl::Private::Steps::summarySetterStep(arg, c, out, + any(DataFlowDispatch::DataFlowSummarizedCallable sc)) } -query predicate clearsContent( - FlowSummaryImpl::Public::SummarizedCallable c, DataFlow::Content k, ParameterPosition pos -) { +query predicate clearsContent(SummarizedCallable c, DataFlow::Content k, ParameterPosition pos) { c.clearsContent(pos, k) and - c.asSummarizedCallable().fromSource() + c.fromSource() } diff --git a/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql 
b/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql index 4afa1e44e02..961faf60084 100644 --- a/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql +++ b/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql @@ -3,7 +3,7 @@ private import semmle.code.csharp.dataflow.internal.DataFlowPrivate::Csv private import semmle.code.csharp.dataflow.ExternalFlow class IncludeFilteredSummarizedCallable extends IncludeSummarizedCallable { - IncludeFilteredSummarizedCallable() { exists(this) } + IncludeFilteredSummarizedCallable() { this instanceof SummarizedCallable } /** * Holds if flow is propagated between `input` and `output` and @@ -13,12 +13,11 @@ class IncludeFilteredSummarizedCallable extends IncludeSummarizedCallable { override predicate relevantSummary( SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue ) { - this.propagatesFlow(input, output, preservesValue) and - not exists(IncludeSummarizedCallable rsc, SummarizedCallable sc | - sc = rsc.asSummarizedCallable() and - isBaseCallableOrPrototype(sc) and - rsc.propagatesFlow(input, output, preservesValue) and - this.asSummarizedCallable().(UnboundCallable).overridesOrImplementsUnbound(sc) + this.(SummarizedCallable).propagatesFlow(input, output, preservesValue) and + not exists(IncludeSummarizedCallable rsc | + isBaseCallableOrPrototype(rsc) and + rsc.(SummarizedCallable).propagatesFlow(input, output, preservesValue) and + this.(UnboundCallable).overridesOrImplementsUnbound(rsc) ) } } diff --git a/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql b/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql index a261ea99e22..41cc8379b3d 100644 --- a/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql +++ b/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql @@ -3,7 +3,7 @@ import shared.FlowSummaries import semmle.code.csharp.dataflow.ExternalFlow as 
ExternalFlow private class IncludeEFSummarizedCallable extends IncludeSummarizedCallable { - IncludeEFSummarizedCallable() { this.asSummarizedCallable() instanceof EFSummarizedCallable } + IncludeEFSummarizedCallable() { this instanceof EFSummarizedCallable } } query predicate sourceNode(DataFlow::Node node, string kind) { diff --git a/csharp/ql/test/shared/FlowSummaries.qll b/csharp/ql/test/shared/FlowSummaries.qll index dc579c4d64c..49744663040 100644 --- a/csharp/ql/test/shared/FlowSummaries.qll +++ b/csharp/ql/test/shared/FlowSummaries.qll @@ -4,12 +4,9 @@ private import semmle.code.csharp.dataflow.internal.DataFlowPrivate abstract class IncludeSummarizedCallable extends RelevantSummarizedCallable { IncludeSummarizedCallable() { - this.asSummarizedCallable() = - any(Callable c | [c.(Modifiable), c.(Accessor).getDeclaration()].isEffectivelyPublic()) + [this.(Modifiable), this.(Accessor).getDeclaration()].isEffectivelyPublic() } /** Gets a string representing the callable in semi-colon separated format for use in flow summaries. */ - final override string getCallableCsv() { - result = Csv::asPartialModel(this.asSummarizedCallable()) - } + final override string getCallableCsv() { result = Csv::asPartialModel(this) } } diff --git a/docs/codeql/codeql-cli/about-ql-packs.rst b/docs/codeql/codeql-cli/about-ql-packs.rst index 11326387b67..6568cf0a3e6 100644 --- a/docs/codeql/codeql-cli/about-ql-packs.rst +++ b/docs/codeql/codeql-cli/about-ql-packs.rst @@ -6,11 +6,8 @@ About QL packs QL packs are used to organize the files used in CodeQL analysis. They contain queries, library files, query suites, and important metadata. -The `CodeQL repository `__ contains QL packs for -C/C++, C#, Java, JavaScript, Python, and Ruby. The `CodeQL for Go -`__ repository contains a QL pack for Go -analysis. You can also make custom QL packs to contain your own queries and -libraries. +The `CodeQL repository `__ contains standard QL packs for all supported languages. 
+You can also make custom QL packs to contain your own queries and libraries. QL pack structure ----------------- diff --git a/docs/codeql/codeql-cli/getting-started-with-the-codeql-cli.rst b/docs/codeql/codeql-cli/getting-started-with-the-codeql-cli.rst index 0cf07d574da..f859f07cb7c 100644 --- a/docs/codeql/codeql-cli/getting-started-with-the-codeql-cli.rst +++ b/docs/codeql/codeql-cli/getting-started-with-the-codeql-cli.rst @@ -193,8 +193,7 @@ further options on the command line. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `CodeQL repository `__ contains -the queries and libraries required for CodeQL analysis of C/C++, C#, Java, -JavaScript/TypeScript, Python, and Ruby. +the queries and libraries required for CodeQL analysis of all supported languages. Clone a copy of this repository into ``codeql-home``. By default, the root of the cloned repository will be called ``codeql``. @@ -203,17 +202,18 @@ CLI that you will extract in step 4. If you use git on the command line, you can clone and rename the repository in a single step by running ``git clone git@github.com:github/codeql.git codeql-repo`` in the ``codeql-home`` folder. -The CodeQL libraries and queries for Go analysis live in the `CodeQL for Go -repository `__. Clone a copy of this -repository into ``codeql-home``, and run ``codeql-go/scripts/install-deps.sh`` -to install its dependencies. +For Go analysis, run ``codeql-repo/go/scripts/install-deps.sh`` to install its dependencies. -The cloned repositories should have a sibling relationship. -For example, if the root of the cloned CodeQL repository is -``$HOME/codeql-home/codeql-repo``, then the root of the cloned CodeQL for Go -repository should be ``$HOME/codeql-home/codeql-go``. +.. pull-quote:: Note -Within these repositories, the queries and libraries are organized into QL + The CodeQL libraries and queries for Go analysis used to live in a + separate `CodeQL for Go repository `__. + These have been moved to the ``github/codeql`` repository. 
+ It is no longer necessary to clone the ``github/codeql-go`` repository into a separate ``codeql-home/codeql-go`` folder. + + For more information, see the `Relocation announcement `__. + +Within this repository, the queries and libraries are organized into QL packs. Along with the queries themselves, QL packs contain important metadata that tells the CodeQL CLI how to process the query files. For more information, see ":doc:`About QL packs `." diff --git a/docs/codeql/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code.rst b/docs/codeql/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code.rst index f3338283ba7..b04316f6296 100644 --- a/docs/codeql/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code.rst +++ b/docs/codeql/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code.rst @@ -28,7 +28,7 @@ Viewing the abstract syntax tree of a source file .. pull-quote:: Note - If you don't have an appropriate ``printAST.ql`` query in your workspace, the **CodeQL: View AST** command won't work. To fix this, you can update your copy of the `CodeQL `__ repository (or `CodeQL for Go `__ repository) from ``main``. If you do this, you may need to upgrade your databases. Also, query caches may be discarded and your next query runs could be slower. + If you don't have an appropriate ``printAST.ql`` query in your workspace, the **CodeQL: View AST** command won't work. To fix this, you can update your copy of the `CodeQL `__ repository from ``main``. If you do this, you may need to upgrade your databases. Also, query caches may be discarded and your next query runs could be slower. 3. Once the query has run, the AST viewer displays the structure of the source file. 4. To see the nested structure, click the arrows and expand the nodes. 
diff --git a/docs/codeql/codeql-for-visual-studio-code/setting-up-codeql-in-visual-studio-code.rst b/docs/codeql/codeql-for-visual-studio-code/setting-up-codeql-in-visual-studio-code.rst index a43c69edecb..b632a86af4c 100644 --- a/docs/codeql/codeql-for-visual-studio-code/setting-up-codeql-in-visual-studio-code.rst +++ b/docs/codeql/codeql-for-visual-studio-code/setting-up-codeql-in-visual-studio-code.rst @@ -69,8 +69,7 @@ There are two ways to do this: This ensures that the queries and libraries you write in VS Code also work in the query console on LGTM Enterprise. If you prefer to add the CodeQL queries and libraries to an :ref:`existing workspace ` instead of the starter workspace, then you should - clone the appropriate branch of the `general CodeQL repository `__ and the - `CodeQL repository for Go `__ and add them to your workspace. + clone the appropriate branch of the `CodeQL repository `__ and add it to your workspace. .. _starter-workspace: @@ -78,8 +77,7 @@ Using the starter workspace ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The starter workspace is a Git repository. It contains: -* The `repository of CodeQL libraries and queries `__ for C/C++, C#, Java, JavaScript, Python, and Ruby. This is included as a submodule, so it can be updated without affecting your custom queries. -* The `repository of CodeQL libraries and queries `__ for Go. This is also included as a submodule. +* The `repository of CodeQL libraries and queries `__ for all supported languages. This is included as a submodule, so it can be updated without affecting your custom queries. * A series of folders named ``codeql-custom-queries-``. These are ready for you to start developing your own custom queries for each language, using the standard libraries. There are some example queries to get you started.
To use the starter workspace: @@ -114,10 +112,6 @@ For example, to make a custom CodeQL folder called ``my-custom-cpp-pack`` depend For more information about why you need to add a ``qlpack.yml`` file, see ":ref:`About QL packs `." -.. pull-quote:: Note - - The CodeQL libraries for Go are not included in the ``github/codeql`` repository, but are stored separately. To analyze Go projects, clone the repository at https://github.com/github/codeql-go and add it to your workspace as above. - Further reading ---------------- diff --git a/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst b/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst index 900f4a757c1..e8b1880b772 100644 --- a/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst +++ b/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst @@ -7,8 +7,8 @@ When analyzing a Go program, CodeQL does not examine the source code for external packages. To track the flow of untrusted data through a library, you can create a model of the library. -You can find existing models in the ``ql/lib/semmle/go/frameworks/`` folder of the -`CodeQL for Go repository `__. +You can find existing models in the ``go/ql/lib/semmle/go/frameworks/`` folder of the +`CodeQL repository `__. To add a new model, you should make a new file in that folder, named after the library. Sources @@ -102,8 +102,8 @@ Data-flow sinks are specified by queries rather than by library models. However, you can use library models to indicate when functions belong to special categories. Queries can then use these categories when specifying sinks. Classes representing these special categories are contained in -``ql/lib/semmle/go/Concepts.qll`` in the `CodeQL for Go repository -`__. +``go/ql/lib/semmle/go/Concepts.qll`` in the `CodeQL repository +`__. 
``Concepts.qll`` includes classes for logger mechanisms, HTTP response writers, HTTP redirects, and marshaling and unmarshaling functions. diff --git a/docs/codeql/query-help/go.rst b/docs/codeql/query-help/go.rst index e81d759f807..9e3050f74d0 100644 --- a/docs/codeql/query-help/go.rst +++ b/docs/codeql/query-help/go.rst @@ -3,6 +3,6 @@ CodeQL query help for Go .. include:: ../reusables/query-help-overview.rst -For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL for Go repository `__. +For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository `__. .. include:: toc-go.rst diff --git a/docs/codeql/reusables/go-further-reading.rst b/docs/codeql/reusables/go-further-reading.rst index 275d3c85c3e..cf8ee287b7c 100644 --- a/docs/codeql/reusables/go-further-reading.rst +++ b/docs/codeql/reusables/go-further-reading.rst @@ -1,3 +1,3 @@ -- `CodeQL queries for Go `__ -- `Example queries for Go `__ +- `CodeQL queries for Go `__ +- `Example queries for Go `__ - `CodeQL library reference for Go `__ diff --git a/docs/codeql/writing-codeql-queries/about-codeql-queries.rst b/docs/codeql/writing-codeql-queries/about-codeql-queries.rst index fc7da3b67bb..be5bca75622 100644 --- a/docs/codeql/writing-codeql-queries/about-codeql-queries.rst +++ b/docs/codeql/writing-codeql-queries/about-codeql-queries.rst @@ -121,7 +121,7 @@ Select clauses for diagnostic queries (``@kind diagnostic``) and summary metric Viewing the standard CodeQL queries *********************************** -One of the easiest ways to get started writing your own queries is to modify an existing query. To view the standard CodeQL queries, or to try out other examples, visit the `CodeQL `__ and `CodeQL for Go `__ repositories on GitHub. +One of the easiest ways to get started writing your own queries is to modify an existing query. 
To view the standard CodeQL queries, or to try out other examples, visit the `CodeQL `__ repository on GitHub. You can also find examples of queries developed to find security vulnerabilities and bugs in open source software projects on the `GitHub Security Lab website `__ and in the associated `repository `__. diff --git a/docs/codeql/writing-codeql-queries/creating-path-queries.rst b/docs/codeql/writing-codeql-queries/creating-path-queries.rst index 4eec766d488..c41b67d427c 100644 --- a/docs/codeql/writing-codeql-queries/creating-path-queries.rst +++ b/docs/codeql/writing-codeql-queries/creating-path-queries.rst @@ -116,7 +116,7 @@ Declaring sources and sinks You must provide information about the ``source`` and ``sink`` in your path query. These are objects that correspond to the nodes of the paths that you are exploring. The name and the type of the ``source`` and the ``sink`` must be declared in the ``from`` statement of the query, and the types must be compatible with the nodes of the graph computed by the ``edges`` predicate. -If you are querying C/C++, C#, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement. +If you are querying C/C++, C#, Go, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement. For example: .. 
code-block:: ql diff --git a/go/CODE_OF_CONDUCT.md b/go/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000..5430dd75637 --- /dev/null +++ b/go/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at opensource@github.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct/ + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq/ diff --git a/go/CONTRIBUTING.md b/go/CONTRIBUTING.md new file mode 100644 index 00000000000..1b4868b9dba --- /dev/null +++ b/go/CONTRIBUTING.md @@ -0,0 +1,66 @@ +## Contributing + +Hi there! We're thrilled that you'd like to contribute to this project. Your help is essential for keeping it great. + +Contributions to this project are [released](https://docs.github.com/en/github/site-policy/github-terms-of-service#6-contributions-under-repository-license) to the public under the [project's open source license](LICENSE). + +Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms. + +## Building and testing + +1. Install the CodeQL CLI as described in [Getting started with the CodeQL CLI](https://codeql.github.com/docs/codeql-cli/getting-started-with-the-codeql-cli/). + +2. Ensure that `/codeql` is in your `PATH`. + +3. Clone this repository (`github/codeql`) into `/codeql-repo` and change to the directory `/codeql-repo/go`. + +4. To build, run `make`. + +5. To run the full test suite, run `make test`. This will take approximately one hour. + +6. To run all tests in a directory and its subdirectories, run `codeql test run `, for example `codeql test run ql/test/query-tests/Security`. + +7. To run an individual test, run `codeql test run `, where `` is a `.ql` or `.qlref` file, for example `codeql test run ql/test/query-tests/Security/CWE-020/MissingRegexpAnchor/MissingRegexpAnchor.qlref`. 
+ +## Adding a new query + +If you have an idea for a query that you would like to share with other CodeQL users, please open a pull request to add it to this repository. +Follow the steps below to help other users understand what your query does, and to ensure that your query is consistent with the other CodeQL queries. + +1. **Consult the documentation for query writers** + + There is lots of useful documentation to help you write CodeQL queries, ranging from information about query file structure to language-specific tutorials. For more information on the documentation available, see [Writing QL queries](https://codeql.github.com/docs/writing-codeql-queries/) on [codeql.github.com/docs](https://codeql.github.com/docs/). + +2. **Format your code correctly** + + All of the standard CodeQL queries and libraries are uniformly formatted for clarity and consistency, so we strongly recommend that all contributions follow the same formatting guidelines. If you use the CodeQL extension for Visual Studio Code, you can auto-format your query using the [Format Document command](https://codeql.github.com/docs/codeql-for-visual-studio-code/about-codeql-for-visual-studio-code/). For more information, see the [QL style guide](https://github.com/github/codeql/blob/main/docs/ql-style-guide.md). + +3. **Make sure your query has the correct metadata** + + Query metadata is used to identify your query and make sure the query results are displayed properly. + The most important metadata to include are the `@name`, `@description`, and the `@kind`. + Other metadata properties (`@precision`, `@severity`, and `@tags`) are usually added after the query has been reviewed by the maintainers. + For more information on writing query metadata, see the [Query metadata style guide](https://github.com/github/codeql/blob/main/docs/query-metadata-style-guide.md). + +4. 
**Make sure the `select` statement is compatible with the query type** + + The `select` statement of your query must be compatible with the query type (determined by the `@kind` metadata property) for alert or path results to be displayed correctly in LGTM and Visual Studio Code. + For more information on `select` statement format, see [About CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/about-codeql-queries/#select-clause) on codeql.github.com. + +5. **Write a query help file** + + Query help files explain the purpose of your query to other users. Write your query help in a `.qhelp` file and save it in the same directory as your new query. + For more information on writing query help, see the [Query help style guide](https://github.com/github/codeql/blob/main/docs/query-help-style-guide.md). + +6. **Maintain backwards compatibility** + +The standard CodeQL libraries must evolve in a backwards compatible manner. If any backwards incompatible changes need to be made, the existing API must first be marked as deprecated. This is done by adding a `deprecated` annotation along with a QLDoc reference to the replacement API. Only after at least one full release cycle has elapsed may the old API be removed. + +In addition to contributions to our standard queries and libraries, we also welcome contributions of a more experimental nature, which do not need to fulfill all the requirements listed above. See the guidelines for [experimental queries and libraries](ql/docs/experimental.md) for details. 
+ +## Resources + +- [How to Contribute to Open Source](https://opensource.guide/how-to-contribute/) +- [Using Pull Requests](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) +- [GitHub Help](https://docs.github.com/en) +- [A Note About Git Commit Messages](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) diff --git a/go/LICENSE b/go/LICENSE new file mode 100644 index 00000000000..a8952d86b02 --- /dev/null +++ b/go/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019-2020 GitHub + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/go/Makefile b/go/Makefile new file mode 100644 index 00000000000..b7da089c48a --- /dev/null +++ b/go/Makefile @@ -0,0 +1,142 @@ +all: extractor ql/lib/go.dbscheme install-deps + +ifeq ($(OS),Windows_NT) +EXE = .exe +CODEQL_PLATFORM = win64 +else +EXE = +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) +CODEQL_PLATFORM = linux64 +endif +ifeq ($(UNAME_S),Darwin) +CODEQL_PLATFORM = osx64 +endif +endif + +CODEQL_TOOLS = $(addprefix codeql-tools/,autobuild.cmd autobuild.sh pre-finalize.cmd pre-finalize.sh index.cmd index.sh linux64 osx64 win64 tracing-config.lua) + +EXTRACTOR_PACK_OUT = build/codeql-extractor-go + +BINARIES = go-extractor go-tokenizer go-autobuilder go-build-runner go-bootstrap go-gen-dbscheme + +.PHONY: tools tools-codeql tools-codeql-full clean autoformat \ + tools-linux64 tools-osx64 tools-win64 check-formatting + +clean: + rm -rf tools/bin tools/linux64 tools/osx64 tools/win64 tools/net tools/opencsv + rm -rf $(EXTRACTOR_PACK_OUT) build/stats build/testdb + +DATAFLOW_BRANCH=main + +autoformat: + find ql -iregex '.*\.qll?' -print0 | xargs -0 codeql query format -qq -i + find . -path '**/vendor' -prune -or -type f -iname '*.go' ! -empty -print0 | xargs -0 grep -L "//\s*autoformat-ignore" | xargs gofmt -w + +check-formatting: + find ql -iregex '.*\.qll?' -print0 | xargs -0 codeql query format --check-only + test -z "$$(find . -path '**/vendor' -prune -or -type f -iname '*.go' ! 
-empty -print0 | xargs -0 grep -L "//\s*autoformat-ignore" | xargs gofmt -l)" + +install-deps: + bash scripts/install-deps.sh $(CODEQL_LOCK_MODE) + +ifeq ($(QHELP_OUT_DIR),) +# If not otherwise specified, compile qhelp to markdown in place +QHELP_OUT_DIR := ql/src +endif + +qhelp-to-markdown: + scripts/qhelp-to-markdown.sh ql/src "$(QHELP_OUT_DIR)" + +tools: $(addsuffix $(EXE),$(addprefix tools/bin/,$(BINARIES))) tools/tokenizer.jar + +.PHONY: $(addsuffix $(EXE),$(addprefix tools/bin/,$(BINARIES))) +$(addsuffix $(EXE),$(addprefix tools/bin/,$(BINARIES))): + go build -mod=vendor -o $@ ./extractor/cli/$(basename $(@F)) + +tools-codeql: tools-$(CODEQL_PLATFORM) + +tools-codeql-full: tools-linux64 tools-osx64 tools-win64 + +tools-linux64: $(addprefix tools/linux64/,$(BINARIES)) + +.PHONY: $(addprefix tools/linux64/,$(BINARIES)) +$(addprefix tools/linux64/,$(BINARIES)): + GOOS=linux GOARCH=amd64 go build -mod=vendor -o $@ ./extractor/cli/$(@F) + +tools-osx64: $(addprefix tools/osx64/,$(BINARIES)) + +.PHONY: $(addprefix tools/osx64/,$(BINARIES)) +$(addprefix tools/osx64/,$(BINARIES)): + GOOS=darwin GOARCH=amd64 go build -mod=vendor -o $@ ./extractor/cli/$(@F) + +tools-win64: $(addsuffix .exe,$(addprefix tools/win64/,$(BINARIES))) + +.PHONY: $(addsuffix .exe,$(addprefix tools/win64/,$(BINARIES))) +$(addsuffix .exe,$(addprefix tools/win64/,$(BINARIES))): + env GOOS=windows GOARCH=amd64 go build -mod=vendor -o $@ ./extractor/cli/$(basename $(@F)) + +.PHONY: extractor-common extractor extractor-full install-deps +extractor-common: codeql-extractor.yml LICENSE ql/lib/go.dbscheme \ + tools/tokenizer.jar $(CODEQL_TOOLS) + rm -rf $(EXTRACTOR_PACK_OUT) + mkdir -p $(EXTRACTOR_PACK_OUT) + cp codeql-extractor.yml LICENSE ql/lib/go.dbscheme ql/lib/go.dbscheme.stats $(EXTRACTOR_PACK_OUT) + mkdir $(EXTRACTOR_PACK_OUT)/tools + cp -r tools/tokenizer.jar $(CODEQL_TOOLS) $(EXTRACTOR_PACK_OUT)/tools + +extractor: extractor-common tools-codeql + cp -r tools/$(CODEQL_PLATFORM) 
$(EXTRACTOR_PACK_OUT)/tools + +extractor-full: extractor-common tools-codeql-full + cp -r $(addprefix tools/,linux64 osx64 win64) $(EXTRACTOR_PACK_OUT)/tools + +tools/tokenizer.jar: tools/net/sourceforge/pmd/cpd/GoLanguage.class + jar cf $@ -C tools net + jar uf $@ -C tools opencsv + +tools/net/sourceforge/pmd/cpd/GoLanguage.class: extractor/net/sourceforge/pmd/cpd/GoLanguage.java + javac -cp extractor -d tools $< + rm tools/net/sourceforge/pmd/cpd/AbstractLanguage.class + rm tools/net/sourceforge/pmd/cpd/SourceCode.class + rm tools/net/sourceforge/pmd/cpd/TokenEntry.class + rm tools/net/sourceforge/pmd/cpd/Tokenizer.class + +ql/lib/go.dbscheme: tools/$(CODEQL_PLATFORM)/go-gen-dbscheme$(EXE) + $< $@ + +build/stats/src.stamp: + mkdir -p $(@D)/src + git clone 'https://github.com/golang/tools' $(@D)/src + git -C $(@D)/src checkout 9b52d559c609 -q + touch $@ + +ql/lib/go.dbscheme.stats: ql/lib/go.dbscheme build/stats/src.stamp extractor + rm -rf build/stats/database + codeql database create -l go -s build/stats/src -j4 --search-path . build/stats/database + codeql dataset measure -o $@ build/stats/database/db-go + +test: all build/testdb/check-upgrade-path + codeql test run ql/test --search-path build/codeql-extractor-go --consistency-queries ql/test/consistency + # use GOOS=linux because GOOS=darwin GOARCH=386 is no longer supported + env GOOS=linux GOARCH=386 codeql$(EXE) test run ql/test/query-tests/Security/CWE-681 --search-path build/codeql-extractor-go --consistency-queries ql/test/consistency + cd extractor; go test -mod=vendor ./... 
| grep -vF "[no test files]" + bash extractor-smoke-test/test.sh || (echo "Extractor smoke test FAILED"; exit 1) + +.PHONY: build/testdb/check-upgrade-path +build/testdb/check-upgrade-path : build/testdb/go.dbscheme ql/lib/go.dbscheme + codeql dataset upgrade build/testdb --search-path ql/lib + diff -q build/testdb/go.dbscheme ql/lib/go.dbscheme + +.PHONY: build/testdb/go.dbscheme +build/testdb/go.dbscheme: ql/lib/upgrades/initial/go.dbscheme + rm -rf build/testdb + echo >build/empty.trap + codeql dataset import -S ql/lib/upgrades/initial/go.dbscheme build/testdb build/empty.trap + +.PHONY: sync-dataflow-libraries +sync-dataflow-libraries: + for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll DataFlowImplConsistency.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll FlowSummaryImpl.qll AccessPathSyntax.qll;\ + do\ + curl -s -o ./ql/lib/semmle/go/dataflow/internal/$$f https://raw.githubusercontent.com/github/codeql/$(DATAFLOW_BRANCH)/java/ql/lib/semmle/code/java/dataflow/internal/$$f;\ + done diff --git a/go/README.md b/go/README.md new file mode 100644 index 00000000000..6569eec3da9 --- /dev/null +++ b/go/README.md @@ -0,0 +1,54 @@ +# Go analysis support for CodeQL + +This open-source repository contains the extractor, CodeQL libraries, and queries that power Go +support in [LGTM](https://lgtm.com) and the other CodeQL products that [GitHub](https://github.com) +makes available to its customers worldwide. + +It contains two major components: + - an extractor, itself written in Go, that parses Go source code and converts it into a database + that can be queried using CodeQL. + - static analysis libraries and queries written in [CodeQL](https://codeql.github.com/docs/) that can be + used to analyze such a database to find coding mistakes or security vulnerabilities. + +The goal of this project is to provide comprehensive static analysis support for Go in CodeQL. 
+ +For the queries and libraries that power CodeQL support for other languages, visit [the CodeQL +repository](https://github.com/github/codeql). + +## Installation + +Clone this repository. + +Run `scripts/install-deps.sh`. This will ensure that the necessary external CodeQL packs are +downloaded to your machine. You will need to re-run this script whenever you pull new commits from +the repo. + +If you want to use the CodeQL extension for Visual Studio Code, import this repository into your VS +Code workspace. + +## Usage + +To analyze a Go codebase, either use the [CodeQL command-line +interface](https://codeql.github.com/docs/codeql-cli/) to create a database yourself, or +download a pre-built database from [LGTM.com](https://lgtm.com/). You can then run any of the +queries contained in this repository either on the command line or using the VS Code extension. + +Note that the [lgtm.com](https://github.com/github/codeql/tree/lgtm.com) branch of this +repository corresponds to the version of the queries that is currently deployed on LGTM.com. +The [main](https://github.com/github/codeql/tree/main) branch may contain changes that +have not been deployed yet, so you may need to upgrade databases downloaded from LGTM.com before +running queries on them. + +## Contributions + +Contributions are welcome! Please see our [contribution guidelines](CONTRIBUTING.md) and our +[code of conduct](CODE_OF_CONDUCT.md) for details on how to participate in our community. + +## Licensing + +The code in this repository is licensed under the [MIT license](LICENSE). 
+ +## Resources + +- [Writing CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/codeql-queries/) +- [Learning CodeQL](https://codeql.github.com/docs/writing-codeql-queries/ql-tutorials/) diff --git a/go/SECURITY.md b/go/SECURITY.md new file mode 100644 index 00000000000..58767720f29 --- /dev/null +++ b/go/SECURITY.md @@ -0,0 +1,3 @@ +If you discover a security issue in this repo, please submit it through the [GitHub Security Bug Bounty](https://hackerone.com/github). + +Thanks for helping make CodeQL safe for everyone. \ No newline at end of file diff --git a/go/alert_weighting.properties b/go/alert_weighting.properties new file mode 100644 index 00000000000..0b7b5d90ac5 --- /dev/null +++ b/go/alert_weighting.properties @@ -0,0 +1,3 @@ +precision = ("veryhigh", "high", "medium", "low") +severity = ("error", "warning", "recommendation") +security = ("true", "false") diff --git a/go/build/.gitkeep b/go/build/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/go/codeql-extractor.yml b/go/codeql-extractor.yml new file mode 100644 index 00000000000..b258b193f56 --- /dev/null +++ b/go/codeql-extractor.yml @@ -0,0 +1,14 @@ +name: "go" +display_name: "Go" +version: 0.1.0 +pull_request_triggers: + - "**/go.mod" + - "**/glide.yaml" + - "**/Gopkg.toml" +column_kind: "utf8" +file_types: + - name: go + display_name: Go + extensions: + - .go +legacy_qltest_extraction: true diff --git a/go/codeql-tools/autobuild.cmd b/go/codeql-tools/autobuild.cmd new file mode 100644 index 00000000000..aed999876e4 --- /dev/null +++ b/go/codeql-tools/autobuild.cmd @@ -0,0 +1,15 @@ +@echo off +SETLOCAL EnableDelayedExpansion + +rem Some legacy environment variables for the autobuilder. 
+set LGTM_SRC=%CD% + +if "%CODEQL_EXTRACTOR_GO_BUILD_TRACING%"=="on" ( + echo "Tracing enabled" + type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/%CODEQL_PLATFORM%/go-build-runner.exe" +) else ( + type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/%CODEQL_PLATFORM%/go-autobuilder.exe" +) +exit /b %ERRORLEVEL% + +ENDLOCAL diff --git a/go/codeql-tools/autobuild.sh b/go/codeql-tools/autobuild.sh new file mode 100755 index 00000000000..585152e676e --- /dev/null +++ b/go/codeql-tools/autobuild.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -eu + +if [ "$CODEQL_PLATFORM" != "linux64" ] && [ "$CODEQL_PLATFORM" != "osx64" ] ; then + echo "Automatic build detection for $CODEQL_PLATFORM is not implemented." + exit 1 +fi + +# Some legacy environment variables used by the autobuilder. +LGTM_SRC="$(pwd)" +export LGTM_SRC + +if [ "${CODEQL_EXTRACTOR_GO_BUILD_TRACING:-}" = "on" ]; then + echo "Tracing enabled" + "$CODEQL_EXTRACTOR_GO_ROOT/tools/$CODEQL_PLATFORM/go-build-runner" +else + "$CODEQL_EXTRACTOR_GO_ROOT/tools/$CODEQL_PLATFORM/go-autobuilder" +fi diff --git a/go/codeql-tools/index.cmd b/go/codeql-tools/index.cmd new file mode 100644 index 00000000000..21c8f64df92 --- /dev/null +++ b/go/codeql-tools/index.cmd @@ -0,0 +1,8 @@ +@echo off +SETLOCAL EnableDelayedExpansion + +type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/%CODEQL_PLATFORM%/go-extractor.exe" -mod=vendor ./... +type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/pre-finalize.cmd" +exit /b %ERRORLEVEL% + +ENDLOCAL diff --git a/go/codeql-tools/index.sh b/go/codeql-tools/index.sh new file mode 100755 index 00000000000..877400d37f2 --- /dev/null +++ b/go/codeql-tools/index.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -eu + +if [ "$CODEQL_PLATFORM" != "linux64" ] && [ "$CODEQL_PLATFORM" != "osx64" ] ; then + echo "Automatic build detection for $CODEQL_PLATFORM is not implemented." + exit 1 +fi + +"$CODEQL_EXTRACTOR_GO_ROOT/tools/$CODEQL_PLATFORM/go-extractor" -mod=vendor ./... 
+"$CODEQL_EXTRACTOR_GO_ROOT/tools/pre-finalize.sh" diff --git a/go/codeql-tools/linux64/compiler-tracing.spec b/go/codeql-tools/linux64/compiler-tracing.spec new file mode 100644 index 00000000000..2055555c21a --- /dev/null +++ b/go/codeql-tools/linux64/compiler-tracing.spec @@ -0,0 +1,7 @@ +**/go-autobuilder: + order compiler + trace no +**/go: + invoke ${config_dir}/go-extractor + prepend --mimic + prepend "${compiler}" diff --git a/go/codeql-tools/osx64/compiler-tracing.spec b/go/codeql-tools/osx64/compiler-tracing.spec new file mode 100644 index 00000000000..2055555c21a --- /dev/null +++ b/go/codeql-tools/osx64/compiler-tracing.spec @@ -0,0 +1,7 @@ +**/go-autobuilder: + order compiler + trace no +**/go: + invoke ${config_dir}/go-extractor + prepend --mimic + prepend "${compiler}" diff --git a/go/codeql-tools/pre-finalize.cmd b/go/codeql-tools/pre-finalize.cmd new file mode 100644 index 00000000000..4abac249933 --- /dev/null +++ b/go/codeql-tools/pre-finalize.cmd @@ -0,0 +1,19 @@ +@echo off +SETLOCAL EnableDelayedExpansion + +if NOT "%CODEQL_EXTRACTOR_GO_EXTRACT_HTML%"=="no" ( + type NUL && "%CODEQL_DIST%/codeql.exe" database index-files ^ + --working-dir=. ^ + --include-extension=.htm ^ + --include-extension=.html ^ + --include-extension=.xhtm ^ + --include-extension=.xhtml ^ + --include-extension=.vue ^ + --size-limit 10m ^ + --language html ^ + -- ^ + "%CODEQL_EXTRACTOR_GO_WIP_DATABASE%" ^ + || echo "HTML extraction failed; continuing" + + exit /b %ERRORLEVEL% +) diff --git a/go/codeql-tools/pre-finalize.sh b/go/codeql-tools/pre-finalize.sh new file mode 100755 index 00000000000..3a8b31c70a0 --- /dev/null +++ b/go/codeql-tools/pre-finalize.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +set -eu + +if [ "${CODEQL_EXTRACTOR_GO_EXTRACT_HTML:-yes}" != "no" ]; then + "$CODEQL_DIST/codeql" database index-files \ + --working-dir=. 
\ + --include-extension=.htm \ + --include-extension=.html \ + --include-extension=.xhtm \ + --include-extension=.xhtml \ + --include-extension=.vue \ + --size-limit 10m \ + --language html \ + -- \ + "$CODEQL_EXTRACTOR_GO_WIP_DATABASE" \ + || echo "HTML extraction failed; continuing." +fi diff --git a/go/codeql-tools/tracing-config.lua b/go/codeql-tools/tracing-config.lua new file mode 100644 index 00000000000..8554d545ce0 --- /dev/null +++ b/go/codeql-tools/tracing-config.lua @@ -0,0 +1,29 @@ +function RegisterExtractorPack() + local goExtractor = GetPlatformToolsDirectory() .. 'go-extractor' + local patterns = { + CreatePatternMatcher({'^go-autobuilder$'}, MatchCompilerName, nil, + {trace = false}), + CreatePatternMatcher({'^go$'}, MatchCompilerName, goExtractor, { + prepend = {'--mimic', '${compiler}'}, + order = ORDER_BEFORE + }) + + } + if OperatingSystem == 'windows' then + goExtractor = goExtractor .. '.exe' -- path already ends in 'go-extractor'; append only the Windows suffix + patterns = { + CreatePatternMatcher({'^go-autobuilder%.exe$'}, MatchCompilerName, + nil, {trace = false}), + CreatePatternMatcher({'^go%.exe$'}, MatchCompilerName, goExtractor, + { + prepend = {'--mimic', '"${compiler}"'}, + order = ORDER_BEFORE + }) + } + end + return patterns +end + +-- Return a list of minimum supported versions of the configuration file format +-- return one entry per supported major version.
+function GetCompatibleVersions() return {'1.0.0'} end diff --git a/go/codeql-tools/win64/compiler-tracing.spec b/go/codeql-tools/win64/compiler-tracing.spec new file mode 100644 index 00000000000..76a6b011405 --- /dev/null +++ b/go/codeql-tools/win64/compiler-tracing.spec @@ -0,0 +1,7 @@ +**/go-autobuilder.exe: + order compiler + trace no +**/go.exe: + invoke ${config_dir}/go-extractor.exe + prepend --mimic + prepend "${compiler}" diff --git a/go/docs/language/learn-ql/go/ast-class-reference.rst b/go/docs/language/learn-ql/go/ast-class-reference.rst new file mode 100644 index 00000000000..d874652a894 --- /dev/null +++ b/go/docs/language/learn-ql/go/ast-class-reference.rst @@ -0,0 +1,490 @@ +Abstract syntax tree classes for working with Go programs +========================================================= + +CodeQL has a large selection of classes for representing the abstract syntax tree of Go programs. + +.. include:: ../../reusables/abstract-syntax-tree.rst + +Statement classes +----------------- + +This table lists all subclasses of `Stmt <https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html>`__. 
+ ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Statement syntax | CodeQL class | Superclasses | Remarks | ++===================================================================================================================+===================================================================================================================+===============================================================================================================+===================================================================================================================+ +| ``;`` | EmptyStmt_ | | | +| | | | | +| | .. _EmptyStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$EmptyStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ | ExprStmt_ | | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. 
_ExprStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ExprStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``{`` Stmt_ ``...`` ``}`` | BlockStmt_ | | | +| | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``if`` Expr_ BlockStmt_ | IfStmt_ | | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _IfStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$IfStmt.html | | | +| .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``if`` Expr_ BlockStmt_ ``else`` Stmt_ | | | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | +| .. 
_Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``if`` Stmt_\ ``;`` Expr_ BlockStmt_ | | | | +| | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``for`` Expr_ BlockStmt_ | ForStmt_ | LoopStmt_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _ForStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ForStmt.html | .. _LoopStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$LoopStmt.html | | +| .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``for`` Stmt_\ ``;`` Expr_\ ``;`` Stmt_ BlockStmt_ | | | | +| | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | +| .. 
_BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``for`` Expr_ ``...`` ``=`` ``range`` Expr_ BlockStmt_ | RangeStmt_ | LoopStmt_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _RangeStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$RangeStmt.html | .. _LoopStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$LoopStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _BlockStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BlockStmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``switch`` Expr_ ``{`` CaseClause_ ``...`` ``}`` | ExpressionSwitchStmt_ | SwitchStmt_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _ExpressionSwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ExpressionSwitchStmt.html | .. 
_SwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SwitchStmt.html | | +| .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``switch`` Stmt_\ ``;`` Expr_ ``{`` CaseClause_ ``...`` ``}`` | | | | +| | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``switch`` Expr_\ ``.(type)`` ``{`` CaseClause_ ``...`` ``}`` | TypeSwitchStmt_ | SwitchStmt_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _TypeSwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$TypeSwitchStmt.html | .. _SwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SwitchStmt.html | | +| .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``switch`` SimpleAssignStmt_\ ``.(type)`` ``{`` CaseClause_ ``...`` ``}`` | | | | +| | | | | +| .. 
_SimpleAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SimpleAssignStmt.html | | | | +| .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``switch`` Stmt_\ ``;`` Expr_\ ``.(type)`` ``{`` CaseClause_ ``...`` ``}`` | | | | +| | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | +| .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``select`` ``{`` CommClause_ ``...`` ``}`` | SelectStmt_ | | | +| | | | | +| .. _CommClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CommClause.html | .. 
_SelectStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SelectStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``return`` | ReturnStmt_ | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``return`` Expr_ ``...`` | .. _ReturnStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ReturnStmt.html | | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``break`` | BreakStmt_ | BranchStmt_ | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``break`` LabelName_ | .. _BreakStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BreakStmt.html | .. _BranchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BranchStmt.html | | +| | | | | +| .. 
_LabelName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LabelName.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``continue`` | ContinueStmt_ | BranchStmt_ | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``continue`` LabelName_ | .. _ContinueStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ContinueStmt.html | .. _BranchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BranchStmt.html | | +| | | | | +| .. _LabelName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LabelName.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``goto`` LabelName_ | GotoStmt_ | BranchStmt_ | | +| | | | | +| .. _LabelName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LabelName.html | .. _GotoStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$GotoStmt.html | .. 
_BranchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BranchStmt.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``fallthrough`` | FallthroughStmt_ | BranchStmt_ | can only occur as final non-empty child of a CaseClause_ in an ExpressionSwitchStmt_ | +| | | | | +| | .. _FallthroughStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$FallthroughStmt.html | .. _BranchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BranchStmt.html | .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | +| | | | .. _ExpressionSwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ExpressionSwitchStmt.html | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| LabelName_\ ``:`` Stmt_ | LabeledStmt_ | | | +| | | | | +| .. _LabelName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LabelName.html | .. _LabeledStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$LabeledStmt.html | | | +| .. 
_Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``var`` VariableName_ TypeName_ | DeclStmt_ | | | +| | | | | +| .. _VariableName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$VariableName.html | .. _DeclStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$DeclStmt.html | | | +| .. _TypeName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeName.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``const`` VariableName_ ``=`` Expr_ | | | | +| | | | | +| .. _VariableName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$VariableName.html | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``type`` TypeName_ TypeExpr_ | | | | +| | | | | +| .. _TypeName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeName.html | | | | +| .. _TypeExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``type`` TypeName_ ``=`` TypeExpr_ | | | | +| | | | | +| .. _TypeName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeName.html | | | | +| .. 
_TypeExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``...`` ``=`` Expr_ ``...`` | AssignStmt_ | SimpleAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _AssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$AssignStmt.html | .. _SimpleAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SimpleAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| VariableName_ ``...`` ``:=`` Expr_ ``...`` | DefineStmt_ | SimpleAssignStmt_, Assignment_ | | +| | | | | +| .. _VariableName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$VariableName.html | .. _DefineStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$DefineStmt.html | .. 
_SimpleAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SimpleAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``+=`` Expr_ | AddAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _AddAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$AddAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``-=`` Expr_ | SubAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. 
_SubAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SubAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``*=`` Expr_ | MulAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _MulAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$MulAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``/=`` Expr_ | QuoAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. 
_Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _QuoAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$QuoAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``%=`` Expr_ | RemAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _RemAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$RemAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. 
_Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``&=`` Expr_ | AndAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _AndAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$AndAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. 
_Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``|=`` Expr_ | OrAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _OrAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$OrAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``^=`` Expr_ | XorAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _XorAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$XorAssignStmt.html | .. 
_CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``<<=`` Expr_ | ShlAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _ShlAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ShlAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``>>=`` Expr_ | ShrAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. 
_ShrAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$ShrAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``&^=`` Expr_ | AndNotAssignStmt_ | CompoundAssignStmt_, Assignment_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _AndNotAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$AndNotAssignStmt.html | .. _CompoundAssignStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CompoundAssignStmt.html | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | .. _Assignment: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Assignment.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``++`` | IncStmt_ | IncDecStmt_ | | +| | | | | +| .. 
_Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _IncStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$IncStmt.html | .. _IncDecStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$IncDecStmt.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``--`` | DecStmt_ | IncDecStmt_ | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _DecStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$DecStmt.html | .. _IncDecStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$IncDecStmt.html | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``go`` CallExpr_ | GoStmt_ | | | +| | | | | +| .. _CallExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CallExpr.html | .. 
_GoStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$GoStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``defer`` CallExpr_ | DeferStmt_ | | | +| | | | | +| .. _CallExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CallExpr.html | .. _DeferStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$DeferStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``<-`` Expr_ | SendStmt_ | | | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _SendStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SendStmt.html | | | +| .. 
_Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``case`` Expr_ ``...``\ ``:`` Stmt_ ``...`` | CaseClause_ | | can only occur as child of a SwitchStmt_ | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _CaseClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CaseClause.html | | .. _SwitchStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SwitchStmt.html | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``case`` TypeExpr_ ``...``\ ``:`` Stmt_ ``...`` | | | | +| | | | | +| .. _TypeExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``default:`` Stmt_ ``...`` | | | | +| | | | | +| .. 
_Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| ``case`` SendStmt_\ ``:`` Stmt_ ``...`` | CommClause_ | | can only occur as child of a SelectStmt_ | +| | | | | +| .. _SendStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SendStmt.html | .. _CommClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CommClause.html | | .. _SelectStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$SelectStmt.html | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``case`` RecvStmt_\ ``:`` Stmt_ ``...`` | | | | +| | | | | +| .. _RecvStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$RecvStmt.html | | | | +| .. _Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| ``default:`` Stmt_ ``...`` | | | | +| | | | | +| .. 
_Stmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$Stmt.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| Expr_ ``...`` ``=`` RecvExpr_ | RecvStmt_ | | can only occur as child of a CommClause_ | +| | | | | +| .. _Expr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html | .. _RecvStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$RecvStmt.html | | .. _CommClause: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$CommClause.html | +| .. _RecvExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RecvExpr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+ | | | +| VariableName_ ``...`` ``:=`` RecvExpr_ | | | | +| | | | | +| .. _VariableName: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$VariableName.html | | | | +| .. _RecvExpr: https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RecvExpr.html | | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +| (anything unparseable) | BadStmt_ | | | +| | | | | +| | .. 
_BadStmt: https://help.semmle.com/qldoc/go/semmle/go/Stmt.qll/type.Stmt$BadStmt.html | | | ++-------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ + +Expression classes +------------------ + +There are many expression classes, so we present them by category. +All classes in this section are subclasses of +`Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__. + +Literals +~~~~~~~~ + +All classes in this subsection are subclasses of +`Literal <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Literal.html>`__. + ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Expression syntax example | CodeQL class | Superclass | ++=========================================+==============================================================================================+==============================================================================================================================================================================================================+ +| ``23`` | `IntLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$IntLit.html>`__ | `BasicLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BasicLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``4.2`` | `FloatLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$FloatLit.html>`__ | `BasicLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BasicLit.html>`__
| ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``2.7i`` | `ImagLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ImagLit.html>`__ | `BasicLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BasicLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``'a'`` | `CharLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CharLit.html>`__ | `BasicLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BasicLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``"Hello"`` | `StringLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$StringLit.html>`__ | `BasicLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BasicLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``func(x, y int) int { return x + y }`` | `FuncLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$FuncLit.html>`__ | `FuncDef <https://help.semmle.com/qldoc/go/semmle/go/Decls.qll/type.Decls$FuncDef.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``[6]int{1, 2, 3, 5}`` | `ArrayLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArrayLit.html>`__
| `ArrayOrSliceLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArrayOrSliceLit.html>`__, `CompositeLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CompositeLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``[]int{1, 2, 3, 5}`` | `SliceLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$SliceLit.html>`__ | `ArrayOrSliceLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArrayOrSliceLit.html>`__, `CompositeLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CompositeLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``map[string]int{"A": 1, "B": 2}`` | `MapLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$MapLit.html>`__ | `CompositeLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CompositeLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``Point3D{0.5, -0.5, 0.5}`` | `StructLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$StructLit.html>`__ | `CompositeLit <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$CompositeLit.html>`__ | ++-----------------------------------------+----------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Unary expressions +~~~~~~~~~~~~~~~~~ + +All classes in this subsection are subclasses of +`UnaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$UnaryExpr.html>`__.
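+ ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Expression syntax | CodeQL class | Superclasses | ++============================================================================================+========================================================================================================+==================================================================================================================+ +| ``+``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `PlusExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$PlusExpr.html>`__ | `ArithmeticUnaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticUnaryExpr.html>`__ | ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| ``-``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `MinusExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$MinusExpr.html>`__ | `ArithmeticUnaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticUnaryExpr.html>`__ | ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| ``!``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `NotExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$NotExpr.html>`__ | `LogicalUnaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LogicalUnaryExpr.html>`__ | ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| ``^``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `ComplementExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ComplementExpr.html>`__ | `BitwiseUnaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BitwiseUnaryExpr.html>`__ |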
++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| ``&``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `AddressExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$AddressExpr.html>`__ | | ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| ``<-``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `RecvExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RecvExpr.html>`__ | | ++--------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ + +Binary expressions +~~~~~~~~~~~~~~~~~~ + +All classes in this subsection are subclasses of +`BinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BinaryExpr.html>`__.
+ ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| Expression syntax | CodeQL class | Superclasses | ++==============================================================================================================================================================================+================================================================================================+============================================================================================================================+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``*`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `MulExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$MulExpr.html>`__ | `ArithmeticBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``/`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `QuoExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$QuoExpr.html>`__ | `ArithmeticBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``%`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `RemExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RemExpr.html>`__ | `ArithmeticBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticBinaryExpr.html>`__ |
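++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``+`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `AddExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$AddExpr.html>`__ | `ArithmeticBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``-`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `SubExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$SubExpr.html>`__ | `ArithmeticBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArithmeticBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``<<`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `ShlExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ShlExpr.html>`__ | `ShiftExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ShiftExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``>>`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `ShrExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ShrExpr.html>`__ | `ShiftExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ShiftExpr.html>`__ |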
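++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``&&`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `LandExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LandExpr.html>`__ | `LogicalBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LogicalBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``||`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `LorExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LorExpr.html>`__ | `LogicalBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LogicalBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``<`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `LssExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LssExpr.html>`__ | `RelationalComparisonExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RelationalComparisonExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``>`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `GtrExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$GtrExpr.html>`__ | `RelationalComparisonExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RelationalComparisonExpr.html>`__ |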
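++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``<=`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `LeqExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$LeqExpr.html>`__ | `RelationalComparisonExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RelationalComparisonExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``>=`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `GeqExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$GeqExpr.html>`__ | `RelationalComparisonExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$RelationalComparisonExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``==`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `EqlExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$EqlExpr.html>`__ | `EqualityTestExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$EqualityTestExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``!=`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `NeqExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$NeqExpr.html>`__ | `EqualityTestExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$EqualityTestExpr.html>`__ |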
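++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``&`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `AndExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$AndExpr.html>`__ | `BitwiseBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BitwiseBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``|`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `OrExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$OrExpr.html>`__ | `BitwiseBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BitwiseBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``^`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `XorExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$XorExpr.html>`__ | `BitwiseBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BitwiseBinaryExpr.html>`__ | ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ +| `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ ``&^`` `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__ | `AndNotExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$AndNotExpr.html>`__ | `BitwiseBinaryExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$BitwiseBinaryExpr.html>`__ |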
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+ + +Type expressions +~~~~~~~~~~~~~~~~ + +All classes in this subsection are subclasses of +`TypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html>`__. + ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| Expression syntax | CodeQL class | Superclasses | ++=========================================================================================================================================================================================================+====================================================================================================================+====================================================================================================+ +| ``[``\ `Expr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$Expr.html>`__\ ``]`` `TypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html>`__ | `ArrayTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ArrayTypeExpr.html>`__ | | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``struct { ... 
}`` | `StructTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$StructTypeExpr.html>`__ | | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``func`` `FunctionName <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$FunctionName.html>`__\ ``(...) (...)`` | `FuncTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$FuncTypeExpr.html>`__ | | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``interface { ... }`` | `InterfaceTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$InterfaceTypeExpr.html>`__ | | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``map[``\ `TypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html>`__\ ``]``\ `TypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html>`__ | `MapTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$MapTypeExpr.html>`__ | | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``chan<-`` `TypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$TypeExpr.html>`__ | `SendChanTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$SendChanTypeExpr.html>`__ | `ChanTypeExpr <https://help.semmle.com/qldoc/go/semmle/go/Expr.qll/type.Expr$ChanTypeExpr.html>`__ |
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``<-chan`` `TypeExpr `__ | `RecvChanTypeExpr `__ | `ChanTypeExpr `__ | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| ``chan`` `TypeExpr `__ | `SendRecvChanTypeExpr `__ | `ChanTypeExpr `__ | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ + +Name expressions +~~~~~~~~~~~~~~~~ + +All classes in this subsection are subclasses of +`Name `__. + +The following classes relate to the structure of the name. 
+ ++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| Expression syntax | CodeQL class | Superclasses | ++===================================================================================================================================================================================+======================================================================================================+====================================================================================================+ +| `Ident `__ | `SimpleName `__ | `Ident `__ | ++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ +| `Ident `__\ ``.``\ `Ident `__ | `QualifiedName `__ | `SelectorExpr `__ | ++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+ + +The following classes relate to what sort of entity the name refers to. 
+ + +- `PackageName `__ +- `TypeName `__ +- `LabelName `__ +- `ValueName `__ + + - `ConstantName `__ + - `VariableName `__ + - `FunctionName `__ + +Miscellaneous +~~~~~~~~~~~~~ + ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Expression syntax | CodeQL class | Superclasses | Remarks | ++============================================================================================================================================================================================================================================================================================================================================================================+========================================================================================================+====================================================================================================================+==========================================================================================================================================================================================================================+ +| ``foo`` | `Ident `__ | | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``_`` | `BlankIdent `__ | | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``...`` | `Ellipsis `__ | | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``(``\ `Expr `__\ ``)`` | `ParenExpr `__ | | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Ident `__\ ``.``\ `Ident `__ | `SelectorExpr `__ | | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Expr `__\ ``[``\ `Expr `__\ ``]`` | `IndexExpr `__ | | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Expr `__\ ``[``\ `Expr `__\ ``:``\ `Expr `__\ ``:``\ `Expr `__\ ``]`` | `SliceExpr `__ | | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Expr `__\ ``.(``\ `TypeExpr `__\ ``)`` | `TypeAssertExpr `__ | | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``*``\ `Expr `__ | `StarExpr `__ | | can be a `ValueExpr `__ or `TypeExpr `__ depending on context | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Expr `__\ ``:`` `Expr `__ | `KeyValueExpr `__ | | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `TypeExpr `__\ ``(``\ `Expr `__\ ``)`` | `ConversionExpr `__ | `CallOrConversionExpr `__ | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `Expr `__\ ``(...)`` | `CallExpr `__ | `CallOrConversionExpr `__ | | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| (anything unparseable) | `BadExpr `__ | | | 
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +The following classes organize expressions by the kind of entity they refer to. + ++------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| CodeQL class | Explanation | ++======================================================================================================+=========================================================================================================================================================================================================================================================+ +| `TypeExpr `__ | an expression that denotes a type | 
++------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `ReferenceExpr `__ | an expression that refers to a variable, a constant, a function, a field, or an element of an array or a slice | ++------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| `ValueExpr `__ | an expression that can be evaluated to a value (as opposed to expressions that refer to a package, a type, or a statement label). This generalizes `ReferenceExpr `__ | ++------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Further reading +--------------- + +.. include:: ../../reusables/go-further-reading.rst +.. 
include:: ../../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file diff --git a/go/docs/language/learn-ql/go/library-modeling-go.rst b/go/docs/language/learn-ql/go/library-modeling-go.rst new file mode 100644 index 00000000000..ef7f50bf7cb --- /dev/null +++ b/go/docs/language/learn-ql/go/library-modeling-go.rst @@ -0,0 +1,122 @@ +Modeling data flow in Go libraries +================================== + +When analyzing a Go program, CodeQL does not examine the source code for +external packages. To track the flow of untrusted data through a library, you +can create a model of the library. + +You can find existing models in the ``go/ql/lib/semmle/go/frameworks/`` folder of the +`CodeQL repository <https://github.com/github/codeql/tree/main/go/ql/lib/semmle/go/frameworks>`__. +To add a new model, you should make a new file in that folder, named after the library. + +Sources +------- + +To mark a source of data that is controlled by an untrusted user, we +create a class extending ``UntrustedFlowSource::Range``. Inheritance and +the characteristic predicate of the class should be used to specify +exactly the dataflow node that introduces the data. Here is a short +example from ``Mux.qll``. + +.. code-block:: ql + + class RequestVars extends DataFlow::UntrustedFlowSource::Range, DataFlow::CallNode { + RequestVars() { this.getTarget().hasQualifiedName("github.com/gorilla/mux", "Vars") } + } + +This has the effect that all calls to `the function Vars from the +package mux <https://pkg.go.dev/github.com/gorilla/mux#Vars>`__ are +treated as sources of untrusted data. + +Flow propagation +---------------- + +By default, we assume that all functions in libraries do not have +any data flow. To indicate that a particular function does have data flow, +create a class extending ``TaintTracking::FunctionModel`` (or +``DataFlow::FunctionModel`` if the untrusted user data is passed on +without being modified). + +Inheritance and the characteristic predicate of the class should specify +the function.
The class should also have a member predicate with the signature +``override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp)`` +(or +``override predicate hasDataFlow(FunctionInput inp, FunctionOutput outp)`` +if extending ``DataFlow::FunctionModel``). The body should constrain +``inp`` and ``outp``. + +``FunctionInput`` is an abstract representation of the inputs to a +function. The options are: + +* the receiver (``inp.isReceiver()``) +* one of the parameters (``inp.isParameter(i)``) +* one of the results (``inp.isResult(i)``, or ``inp.isResult()`` if there is only one result) + +Note that it may seem strange that the result of a function could be +considered as a function input, but it is needed in some cases. For +instance, the function ``bufio.NewWriter`` returns a writer ``bw`` that +buffers write operations to an underlying writer ``w``. If tainted data +is written to ``bw``, then it makes sense to propagate that taint back +to the underlying writer ``w``, which can be modeled by saying that +``bufio.NewWriter`` propagates taint from its result to its first +argument. + +Similarly, ``FunctionOutput`` is an abstract representation of the +outputs of a function. The options are: + +* the receiver (``outp.isReceiver()``) +* one of the parameters (``outp.isParameter(i)``) +* one of the results (``outp.isResult(i)``, or ``outp.isResult()`` if there is only one result) + +Here is an example from ``Gin.qll``, which has been slightly simplified. + +.. code-block:: ql + + private class ParamsGet extends TaintTracking::FunctionModel, Method { + ParamsGet() { this.hasQualifiedName("github.com/gin-gonic/gin", "Params", "Get") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult(0) + } + } + +This has the effect that calls to the ``Get`` method with receiver type +``Params`` from the ``gin-gonic/gin`` package allow taint to flow from +the receiver to the first result.
In other words, if ``p`` has type +``Params`` and taint can flow to it, then after the line +``x := p.Get("foo")`` taint can also flow to ``x``. + +Sanitizers +---------- + +It is not necessary to indicate that library functions are sanitizers. +Their bodies are not analyzed, so it is assumed that data does not +flow through them. + +Sinks +----- + +Data-flow sinks are specified by queries rather than by library models. +However, you can use library models to indicate when functions belong to +special categories. Queries can then use these categories when specifying +sinks. Classes representing these special categories are contained in +``go/ql/lib/semmle/go/Concepts.qll`` in the `CodeQL repository +<https://github.com/github/codeql/blob/main/go/ql/lib/semmle/go/Concepts.qll>`__. +``Concepts.qll`` includes classes for logger mechanisms, +HTTP response writers, HTTP redirects, and marshaling and unmarshaling +functions. + +Here is a short example from ``Stdlib.qll``, which has been slightly simplified. + +.. code-block:: ql + + private class PrintfCall extends LoggerCall::Range, DataFlow::CallNode { + PrintfCall() { this.getTarget().hasQualifiedName("fmt", ["Print", "Printf", "Println"]) } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } + +This has the effect that any call to ``Print``, ``Printf``, or +``Println`` in the package ``fmt`` is recognized as a logger call. +Any query that uses logger calls as a sink will then identify when tainted data +has been passed as an argument to ``Print``, ``Printf``, or ``Println``.
\ No newline at end of file
diff --git a/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/exprs.ql b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/exprs.ql
new file mode 100644
index 00000000000..b4b7498e27c
--- /dev/null
+++ b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/exprs.ql
@@ -0,0 +1,55 @@
+/** Wrapper around the raw `@expr` database type so it can be used in the query below. */
+class Expr_ extends @expr {
+  string toString() { result = "Expr" }
+}
+
+/** Wrapper around the raw `@exprparent` database type so it can be used in the query below. */
+class ExprParent_ extends @exprparent {
+  string toString() { result = "ExprParent" }
+}
+
+/**
+ * Gets the `@expr` kind index of the old (downgraded) dbscheme that corresponds
+ * to `new_index`, a kind index of the new dbscheme.
+ *
+ * Two new kinds have been inserted such that `@sliceexpr` which used to have
+ * index 13 now has index 15. Another new kind has been inserted such that
+ * `@plusexpr` which used to have index 23 now has index 26. Expressed in new
+ * indices, the inserted kinds are therefore 13, 14 and 25:
+ *
+ * - new indices below 13 are unchanged;
+ * - new indices 13, 14 and 25 have no old counterpart and become 0 (`@badexpr`);
+ * - new indices 15 to 24 are shifted down by 2;
+ * - new indices 26 and above are shifted down by 3.
+ */
+bindingset[new_index]
+int old_index(int new_index) {
+  if new_index < 13
+  then result = new_index
+  else
+    if new_index = [13, 14]
+    then result = 0 // badexpr
+    else
+      // The third inserted kind sits at *new* index 25 (old index 23 plus the
+      // two earlier insertions), so that is where the shift changes from 2 to 3.
+      if new_index < 25
+      then result + (15 - 13) = new_index
+      else
+        if new_index = 25
+        then result = 0 // badexpr
+        else result + (26 - 23) = new_index
+}
+
+// The schema for exprs is:
+//
+// exprs(unique int id: @expr,
+//       int kind: int ref,
+//       int parent: @exprparent ref,
+//       int idx: int ref);
+//
+// Re-emit every row with its kind translated to the old numbering.
+from Expr_ expr, int new_kind, ExprParent_ parent, int idx, int old_kind
+where
+  exprs(expr, new_kind, parent, idx) and
+  old_kind = old_index(new_kind)
+select expr, old_kind, parent, idx
diff --git a/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/go.dbscheme b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/go.dbscheme
new file mode 100644
index 00000000000..8f168c8af3f
--- /dev/null
+++ b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/go.dbscheme
@@ -0,0 +1,531 @@
+/** Auto-generated dbscheme; do not edit.
*/ + + +/** Duplicate code **/ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity; + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int ref, + int endColumn : int ref); + +/** External data **/ + +externalData( + int id : @externalDataElement, + varchar(900) path : string ref, + int column: int ref, + varchar(900) value : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); + + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = 
@xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +compilations(unique int id: @compilation, string cwd: string ref); + +#keyset[id, num] +compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref); + +#keyset[id, num, kind] +compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref); + +diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref); + +compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref); + +#keyset[id, num] +compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref); + +diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref, + string full_error_message: string ref, int location: @location ref); + +locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref, + int endLine: int ref, int endColumn: int ref); + +numlines(int element_id: @sourceline ref, int num_lines: int ref, int num_code: int ref, int num_comment: int ref); + +files(unique int id: @file, string name: string ref); + +folders(unique int id: @folder, string name: string ref); + +containerparent(int parent: @container ref, unique int child: @container ref); + +has_location(unique int locatable: @locatable ref, int location: @location ref); + +#keyset[parent, idx] +comment_groups(unique int id: @comment_group, int parent: @file ref, int idx: int ref); + +comments(unique int id: @comment, int kind: int ref, int parent: @comment_group ref, int idx: int ref, string text: string ref); + +doc_comments(unique int 
node: @documentable ref, int comment: @comment_group ref); + +#keyset[parent, idx] +exprs(unique int id: @expr, int kind: int ref, int parent: @exprparent ref, int idx: int ref); + +literals(unique int expr: @expr ref, string value: string ref, string raw: string ref); + +constvalues(unique int expr: @expr ref, string value: string ref, string exact: string ref); + +fields(unique int id: @field, int parent: @fieldparent ref, int idx: int ref); + +#keyset[parent, idx] +stmts(unique int id: @stmt, int kind: int ref, int parent: @stmtparent ref, int idx: int ref); + +#keyset[parent, idx] +decls(unique int id: @decl, int kind: int ref, int parent: @declparent ref, int idx: int ref); + +#keyset[parent, idx] +specs(unique int id: @spec, int kind: int ref, int parent: @gendecl ref, int idx: int ref); + +scopes(unique int id: @scope, int kind: int ref); + +scopenesting(unique int inner: @scope ref, int outer: @scope ref); + +scopenodes(unique int node: @scopenode ref, int scope: @localscope ref); + +objects(unique int id: @object, int kind: int ref, string name: string ref); + +objectscopes(unique int object: @object ref, int scope: @scope ref); + +objecttypes(unique int object: @object ref, int tp: @type ref); + +methodreceivers(unique int method: @object ref, int receiver: @object ref); + +fieldstructs(unique int field: @object ref, int struct: @structtype ref); + +methodhosts(int method: @object ref, int host: @namedtype ref); + +defs(int ident: @ident ref, int object: @object ref); + +uses(int ident: @ident ref, int object: @object ref); + +types(unique int id: @type, int kind: int ref); + +type_of(unique int expr: @expr ref, int tp: @type ref); + +typename(unique int tp: @type ref, string name: string ref); + +key_type(unique int map: @maptype ref, int tp: @type ref); + +element_type(unique int container: @containertype ref, int tp: @type ref); + +base_type(unique int ptr: @pointertype ref, int tp: @type ref); + +underlying_type(unique int named: @namedtype ref, int 
tp: @type ref); + +#keyset[parent, index] +component_types(int parent: @compositetype ref, int index: int ref, string name: string ref, int tp: @type ref); + +array_length(unique int tp: @arraytype ref, string len: string ref); + +type_objects(unique int tp: @type ref, int object: @object ref); + +packages(unique int id: @package, string name: string ref, string path: string ref, int scope: @packagescope ref); + +#keyset[parent, idx] +modexprs(unique int id: @modexpr, int kind: int ref, int parent: @modexprparent ref, int idx: int ref); + +#keyset[parent, idx] +modtokens(string token: string ref, int parent: @modexpr ref, int idx: int ref); + +#keyset[package, idx] +errors(unique int id: @error, int kind: int ref, string msg: string ref, string rawpos: string ref, + string file: string ref, int line: int ref, int col: int ref, int package: @package ref, int idx: int ref); + +has_ellipsis(int id: @callorconversionexpr ref); + +variadic(int id: @signaturetype ref); + +@container = @file | @folder; + +@locatable = @xmllocatable | @node | @localscope; + +@node = @documentable | @exprparent | @modexprparent | @fieldparent | @stmtparent | @declparent | @scopenode + | @comment_group | @comment; + +@documentable = @file | @field | @spec | @gendecl | @funcdecl | @modexpr; + +@exprparent = @funcdef | @file | @expr | @field | @stmt | @decl | @spec; + +@modexprparent = @file | @modexpr; + +@fieldparent = @decl | @structtypeexpr | @functypeexpr | @interfacetypeexpr; + +@stmtparent = @funcdef | @stmt | @decl; + +@declparent = @file | @declstmt; + +@funcdef = @funclit | @funcdecl; + +@scopenode = @file | @functypeexpr | @blockstmt | @ifstmt | @caseclause | @switchstmt | @commclause | @loopstmt; + +@location = @location_default; + +@sourceline = @locatable; + +case @comment.kind of + 0 = @slashslashcomment +| 1 = @slashstarcomment; + +case @expr.kind of + 0 = @badexpr +| 1 = @ident +| 2 = @ellipsis +| 3 = @intlit +| 4 = @floatlit +| 5 = @imaglit +| 6 = @charlit +| 7 = @stringlit 
+| 8 = @funclit +| 9 = @compositelit +| 10 = @parenexpr +| 11 = @selectorexpr +| 12 = @indexexpr +| 13 = @sliceexpr +| 14 = @typeassertexpr +| 15 = @callorconversionexpr +| 16 = @starexpr +| 17 = @keyvalueexpr +| 18 = @arraytypeexpr +| 19 = @structtypeexpr +| 20 = @functypeexpr +| 21 = @interfacetypeexpr +| 22 = @maptypeexpr +| 23 = @plusexpr +| 24 = @minusexpr +| 25 = @notexpr +| 26 = @complementexpr +| 27 = @derefexpr +| 28 = @addressexpr +| 29 = @arrowexpr +| 30 = @lorexpr +| 31 = @landexpr +| 32 = @eqlexpr +| 33 = @neqexpr +| 34 = @lssexpr +| 35 = @leqexpr +| 36 = @gtrexpr +| 37 = @geqexpr +| 38 = @addexpr +| 39 = @subexpr +| 40 = @orexpr +| 41 = @xorexpr +| 42 = @mulexpr +| 43 = @quoexpr +| 44 = @remexpr +| 45 = @shlexpr +| 46 = @shrexpr +| 47 = @andexpr +| 48 = @andnotexpr +| 49 = @sendchantypeexpr +| 50 = @recvchantypeexpr +| 51 = @sendrcvchantypeexpr +| 52 = @errorexpr; + +@basiclit = @intlit | @floatlit | @imaglit | @charlit | @stringlit; + +@operatorexpr = @logicalexpr | @arithmeticexpr | @bitwiseexpr | @unaryexpr | @binaryexpr; + +@logicalexpr = @logicalunaryexpr | @logicalbinaryexpr; + +@arithmeticexpr = @arithmeticunaryexpr | @arithmeticbinaryexpr; + +@bitwiseexpr = @bitwiseunaryexpr | @bitwisebinaryexpr; + +@unaryexpr = @logicalunaryexpr | @bitwiseunaryexpr | @arithmeticunaryexpr | @derefexpr | @addressexpr | @arrowexpr; + +@logicalunaryexpr = @notexpr; + +@bitwiseunaryexpr = @complementexpr; + +@arithmeticunaryexpr = @plusexpr | @minusexpr; + +@binaryexpr = @logicalbinaryexpr | @bitwisebinaryexpr | @arithmeticbinaryexpr | @comparison; + +@logicalbinaryexpr = @lorexpr | @landexpr; + +@bitwisebinaryexpr = @shiftexpr | @orexpr | @xorexpr | @andexpr | @andnotexpr; + +@arithmeticbinaryexpr = @addexpr | @subexpr | @mulexpr | @quoexpr | @remexpr; + +@shiftexpr = @shlexpr | @shrexpr; + +@comparison = @equalitytest | @relationalcomparison; + +@equalitytest = @eqlexpr | @neqexpr; + +@relationalcomparison = @lssexpr | @leqexpr | @gtrexpr | @geqexpr; + 
+@chantypeexpr = @sendchantypeexpr | @recvchantypeexpr | @sendrcvchantypeexpr; + +case @stmt.kind of + 0 = @badstmt +| 1 = @declstmt +| 2 = @emptystmt +| 3 = @labeledstmt +| 4 = @exprstmt +| 5 = @sendstmt +| 6 = @incstmt +| 7 = @decstmt +| 8 = @gostmt +| 9 = @deferstmt +| 10 = @returnstmt +| 11 = @breakstmt +| 12 = @continuestmt +| 13 = @gotostmt +| 14 = @fallthroughstmt +| 15 = @blockstmt +| 16 = @ifstmt +| 17 = @caseclause +| 18 = @exprswitchstmt +| 19 = @typeswitchstmt +| 20 = @commclause +| 21 = @selectstmt +| 22 = @forstmt +| 23 = @rangestmt +| 24 = @assignstmt +| 25 = @definestmt +| 26 = @addassignstmt +| 27 = @subassignstmt +| 28 = @mulassignstmt +| 29 = @quoassignstmt +| 30 = @remassignstmt +| 31 = @andassignstmt +| 32 = @orassignstmt +| 33 = @xorassignstmt +| 34 = @shlassignstmt +| 35 = @shrassignstmt +| 36 = @andnotassignstmt; + +@incdecstmt = @incstmt | @decstmt; + +@assignment = @simpleassignstmt | @compoundassignstmt; + +@simpleassignstmt = @assignstmt | @definestmt; + +@compoundassignstmt = @addassignstmt | @subassignstmt | @mulassignstmt | @quoassignstmt | @remassignstmt + | @andassignstmt | @orassignstmt | @xorassignstmt | @shlassignstmt | @shrassignstmt | @andnotassignstmt; + +@branchstmt = @breakstmt | @continuestmt | @gotostmt | @fallthroughstmt; + +@switchstmt = @exprswitchstmt | @typeswitchstmt; + +@loopstmt = @forstmt | @rangestmt; + +case @decl.kind of + 0 = @baddecl +| 1 = @importdecl +| 2 = @constdecl +| 3 = @typedecl +| 4 = @vardecl +| 5 = @funcdecl; + +@gendecl = @importdecl | @constdecl | @typedecl | @vardecl; + +case @spec.kind of + 0 = @importspec +| 1 = @valuespec +| 2 = @typedefspec +| 3 = @aliasspec; + +@typespec = @typedefspec | @aliasspec; + +case @object.kind of + 0 = @pkgobject +| 1 = @decltypeobject +| 2 = @builtintypeobject +| 3 = @declconstobject +| 4 = @builtinconstobject +| 5 = @declvarobject +| 6 = @declfunctionobject +| 7 = @builtinfunctionobject +| 8 = @labelobject; + +@declobject = @decltypeobject | @declconstobject | 
@declvarobject | @declfunctionobject; + +@builtinobject = @builtintypeobject | @builtinconstobject | @builtinfunctionobject; + +@typeobject = @decltypeobject | @builtintypeobject; + +@valueobject = @constobject | @varobject | @functionobject; + +@constobject = @declconstobject | @builtinconstobject; + +@varobject = @declvarobject; + +@functionobject = @declfunctionobject | @builtinfunctionobject; + +case @scope.kind of + 0 = @universescope +| 1 = @packagescope +| 2 = @localscope; + +case @type.kind of + 0 = @invalidtype +| 1 = @boolexprtype +| 2 = @inttype +| 3 = @int8type +| 4 = @int16type +| 5 = @int32type +| 6 = @int64type +| 7 = @uinttype +| 8 = @uint8type +| 9 = @uint16type +| 10 = @uint32type +| 11 = @uint64type +| 12 = @uintptrtype +| 13 = @float32type +| 14 = @float64type +| 15 = @complex64type +| 16 = @complex128type +| 17 = @stringexprtype +| 18 = @unsafepointertype +| 19 = @boolliteraltype +| 20 = @intliteraltype +| 21 = @runeliteraltype +| 22 = @floatliteraltype +| 23 = @complexliteraltype +| 24 = @stringliteraltype +| 25 = @nilliteraltype +| 26 = @arraytype +| 27 = @slicetype +| 28 = @structtype +| 29 = @pointertype +| 30 = @interfacetype +| 31 = @tupletype +| 32 = @signaturetype +| 33 = @maptype +| 34 = @sendchantype +| 35 = @recvchantype +| 36 = @sendrcvchantype +| 37 = @namedtype; + +@basictype = @booltype | @numerictype | @stringtype | @literaltype | @invalidtype | @unsafepointertype; + +@booltype = @boolexprtype | @boolliteraltype; + +@numerictype = @integertype | @floattype | @complextype; + +@integertype = @signedintegertype | @unsignedintegertype; + +@signedintegertype = @inttype | @int8type | @int16type | @int32type | @int64type | @intliteraltype | @runeliteraltype; + +@unsignedintegertype = @uinttype | @uint8type | @uint16type | @uint32type | @uint64type | @uintptrtype; + +@floattype = @float32type | @float64type | @floatliteraltype; + +@complextype = @complex64type | @complex128type | @complexliteraltype; + +@stringtype = @stringexprtype | 
@stringliteraltype; + +@literaltype = @boolliteraltype | @intliteraltype | @runeliteraltype | @floatliteraltype | @complexliteraltype + | @stringliteraltype | @nilliteraltype; + +@compositetype = @containertype | @structtype | @pointertype | @interfacetype | @tupletype | @signaturetype | @namedtype; + +@containertype = @arraytype | @slicetype | @maptype | @chantype; + +@chantype = @sendchantype | @recvchantype | @sendrcvchantype; + +case @modexpr.kind of + 0 = @modcommentblock +| 1 = @modline +| 2 = @modlineblock +| 3 = @modlparen +| 4 = @modrparen; + +case @error.kind of + 0 = @unknownerror +| 1 = @listerror +| 2 = @parseerror +| 3 = @typeerror; + diff --git a/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/old.dbscheme b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/old.dbscheme new file mode 100644 index 00000000000..90fa7836e0a --- /dev/null +++ b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/old.dbscheme @@ -0,0 +1,547 @@ +/** Auto-generated dbscheme; do not edit. 
*/ + + +/** Duplicate code **/ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity; + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int ref, + int endColumn : int ref); + +/** External data **/ + +externalData( + int id : @externalDataElement, + varchar(900) path : string ref, + int column: int ref, + varchar(900) value : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); + + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = 
@xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +compilations(unique int id: @compilation, string cwd: string ref); + +#keyset[id, num] +compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref); + +#keyset[id, num, kind] +compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref); + +diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref); + +compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref); + +#keyset[id, num] +compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref); + +diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref, + string full_error_message: string ref, int location: @location ref); + +locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref, + int endLine: int ref, int endColumn: int ref); + +numlines(int element_id: @sourceline ref, int num_lines: int ref, int num_code: int ref, int num_comment: int ref); + +files(unique int id: @file, string name: string ref); + +folders(unique int id: @folder, string name: string ref); + +containerparent(int parent: @container ref, unique int child: @container ref); + +has_location(unique int locatable: @locatable ref, int location: @location ref); + +#keyset[parent, idx] +comment_groups(unique int id: @comment_group, int parent: @file ref, int idx: int ref); + +comments(unique int id: @comment, int kind: int ref, int parent: @comment_group ref, int idx: int ref, string text: string ref); + +doc_comments(unique int 
node: @documentable ref, int comment: @comment_group ref); + +#keyset[parent, idx] +exprs(unique int id: @expr, int kind: int ref, int parent: @exprparent ref, int idx: int ref); + +literals(unique int expr: @expr ref, string value: string ref, string raw: string ref); + +constvalues(unique int expr: @expr ref, string value: string ref, string exact: string ref); + +fields(unique int id: @field, int parent: @fieldparent ref, int idx: int ref); + +typeparamdecls(unique int id: @typeparamdecl, int parent: @typeparamdeclparent ref, int idx: int ref); + +#keyset[parent, idx] +stmts(unique int id: @stmt, int kind: int ref, int parent: @stmtparent ref, int idx: int ref); + +#keyset[parent, idx] +decls(unique int id: @decl, int kind: int ref, int parent: @declparent ref, int idx: int ref); + +#keyset[parent, idx] +specs(unique int id: @spec, int kind: int ref, int parent: @gendecl ref, int idx: int ref); + +scopes(unique int id: @scope, int kind: int ref); + +scopenesting(unique int inner: @scope ref, int outer: @scope ref); + +scopenodes(unique int node: @scopenode ref, int scope: @localscope ref); + +objects(unique int id: @object, int kind: int ref, string name: string ref); + +objectscopes(unique int object: @object ref, int scope: @scope ref); + +objecttypes(unique int object: @object ref, int tp: @type ref); + +methodreceivers(unique int method: @object ref, int receiver: @object ref); + +fieldstructs(unique int field: @object ref, int struct: @structtype ref); + +methodhosts(int method: @object ref, int host: @namedtype ref); + +defs(int ident: @ident ref, int object: @object ref); + +uses(int ident: @ident ref, int object: @object ref); + +types(unique int id: @type, int kind: int ref); + +type_of(unique int expr: @expr ref, int tp: @type ref); + +typename(unique int tp: @type ref, string name: string ref); + +key_type(unique int map: @maptype ref, int tp: @type ref); + +element_type(unique int container: @containertype ref, int tp: @type ref); + +base_type(unique 
int ptr: @pointertype ref, int tp: @type ref); + +underlying_type(unique int named: @namedtype ref, int tp: @type ref); + +#keyset[parent, index] +component_types(int parent: @compositetype ref, int index: int ref, string name: string ref, int tp: @type ref); + +array_length(unique int tp: @arraytype ref, string len: string ref); + +type_objects(unique int tp: @type ref, int object: @object ref); + +packages(unique int id: @package, string name: string ref, string path: string ref, int scope: @packagescope ref); + +#keyset[parent, idx] +modexprs(unique int id: @modexpr, int kind: int ref, int parent: @modexprparent ref, int idx: int ref); + +#keyset[parent, idx] +modtokens(string token: string ref, int parent: @modexpr ref, int idx: int ref); + +#keyset[package, idx] +errors(unique int id: @error, int kind: int ref, string msg: string ref, string rawpos: string ref, + string file: string ref, int line: int ref, int col: int ref, int package: @package ref, int idx: int ref); + +has_ellipsis(int id: @callorconversionexpr ref); + +variadic(int id: @signaturetype ref); + +#keyset[parent, idx] +typeparam(unique int tp: @typeparamtype ref, string name: string ref, int bound: @compositetype ref, + int parent: @typeparamparentobject ref, int idx: int ref); + +@container = @file | @folder; + +@locatable = @xmllocatable | @node | @localscope; + +@node = @documentable | @exprparent | @modexprparent | @fieldparent | @stmtparent | @declparent | @typeparamdeclparent + | @scopenode | @comment_group | @comment; + +@documentable = @file | @field | @typeparamdecl | @spec | @gendecl | @funcdecl | @modexpr; + +@exprparent = @funcdef | @file | @expr | @field | @stmt | @decl | @typeparamdecl | @spec; + +@modexprparent = @file | @modexpr; + +@fieldparent = @decl | @structtypeexpr | @functypeexpr | @interfacetypeexpr; + +@stmtparent = @funcdef | @stmt | @decl; + +@declparent = @file | @declstmt; + +@typeparamdeclparent = @funcdecl | @typespec; + +@funcdef = @funclit | @funcdecl; + 
+@scopenode = @file | @functypeexpr | @blockstmt | @ifstmt | @caseclause | @switchstmt | @commclause | @loopstmt; + +@location = @location_default; + +@sourceline = @locatable; + +case @comment.kind of + 0 = @slashslashcomment +| 1 = @slashstarcomment; + +case @expr.kind of + 0 = @badexpr +| 1 = @ident +| 2 = @ellipsis +| 3 = @intlit +| 4 = @floatlit +| 5 = @imaglit +| 6 = @charlit +| 7 = @stringlit +| 8 = @funclit +| 9 = @compositelit +| 10 = @parenexpr +| 11 = @selectorexpr +| 12 = @indexexpr +| 13 = @genericfunctioninstantiationexpr +| 14 = @generictypeinstantiationexpr +| 15 = @sliceexpr +| 16 = @typeassertexpr +| 17 = @callorconversionexpr +| 18 = @starexpr +| 19 = @keyvalueexpr +| 20 = @arraytypeexpr +| 21 = @structtypeexpr +| 22 = @functypeexpr +| 23 = @interfacetypeexpr +| 24 = @maptypeexpr +| 25 = @typesetliteralexpr +| 26 = @plusexpr +| 27 = @minusexpr +| 28 = @notexpr +| 29 = @complementexpr +| 30 = @derefexpr +| 31 = @addressexpr +| 32 = @arrowexpr +| 33 = @lorexpr +| 34 = @landexpr +| 35 = @eqlexpr +| 36 = @neqexpr +| 37 = @lssexpr +| 38 = @leqexpr +| 39 = @gtrexpr +| 40 = @geqexpr +| 41 = @addexpr +| 42 = @subexpr +| 43 = @orexpr +| 44 = @xorexpr +| 45 = @mulexpr +| 46 = @quoexpr +| 47 = @remexpr +| 48 = @shlexpr +| 49 = @shrexpr +| 50 = @andexpr +| 51 = @andnotexpr +| 52 = @sendchantypeexpr +| 53 = @recvchantypeexpr +| 54 = @sendrcvchantypeexpr +| 55 = @errorexpr; + +@basiclit = @intlit | @floatlit | @imaglit | @charlit | @stringlit; + +@operatorexpr = @logicalexpr | @arithmeticexpr | @bitwiseexpr | @unaryexpr | @binaryexpr; + +@logicalexpr = @logicalunaryexpr | @logicalbinaryexpr; + +@arithmeticexpr = @arithmeticunaryexpr | @arithmeticbinaryexpr; + +@bitwiseexpr = @bitwiseunaryexpr | @bitwisebinaryexpr; + +@unaryexpr = @logicalunaryexpr | @bitwiseunaryexpr | @arithmeticunaryexpr | @derefexpr | @addressexpr | @arrowexpr; + +@logicalunaryexpr = @notexpr; + +@bitwiseunaryexpr = @complementexpr; + +@arithmeticunaryexpr = @plusexpr | @minusexpr; + 
+@binaryexpr = @logicalbinaryexpr | @bitwisebinaryexpr | @arithmeticbinaryexpr | @comparison; + +@logicalbinaryexpr = @lorexpr | @landexpr; + +@bitwisebinaryexpr = @shiftexpr | @orexpr | @xorexpr | @andexpr | @andnotexpr; + +@arithmeticbinaryexpr = @addexpr | @subexpr | @mulexpr | @quoexpr | @remexpr; + +@shiftexpr = @shlexpr | @shrexpr; + +@comparison = @equalitytest | @relationalcomparison; + +@equalitytest = @eqlexpr | @neqexpr; + +@relationalcomparison = @lssexpr | @leqexpr | @gtrexpr | @geqexpr; + +@chantypeexpr = @sendchantypeexpr | @recvchantypeexpr | @sendrcvchantypeexpr; + +case @stmt.kind of + 0 = @badstmt +| 1 = @declstmt +| 2 = @emptystmt +| 3 = @labeledstmt +| 4 = @exprstmt +| 5 = @sendstmt +| 6 = @incstmt +| 7 = @decstmt +| 8 = @gostmt +| 9 = @deferstmt +| 10 = @returnstmt +| 11 = @breakstmt +| 12 = @continuestmt +| 13 = @gotostmt +| 14 = @fallthroughstmt +| 15 = @blockstmt +| 16 = @ifstmt +| 17 = @caseclause +| 18 = @exprswitchstmt +| 19 = @typeswitchstmt +| 20 = @commclause +| 21 = @selectstmt +| 22 = @forstmt +| 23 = @rangestmt +| 24 = @assignstmt +| 25 = @definestmt +| 26 = @addassignstmt +| 27 = @subassignstmt +| 28 = @mulassignstmt +| 29 = @quoassignstmt +| 30 = @remassignstmt +| 31 = @andassignstmt +| 32 = @orassignstmt +| 33 = @xorassignstmt +| 34 = @shlassignstmt +| 35 = @shrassignstmt +| 36 = @andnotassignstmt; + +@incdecstmt = @incstmt | @decstmt; + +@assignment = @simpleassignstmt | @compoundassignstmt; + +@simpleassignstmt = @assignstmt | @definestmt; + +@compoundassignstmt = @addassignstmt | @subassignstmt | @mulassignstmt | @quoassignstmt | @remassignstmt + | @andassignstmt | @orassignstmt | @xorassignstmt | @shlassignstmt | @shrassignstmt | @andnotassignstmt; + +@branchstmt = @breakstmt | @continuestmt | @gotostmt | @fallthroughstmt; + +@switchstmt = @exprswitchstmt | @typeswitchstmt; + +@loopstmt = @forstmt | @rangestmt; + +case @decl.kind of + 0 = @baddecl +| 1 = @importdecl +| 2 = @constdecl +| 3 = @typedecl +| 4 = @vardecl +| 5 = 
@funcdecl; + +@gendecl = @importdecl | @constdecl | @typedecl | @vardecl; + +case @spec.kind of + 0 = @importspec +| 1 = @valuespec +| 2 = @typedefspec +| 3 = @aliasspec; + +@typespec = @typedefspec | @aliasspec; + +case @object.kind of + 0 = @pkgobject +| 1 = @decltypeobject +| 2 = @builtintypeobject +| 3 = @declconstobject +| 4 = @builtinconstobject +| 5 = @declvarobject +| 6 = @declfunctionobject +| 7 = @builtinfunctionobject +| 8 = @labelobject; + +@typeparamparentobject = @decltypeobject | @declfunctionobject; + +@declobject = @decltypeobject | @declconstobject | @declvarobject | @declfunctionobject; + +@builtinobject = @builtintypeobject | @builtinconstobject | @builtinfunctionobject; + +@typeobject = @decltypeobject | @builtintypeobject; + +@valueobject = @constobject | @varobject | @functionobject; + +@constobject = @declconstobject | @builtinconstobject; + +@varobject = @declvarobject; + +@functionobject = @declfunctionobject | @builtinfunctionobject; + +case @scope.kind of + 0 = @universescope +| 1 = @packagescope +| 2 = @localscope; + +case @type.kind of + 0 = @invalidtype +| 1 = @boolexprtype +| 2 = @inttype +| 3 = @int8type +| 4 = @int16type +| 5 = @int32type +| 6 = @int64type +| 7 = @uinttype +| 8 = @uint8type +| 9 = @uint16type +| 10 = @uint32type +| 11 = @uint64type +| 12 = @uintptrtype +| 13 = @float32type +| 14 = @float64type +| 15 = @complex64type +| 16 = @complex128type +| 17 = @stringexprtype +| 18 = @unsafepointertype +| 19 = @boolliteraltype +| 20 = @intliteraltype +| 21 = @runeliteraltype +| 22 = @floatliteraltype +| 23 = @complexliteraltype +| 24 = @stringliteraltype +| 25 = @nilliteraltype +| 26 = @typeparamtype +| 27 = @arraytype +| 28 = @slicetype +| 29 = @structtype +| 30 = @pointertype +| 31 = @interfacetype +| 32 = @tupletype +| 33 = @signaturetype +| 34 = @maptype +| 35 = @sendchantype +| 36 = @recvchantype +| 37 = @sendrcvchantype +| 38 = @namedtype +| 39 = @typesetliteraltype; + +@basictype = @booltype | @numerictype | @stringtype 
| @literaltype | @invalidtype | @unsafepointertype; + +@booltype = @boolexprtype | @boolliteraltype; + +@numerictype = @integertype | @floattype | @complextype; + +@integertype = @signedintegertype | @unsignedintegertype; + +@signedintegertype = @inttype | @int8type | @int16type | @int32type | @int64type | @intliteraltype | @runeliteraltype; + +@unsignedintegertype = @uinttype | @uint8type | @uint16type | @uint32type | @uint64type | @uintptrtype; + +@floattype = @float32type | @float64type | @floatliteraltype; + +@complextype = @complex64type | @complex128type | @complexliteraltype; + +@stringtype = @stringexprtype | @stringliteraltype; + +@literaltype = @boolliteraltype | @intliteraltype | @runeliteraltype | @floatliteraltype | @complexliteraltype + | @stringliteraltype | @nilliteraltype; + +@compositetype = @typeparamtype | @containertype | @structtype | @pointertype | @interfacetype | @tupletype + | @signaturetype | @namedtype | @typesetliteraltype; + +@containertype = @arraytype | @slicetype | @maptype | @chantype; + +@chantype = @sendchantype | @recvchantype | @sendrcvchantype; + +case @modexpr.kind of + 0 = @modcommentblock +| 1 = @modline +| 2 = @modlineblock +| 3 = @modlparen +| 4 = @modrparen; + +case @error.kind of + 0 = @unknownerror +| 1 = @listerror +| 2 = @parseerror +| 3 = @typeerror; + diff --git a/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/types.ql b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/types.ql new file mode 100644 index 00000000000..0b02cc68085 --- /dev/null +++ b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/types.ql @@ -0,0 +1,31 @@ +class Type_ extends @type { + string toString() { result = "Type" } +} + +/** + * A new kind has been inserted such that `@arraytype` which used to have index + * 26 now has index 27. Another new kind has been inserted at 39, which is the + * end of the list. Entries with lower indices are unchanged. 
+ */
+bindingset[new_index]
+int old_index(int new_index) {
+  if new_index < 26
+  then result = new_index
+  else
+    if new_index = 26
+    then result = 0 // new 26 is `@typeparamtype`, which has no old kind: use invalidtype
+    else
+      if new_index < 39
+      then result + (27 - 26) = new_index // new 27..38 shift down by one to old 26..37
+      else result = 0 // new 39 is `@typesetliteraltype`, which has no old kind: use invalidtype
+}
+
+// The schema for types is:
+//
+// types(unique int id: @type,
+//       int kind: int ref);
+from Type_ type, int new_kind, int old_kind
+where
+  types(type, new_kind) and
+  old_kind = old_index(new_kind)
+select type, old_kind
diff --git a/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/upgrade.properties b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/upgrade.properties
new file mode 100644
index 00000000000..a233cc281c8
--- /dev/null
+++ b/go/downgrades/90fa7836e0a239f69bbebffcf342e92c240d54bc/upgrade.properties
@@ -0,0 +1,6 @@
+description: Add generic instantiation expressions and type parameter types
+compatibility: full
+exprs.rel: run exprs.qlo
+types.rel: run types.qlo
+typeparamdecls.rel: delete
+typeparam.rel: delete
\ No newline at end of file
diff --git a/go/downgrades/initial/go.dbscheme b/go/downgrades/initial/go.dbscheme
new file mode 100644
index 00000000000..8f168c8af3f
--- /dev/null
+++ b/go/downgrades/initial/go.dbscheme
@@ -0,0 +1,531 @@
+/** Auto-generated dbscheme; do not edit.
*/ + + +/** Duplicate code **/ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity; + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int ref, + int endColumn : int ref); + +/** External data **/ + +externalData( + int id : @externalDataElement, + varchar(900) path : string ref, + int column: int ref, + varchar(900) value : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); + + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = 
@xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +compilations(unique int id: @compilation, string cwd: string ref); + +#keyset[id, num] +compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref); + +#keyset[id, num, kind] +compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref); + +diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref); + +compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref); + +#keyset[id, num] +compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref); + +diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref, + string full_error_message: string ref, int location: @location ref); + +locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref, + int endLine: int ref, int endColumn: int ref); + +numlines(int element_id: @sourceline ref, int num_lines: int ref, int num_code: int ref, int num_comment: int ref); + +files(unique int id: @file, string name: string ref); + +folders(unique int id: @folder, string name: string ref); + +containerparent(int parent: @container ref, unique int child: @container ref); + +has_location(unique int locatable: @locatable ref, int location: @location ref); + +#keyset[parent, idx] +comment_groups(unique int id: @comment_group, int parent: @file ref, int idx: int ref); + +comments(unique int id: @comment, int kind: int ref, int parent: @comment_group ref, int idx: int ref, string text: string ref); + +doc_comments(unique int 
node: @documentable ref, int comment: @comment_group ref); + +#keyset[parent, idx] +exprs(unique int id: @expr, int kind: int ref, int parent: @exprparent ref, int idx: int ref); + +literals(unique int expr: @expr ref, string value: string ref, string raw: string ref); + +constvalues(unique int expr: @expr ref, string value: string ref, string exact: string ref); + +fields(unique int id: @field, int parent: @fieldparent ref, int idx: int ref); + +#keyset[parent, idx] +stmts(unique int id: @stmt, int kind: int ref, int parent: @stmtparent ref, int idx: int ref); + +#keyset[parent, idx] +decls(unique int id: @decl, int kind: int ref, int parent: @declparent ref, int idx: int ref); + +#keyset[parent, idx] +specs(unique int id: @spec, int kind: int ref, int parent: @gendecl ref, int idx: int ref); + +scopes(unique int id: @scope, int kind: int ref); + +scopenesting(unique int inner: @scope ref, int outer: @scope ref); + +scopenodes(unique int node: @scopenode ref, int scope: @localscope ref); + +objects(unique int id: @object, int kind: int ref, string name: string ref); + +objectscopes(unique int object: @object ref, int scope: @scope ref); + +objecttypes(unique int object: @object ref, int tp: @type ref); + +methodreceivers(unique int method: @object ref, int receiver: @object ref); + +fieldstructs(unique int field: @object ref, int struct: @structtype ref); + +methodhosts(int method: @object ref, int host: @namedtype ref); + +defs(int ident: @ident ref, int object: @object ref); + +uses(int ident: @ident ref, int object: @object ref); + +types(unique int id: @type, int kind: int ref); + +type_of(unique int expr: @expr ref, int tp: @type ref); + +typename(unique int tp: @type ref, string name: string ref); + +key_type(unique int map: @maptype ref, int tp: @type ref); + +element_type(unique int container: @containertype ref, int tp: @type ref); + +base_type(unique int ptr: @pointertype ref, int tp: @type ref); + +underlying_type(unique int named: @namedtype ref, int 
tp: @type ref); + +#keyset[parent, index] +component_types(int parent: @compositetype ref, int index: int ref, string name: string ref, int tp: @type ref); + +array_length(unique int tp: @arraytype ref, string len: string ref); + +type_objects(unique int tp: @type ref, int object: @object ref); + +packages(unique int id: @package, string name: string ref, string path: string ref, int scope: @packagescope ref); + +#keyset[parent, idx] +modexprs(unique int id: @modexpr, int kind: int ref, int parent: @modexprparent ref, int idx: int ref); + +#keyset[parent, idx] +modtokens(string token: string ref, int parent: @modexpr ref, int idx: int ref); + +#keyset[package, idx] +errors(unique int id: @error, int kind: int ref, string msg: string ref, string rawpos: string ref, + string file: string ref, int line: int ref, int col: int ref, int package: @package ref, int idx: int ref); + +has_ellipsis(int id: @callorconversionexpr ref); + +variadic(int id: @signaturetype ref); + +@container = @file | @folder; + +@locatable = @xmllocatable | @node | @localscope; + +@node = @documentable | @exprparent | @modexprparent | @fieldparent | @stmtparent | @declparent | @scopenode + | @comment_group | @comment; + +@documentable = @file | @field | @spec | @gendecl | @funcdecl | @modexpr; + +@exprparent = @funcdef | @file | @expr | @field | @stmt | @decl | @spec; + +@modexprparent = @file | @modexpr; + +@fieldparent = @decl | @structtypeexpr | @functypeexpr | @interfacetypeexpr; + +@stmtparent = @funcdef | @stmt | @decl; + +@declparent = @file | @declstmt; + +@funcdef = @funclit | @funcdecl; + +@scopenode = @file | @functypeexpr | @blockstmt | @ifstmt | @caseclause | @switchstmt | @commclause | @loopstmt; + +@location = @location_default; + +@sourceline = @locatable; + +case @comment.kind of + 0 = @slashslashcomment +| 1 = @slashstarcomment; + +case @expr.kind of + 0 = @badexpr +| 1 = @ident +| 2 = @ellipsis +| 3 = @intlit +| 4 = @floatlit +| 5 = @imaglit +| 6 = @charlit +| 7 = @stringlit 
+| 8 = @funclit +| 9 = @compositelit +| 10 = @parenexpr +| 11 = @selectorexpr +| 12 = @indexexpr +| 13 = @sliceexpr +| 14 = @typeassertexpr +| 15 = @callorconversionexpr +| 16 = @starexpr +| 17 = @keyvalueexpr +| 18 = @arraytypeexpr +| 19 = @structtypeexpr +| 20 = @functypeexpr +| 21 = @interfacetypeexpr +| 22 = @maptypeexpr +| 23 = @plusexpr +| 24 = @minusexpr +| 25 = @notexpr +| 26 = @complementexpr +| 27 = @derefexpr +| 28 = @addressexpr +| 29 = @arrowexpr +| 30 = @lorexpr +| 31 = @landexpr +| 32 = @eqlexpr +| 33 = @neqexpr +| 34 = @lssexpr +| 35 = @leqexpr +| 36 = @gtrexpr +| 37 = @geqexpr +| 38 = @addexpr +| 39 = @subexpr +| 40 = @orexpr +| 41 = @xorexpr +| 42 = @mulexpr +| 43 = @quoexpr +| 44 = @remexpr +| 45 = @shlexpr +| 46 = @shrexpr +| 47 = @andexpr +| 48 = @andnotexpr +| 49 = @sendchantypeexpr +| 50 = @recvchantypeexpr +| 51 = @sendrcvchantypeexpr +| 52 = @errorexpr; + +@basiclit = @intlit | @floatlit | @imaglit | @charlit | @stringlit; + +@operatorexpr = @logicalexpr | @arithmeticexpr | @bitwiseexpr | @unaryexpr | @binaryexpr; + +@logicalexpr = @logicalunaryexpr | @logicalbinaryexpr; + +@arithmeticexpr = @arithmeticunaryexpr | @arithmeticbinaryexpr; + +@bitwiseexpr = @bitwiseunaryexpr | @bitwisebinaryexpr; + +@unaryexpr = @logicalunaryexpr | @bitwiseunaryexpr | @arithmeticunaryexpr | @derefexpr | @addressexpr | @arrowexpr; + +@logicalunaryexpr = @notexpr; + +@bitwiseunaryexpr = @complementexpr; + +@arithmeticunaryexpr = @plusexpr | @minusexpr; + +@binaryexpr = @logicalbinaryexpr | @bitwisebinaryexpr | @arithmeticbinaryexpr | @comparison; + +@logicalbinaryexpr = @lorexpr | @landexpr; + +@bitwisebinaryexpr = @shiftexpr | @orexpr | @xorexpr | @andexpr | @andnotexpr; + +@arithmeticbinaryexpr = @addexpr | @subexpr | @mulexpr | @quoexpr | @remexpr; + +@shiftexpr = @shlexpr | @shrexpr; + +@comparison = @equalitytest | @relationalcomparison; + +@equalitytest = @eqlexpr | @neqexpr; + +@relationalcomparison = @lssexpr | @leqexpr | @gtrexpr | @geqexpr; + 
+@chantypeexpr = @sendchantypeexpr | @recvchantypeexpr | @sendrcvchantypeexpr; + +case @stmt.kind of + 0 = @badstmt +| 1 = @declstmt +| 2 = @emptystmt +| 3 = @labeledstmt +| 4 = @exprstmt +| 5 = @sendstmt +| 6 = @incstmt +| 7 = @decstmt +| 8 = @gostmt +| 9 = @deferstmt +| 10 = @returnstmt +| 11 = @breakstmt +| 12 = @continuestmt +| 13 = @gotostmt +| 14 = @fallthroughstmt +| 15 = @blockstmt +| 16 = @ifstmt +| 17 = @caseclause +| 18 = @exprswitchstmt +| 19 = @typeswitchstmt +| 20 = @commclause +| 21 = @selectstmt +| 22 = @forstmt +| 23 = @rangestmt +| 24 = @assignstmt +| 25 = @definestmt +| 26 = @addassignstmt +| 27 = @subassignstmt +| 28 = @mulassignstmt +| 29 = @quoassignstmt +| 30 = @remassignstmt +| 31 = @andassignstmt +| 32 = @orassignstmt +| 33 = @xorassignstmt +| 34 = @shlassignstmt +| 35 = @shrassignstmt +| 36 = @andnotassignstmt; + +@incdecstmt = @incstmt | @decstmt; + +@assignment = @simpleassignstmt | @compoundassignstmt; + +@simpleassignstmt = @assignstmt | @definestmt; + +@compoundassignstmt = @addassignstmt | @subassignstmt | @mulassignstmt | @quoassignstmt | @remassignstmt + | @andassignstmt | @orassignstmt | @xorassignstmt | @shlassignstmt | @shrassignstmt | @andnotassignstmt; + +@branchstmt = @breakstmt | @continuestmt | @gotostmt | @fallthroughstmt; + +@switchstmt = @exprswitchstmt | @typeswitchstmt; + +@loopstmt = @forstmt | @rangestmt; + +case @decl.kind of + 0 = @baddecl +| 1 = @importdecl +| 2 = @constdecl +| 3 = @typedecl +| 4 = @vardecl +| 5 = @funcdecl; + +@gendecl = @importdecl | @constdecl | @typedecl | @vardecl; + +case @spec.kind of + 0 = @importspec +| 1 = @valuespec +| 2 = @typedefspec +| 3 = @aliasspec; + +@typespec = @typedefspec | @aliasspec; + +case @object.kind of + 0 = @pkgobject +| 1 = @decltypeobject +| 2 = @builtintypeobject +| 3 = @declconstobject +| 4 = @builtinconstobject +| 5 = @declvarobject +| 6 = @declfunctionobject +| 7 = @builtinfunctionobject +| 8 = @labelobject; + +@declobject = @decltypeobject | @declconstobject | 
@declvarobject | @declfunctionobject; + +@builtinobject = @builtintypeobject | @builtinconstobject | @builtinfunctionobject; + +@typeobject = @decltypeobject | @builtintypeobject; + +@valueobject = @constobject | @varobject | @functionobject; + +@constobject = @declconstobject | @builtinconstobject; + +@varobject = @declvarobject; + +@functionobject = @declfunctionobject | @builtinfunctionobject; + +case @scope.kind of + 0 = @universescope +| 1 = @packagescope +| 2 = @localscope; + +case @type.kind of + 0 = @invalidtype +| 1 = @boolexprtype +| 2 = @inttype +| 3 = @int8type +| 4 = @int16type +| 5 = @int32type +| 6 = @int64type +| 7 = @uinttype +| 8 = @uint8type +| 9 = @uint16type +| 10 = @uint32type +| 11 = @uint64type +| 12 = @uintptrtype +| 13 = @float32type +| 14 = @float64type +| 15 = @complex64type +| 16 = @complex128type +| 17 = @stringexprtype +| 18 = @unsafepointertype +| 19 = @boolliteraltype +| 20 = @intliteraltype +| 21 = @runeliteraltype +| 22 = @floatliteraltype +| 23 = @complexliteraltype +| 24 = @stringliteraltype +| 25 = @nilliteraltype +| 26 = @arraytype +| 27 = @slicetype +| 28 = @structtype +| 29 = @pointertype +| 30 = @interfacetype +| 31 = @tupletype +| 32 = @signaturetype +| 33 = @maptype +| 34 = @sendchantype +| 35 = @recvchantype +| 36 = @sendrcvchantype +| 37 = @namedtype; + +@basictype = @booltype | @numerictype | @stringtype | @literaltype | @invalidtype | @unsafepointertype; + +@booltype = @boolexprtype | @boolliteraltype; + +@numerictype = @integertype | @floattype | @complextype; + +@integertype = @signedintegertype | @unsignedintegertype; + +@signedintegertype = @inttype | @int8type | @int16type | @int32type | @int64type | @intliteraltype | @runeliteraltype; + +@unsignedintegertype = @uinttype | @uint8type | @uint16type | @uint32type | @uint64type | @uintptrtype; + +@floattype = @float32type | @float64type | @floatliteraltype; + +@complextype = @complex64type | @complex128type | @complexliteraltype; + +@stringtype = @stringexprtype | 
@stringliteraltype; + +@literaltype = @boolliteraltype | @intliteraltype | @runeliteraltype | @floatliteraltype | @complexliteraltype + | @stringliteraltype | @nilliteraltype; + +@compositetype = @containertype | @structtype | @pointertype | @interfacetype | @tupletype | @signaturetype | @namedtype; + +@containertype = @arraytype | @slicetype | @maptype | @chantype; + +@chantype = @sendchantype | @recvchantype | @sendrcvchantype; + +case @modexpr.kind of + 0 = @modcommentblock +| 1 = @modline +| 2 = @modlineblock +| 3 = @modlparen +| 4 = @modrparen; + +case @error.kind of + 0 = @unknownerror +| 1 = @listerror +| 2 = @parseerror +| 3 = @typeerror; + diff --git a/go/downgrades/qlpack.yml b/go/downgrades/qlpack.yml new file mode 100644 index 00000000000..d3e056bea64 --- /dev/null +++ b/go/downgrades/qlpack.yml @@ -0,0 +1,4 @@ +name: codeql/go-downgrades +groups: go +downgrades: . +library: true diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/code-scanning-selectors.yml b/go/external-packs/codeql/suite-helpers/0.0.2/code-scanning-selectors.yml new file mode 100644 index 00000000000..116d7288ddf --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/code-scanning-selectors.yml @@ -0,0 +1,27 @@ +- description: Selectors for selecting the Code-Scanning-relevant queries for a language +- include: + kind: + - problem + - path-problem + - alert + - path-alert + precision: + - high + - very-high + problem.severity: + - error + - warning + tags contain: + - security +- include: + kind: + - diagnostic +- include: + kind: + - metric + tags contain: + - summary +- exclude: + deprecated: // +- exclude: + query path: /^experimental\/.*/ diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-displayed-only.yml b/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-displayed-only.yml new file mode 100644 index 00000000000..1b7237495e2 --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-displayed-only.yml @@ -0,0 +1,12 @@ +- description: 
Selectors for excluding queries that LGTM doesn't display by default +- exclude: + kind: + - problem + - path-problem + precision: medium +- exclude: + kind: + - problem + - path-problem + precision: high + problem.severity: recommendation diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-selectors.yml b/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-selectors.yml new file mode 100644 index 00000000000..c83484cb1a4 --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/lgtm-selectors.yml @@ -0,0 +1,25 @@ +- description: Selectors for selecting the LGTM-relevant queries for a language +- include: + kind: + - problem + - path-problem + precision: + - high + - very-high +- include: + kind: + - problem + - path-problem + precision: medium + problem.severity: + - error + - warning +- include: + kind: + - definitions + - alert-suppression + - file-classifier +- exclude: + deprecated: // +- exclude: + query path: /^experimental\/.*/ diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/qlpack.yml b/go/external-packs/codeql/suite-helpers/0.0.2/qlpack.yml new file mode 100644 index 00000000000..ca0a6732f5a --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/qlpack.yml @@ -0,0 +1,3 @@ +name: codeql/suite-helpers +version: 0.0.2 +library: true diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/security-and-quality-selectors.yml b/go/external-packs/codeql/suite-helpers/0.0.2/security-and-quality-selectors.yml new file mode 100644 index 00000000000..61466f53886 --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/security-and-quality-selectors.yml @@ -0,0 +1,29 @@ +- description: Selectors for selecting the security-and-quality queries for a language +- include: + kind: + - problem + - path-problem + precision: + - high + - very-high +- include: + kind: + - problem + - path-problem + precision: medium + problem.severity: + - error + - warning +- include: + kind: + - diagnostic +- include: + kind: + - metric + tags 
contain: + - summary +- exclude: + deprecated: // +- exclude: + query path: /^experimental\/.*/ + diff --git a/go/external-packs/codeql/suite-helpers/0.0.2/security-extended-selectors.yml b/go/external-packs/codeql/suite-helpers/0.0.2/security-extended-selectors.yml new file mode 100644 index 00000000000..c3a82de14f2 --- /dev/null +++ b/go/external-packs/codeql/suite-helpers/0.0.2/security-extended-selectors.yml @@ -0,0 +1,34 @@ +- description: Selectors for selecting the security-extended queries for a language +- include: + kind: + - problem + - path-problem + precision: + - high + - very-high + tags contain: + - security +- include: + kind: + - problem + - path-problem + precision: + - medium + problem.severity: + - error + - warning + tags contain: + - security +- include: + kind: + - diagnostic +- include: + kind: + - metric + tags contain: + - summary +- exclude: + deprecated: // +- exclude: + query path: /^experimental\/.*/ + diff --git a/go/extractor-smoke-test/.gitignore b/go/extractor-smoke-test/.gitignore new file mode 100644 index 00000000000..e32b11d7ee1 --- /dev/null +++ b/go/extractor-smoke-test/.gitignore @@ -0,0 +1,4 @@ +*.bqrs +tracing-out.csv +notracing-out.csv +testdb diff --git a/go/extractor-smoke-test/expected.csv b/go/extractor-smoke-test/expected.csv new file mode 100644 index 00000000000..cb0eb08f8f4 --- /dev/null +++ b/go/extractor-smoke-test/expected.csv @@ -0,0 +1,12 @@ +"1","assignment to i" +"assignment to i","selection of Println" +"call to Println","exit" +"entry","skip" +"entry","skip" +"function declaration","exit" +"i","call to Println" +"nd","col1" +"selection of Println","i" +"skip","1" +"skip","function declaration" +"skip","skip" diff --git a/go/extractor-smoke-test/go.mod b/go/extractor-smoke-test/go.mod new file mode 100644 index 00000000000..0659b4eaec8 --- /dev/null +++ b/go/extractor-smoke-test/go.mod @@ -0,0 +1,3 @@ +module github.com/codeql-go-extractor-smoke-test + +go 1.14 diff --git a/go/extractor-smoke-test/main.go 
b/go/extractor-smoke-test/main.go new file mode 100644 index 00000000000..2f7b1325daa --- /dev/null +++ b/go/extractor-smoke-test/main.go @@ -0,0 +1,8 @@ +package main + +import "fmt" + +func main() { + var i int = 1 + fmt.Println(i) +} diff --git a/go/extractor-smoke-test/test.sh b/go/extractor-smoke-test/test.sh new file mode 100755 index 00000000000..46a7f8a5956 --- /dev/null +++ b/go/extractor-smoke-test/test.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +cd "$DIR" + +rm -rf testdb + +codeql database create --language=go testdb --search-path ../build/codeql-extractor-go +codeql dataset check testdb/db-go +codeql query run ../ql/test/library-tests/semmle/go/controlflow/ControlFlowGraph/ControlFlowNode_getASuccessor.ql --database=testdb --output=notracing-out.bqrs --search-path .. +codeql bqrs decode notracing-out.bqrs --format=csv --output=notracing-out.csv +diff -w -u <(sort notracing-out.csv) expected.csv + +# Now do it again with tracing enabled + +export CODEQL_EXTRACTOR_GO_BUILD_TRACING=on + +rm -rf testdb + +codeql database create --language=go testdb --search-path ../build/codeql-extractor-go +codeql dataset check testdb/db-go +codeql query run ../ql/test/library-tests/semmle/go/controlflow/ControlFlowGraph/ControlFlowNode_getASuccessor.ql --database=testdb --output=tracing-out.bqrs --search-path .. +codeql bqrs decode tracing-out.bqrs --format=csv --output=tracing-out.csv +diff -w -u <(sort tracing-out.csv) expected.csv diff --git a/go/extractor/autobuilder/autobuilder.go b/go/extractor/autobuilder/autobuilder.go new file mode 100644 index 00000000000..11c49735eaa --- /dev/null +++ b/go/extractor/autobuilder/autobuilder.go @@ -0,0 +1,81 @@ +// Package autobuilder implements a simple system that attempts to run build commands for common +// build frameworks, if the relevant files exist. 
+package autobuilder + +import ( + "log" + "os" + "os/exec" + + "github.com/github/codeql-go/extractor/util" +) + +// CheckExtracted sets whether the autobuilder should check whether source files have been extracted +// to the CodeQL source directory as well as whether the build command executed successfully. +var CheckExtracted = false + +// checkEmpty checks whether a directory either doesn't exist or is empty. +func checkEmpty(dir string) (bool, error) { + if !util.DirExists(dir) { + return true, nil + } + + d, err := os.Open(dir) + if err != nil { + return false, err + } + defer d.Close() + + names, err := d.Readdirnames(-1) + if err != nil { + return false, err + } + return len(names) == 0, nil +} + +// checkExtractorRun checks whether the CodeQL Go extractor has run, by checking if the source +// archive directory is empty or not. +func checkExtractorRun() bool { + srcDir := os.Getenv("CODEQL_EXTRACTOR_GO_SOURCE_ARCHIVE_DIR") + if srcDir != "" { + empty, err := checkEmpty(srcDir) + if err != nil { + log.Fatalf("Unable to read source archive directory %s: %s", srcDir, err.Error()) + } + if empty { + log.Printf("No Go code seen; continuing to try other builds.") + return false + } + return true + } else { + log.Fatalf("No source directory set.\nThis binary should not be run manually; instead, use the CodeQL CLI or VSCode extension. See https://securitylab.github.com/tools/codeql.") + return false + } +} + +// tryBuildIfExists tries to run the command `cmd args...` if the file `buildFile` exists and is not +// a directory. Returns true if the command was successful and false if not. +func tryBuildIfExists(buildFile, cmd string, args ...string) bool { + if util.FileExists(buildFile) { + log.Printf("%s found.\n", buildFile) + return tryBuild(cmd, args...) + } + return false +} + +// tryBuild tries to run `cmd args...`, returning true if successful and false if not. 
+func tryBuild(cmd string, args ...string) bool { + log.Printf("Trying build command %s %v", cmd, args) + res := util.RunCmd(exec.Command(cmd, args...)) + return res && (!CheckExtracted || checkExtractorRun()) +} + +// Autobuild attempts to detect build system and run the corresponding command. +func Autobuild() bool { + return tryBuildIfExists("Makefile", "make") || + tryBuildIfExists("makefile", "make") || + tryBuildIfExists("GNUmakefile", "make") || + tryBuildIfExists("build.ninja", "ninja") || + tryBuildIfExists("build", "./build") || + tryBuildIfExists("build.sh", "./build.sh") +} diff --git a/go/extractor/cli/go-autobuilder/go-autobuilder.go b/go/extractor/cli/go-autobuilder/go-autobuilder.go new file mode 100644 index 00000000000..07b8a6cb478 --- /dev/null +++ b/go/extractor/cli/go-autobuilder/go-autobuilder.go @@ -0,0 +1,588 @@ +package main + +import ( + "fmt" + "io/ioutil" + "log" + "net/url" + "os" + "os/exec" + "path/filepath" + "regexp" + "runtime" + "strings" + + "golang.org/x/mod/semver" + + "github.com/github/codeql-go/extractor/autobuilder" + "github.com/github/codeql-go/extractor/util" +) + +func usage() { + fmt.Fprintf(os.Stderr, + `%s is a wrapper script that installs dependencies and calls the extractor. + +When LGTM_SRC is not set, the script installs dependencies as described below, and then invokes the +extractor in the working directory. + +If LGTM_SRC is set, it checks for the presence of the files 'go.mod', 'Gopkg.toml', and +'glide.yaml' to determine how to install dependencies: if a 'Gopkg.toml' file is present, it uses +'dep ensure', if there is a 'glide.yaml' it uses 'glide install', and otherwise 'go get'. +Additionally, unless a 'go.mod' file is detected, it sets up a temporary GOPATH and moves all +source files into a folder corresponding to the package's import path before installing +dependencies. 
+ +This behavior can be further customized using environment variables: setting LGTM_INDEX_NEED_GOPATH +to 'false' disables the GOPATH set-up, CODEQL_EXTRACTOR_GO_BUILD_COMMAND (or alternatively +LGTM_INDEX_BUILD_COMMAND), can be set to a newline-separated list of commands to run in order to +install dependencies, and LGTM_INDEX_IMPORT_PATH can be used to override the package import path, +which is otherwise inferred from the SEMMLE_REPO_URL or GITHUB_REPOSITORY environment variables. + +In resource-constrained environments, the environment variable CODEQL_EXTRACTOR_GO_MAX_GOROUTINES +(or its legacy alias SEMMLE_MAX_GOROUTINES) can be used to limit the number of parallel goroutines +started by the extractor, which reduces CPU and memory requirements. The default value for this +variable is 32. +`, + os.Args[0]) + fmt.Fprintf(os.Stderr, "Usage:\n\n %s\n", os.Args[0]) +} + +var goVersion = "" + +// Returns the current Go version as returned by 'go version', e.g. go1.14.4 +func getEnvGoVersion() string { + if goVersion == "" { + gover, err := exec.Command("go", "version").CombinedOutput() + if err != nil { + log.Fatalf("Unable to run the go command, is it installed?\nError: %s", err.Error()) + } + goVersion = strings.Fields(string(gover))[2] + } + return goVersion +} + +// Returns the current Go version in semver format, e.g. 
v1.14.4 +func getEnvGoSemVer() string { + goVersion := getEnvGoVersion() + if !strings.HasPrefix(goVersion, "go") { + log.Fatalf("Expected 'go version' output of the form 'go1.2.3'; got '%s'", goVersion) + } + return "v" + goVersion[2:] +} + +func tryBuild(buildFile, cmd string, args ...string) bool { + if util.FileExists(buildFile) { + log.Printf("%s found, running %s\n", buildFile, cmd) + return util.RunCmd(exec.Command(cmd, args...)) + } + return false +} + +func getImportPath() (importpath string) { + importpath = os.Getenv("LGTM_INDEX_IMPORT_PATH") + if importpath == "" { + repourl := os.Getenv("SEMMLE_REPO_URL") + if repourl == "" { + githubrepo := os.Getenv("GITHUB_REPOSITORY") + if githubrepo == "" { + log.Printf("Unable to determine import path, as none of LGTM_INDEX_IMPORT_PATH, SEMMLE_REPO_URL or GITHUB_REPOSITORY is set\n") + return "" + } else { + importpath = "github.com/" + githubrepo + } + } else { + importpath = getImportPathFromRepoURL(repourl) + if importpath == "" { + log.Printf("Failed to determine import path from SEMMLE_REPO_URL '%s'\n", repourl) + return + } + } + } + log.Printf("Import path is '%s'\n", importpath) + return +} + +func getImportPathFromRepoURL(repourl string) string { + // check for scp-like URL as in "git@github.com:github/codeql-go.git" + shorturl := regexp.MustCompile("^([^@]+@)?([^:]+):([^/].*?)(\\.git)?$") + m := shorturl.FindStringSubmatch(repourl) + if m != nil { + return m[2] + "/" + m[3] + } + + // otherwise parse as proper URL + u, err := url.Parse(repourl) + if err != nil { + log.Fatalf("Malformed repository URL '%s'\n", repourl) + } + + if u.Scheme == "file" { + // we can't determine import paths from file paths + return "" + } + + if u.Hostname() == "" || u.Path == "" { + return "" + } + + host := u.Hostname() + path := u.Path + // strip off leading slashes and trailing `.git` if present + path = regexp.MustCompile("^/+|\\.git$").ReplaceAllString(path, "") + return host + "/" + path +} + +func restoreRepoLayout(fromDir string, 
dirEntries []string, scratchDirName string, toDir string) { + for _, dirEntry := range dirEntries { + if dirEntry != scratchDirName { + log.Printf("Restoring %s/%s to %s/%s.\n", fromDir, dirEntry, toDir, dirEntry) + err := os.Rename(filepath.Join(fromDir, dirEntry), filepath.Join(toDir, dirEntry)) + if err != nil { + log.Printf("Failed to move file/directory %s from directory %s to directory %s: %s\n", dirEntry, fromDir, toDir, err.Error()) + } + } + } +} + +// DependencyInstallerMode is an enum describing how dependencies should be installed +type DependencyInstallerMode int + +const ( + // GoGetNoModules represents dependency installation using `go get` without modules + GoGetNoModules DependencyInstallerMode = iota + // GoGetWithModules represents dependency installation using `go get` with modules + GoGetWithModules + // Dep represents dependency installation using `dep ensure` + Dep + // Glide represents dependency installation using `glide install` + Glide +) + +// ModMode corresponds to the possible values of the -mod flag for the Go compiler +type ModMode int + +const ( + ModUnset ModMode = iota + ModReadonly + ModMod + ModVendor +) + +func (m ModMode) argsForGoVersion(version string) []string { + switch m { + case ModUnset: + return []string{} + case ModReadonly: + return []string{"-mod=readonly"} + case ModMod: + if !semver.IsValid(version) { + log.Fatalf("Invalid Go semver: '%s'", version) + } + if semver.Compare(version, "v1.14") < 0 { + return []string{} // -mod=mod is the default behaviour for go <= 1.13, and is not accepted as an argument + } else { + return []string{"-mod=mod"} + } + case ModVendor: + return []string{"-mod=vendor"} + } + return nil +} + +// addVersionToMod adds a go version directive, e.g. `go 1.14`, to a `go.mod` file. 
+func addVersionToMod(goMod []byte, version string) bool { + cmd := exec.Command("go", "mod", "edit", "-go="+version) + return util.RunCmd(cmd) +} + +// checkVendor reports whether the vendor directory is consistent according to the go frontend; it returns false only when `go list -mod=vendor` fails with an "inconsistent vendoring" error +func checkVendor() bool { + vendorCheckCmd := exec.Command("go", "list", "-mod=vendor", "./...") + outp, err := vendorCheckCmd.CombinedOutput() + if err != nil { + badVendorRe := regexp.MustCompile(`(?m)^go: inconsistent vendoring in .*:$`) + return !badVendorRe.Match(outp) + } + + return true +} + +func main() { + if len(os.Args) > 1 { + usage() + os.Exit(2) + } + + log.Printf("Autobuilder was built with %s, environment has %s\n", runtime.Version(), getEnvGoVersion()) + + srcdir := os.Getenv("LGTM_SRC") + inLGTM := srcdir != "" + if inLGTM { + log.Printf("LGTM_SRC is %s\n", srcdir) + } else { + cwd, err := os.Getwd() + if err != nil { + log.Fatalln("Failed to get current working directory.") + } + log.Printf("LGTM_SRC is not set; defaulting to current working directory %s\n", cwd) + srcdir = cwd + } + + // we set `SEMMLE_PATH_TRANSFORMER` ourselves in some cases, so blank it out first for consistency + os.Setenv("SEMMLE_PATH_TRANSFORMER", "") + + // determine how to install dependencies and whether a GOPATH needs to be set up before + // extraction + depMode := GoGetNoModules + modMode := ModUnset + needGopath := true + if _, present := os.LookupEnv("GO111MODULE"); !present { + os.Setenv("GO111MODULE", "auto") + } + if util.FileExists("go.mod") { + depMode = GoGetWithModules + needGopath = false + log.Println("Found go.mod, enabling go modules") + } else if util.FileExists("Gopkg.toml") { + depMode = Dep + log.Println("Found Gopkg.toml, using dep instead of go get") + } else if util.FileExists("glide.yaml") { + depMode = Glide + log.Println("Found glide.yaml, using Glide instead of go get") + } + + if depMode == GoGetWithModules { + // if a vendor/modules.txt file exists, we assume that there are vendored Go dependencies, and + 
// skip the dependency installation step and run the extractor with `-mod=vendor` + if util.FileExists("vendor/modules.txt") { + modMode = ModVendor + } else if util.DirExists("vendor") { + modMode = ModMod + } + } + + if modMode == ModVendor { + // fix go vendor issues with go versions >= 1.14 when no go version is specified in the go.mod + // if this is the case, and dependencies were vendored with an old go version (and therefore + // do not contain a '## explicit' annotation, the go command will fail and refuse to do any + // work + // + // we work around this by adding an explicit go version of 1.13, which is the last version + // where this is not an issue + if depMode == GoGetWithModules { + goMod, err := ioutil.ReadFile("go.mod") + if err != nil { + log.Println("Failed to read go.mod to check for missing Go version") + } else if versionRe := regexp.MustCompile(`(?m)^go[ \t\r]+[0-9]+\.[0-9]+$`); !versionRe.Match(goMod) { + // if the go.mod does not contain a version line + modulesTxt, err := ioutil.ReadFile("vendor/modules.txt") + if err != nil { + log.Println("Failed to read vendor/modules.txt to check for mismatched Go version") + } else if explicitRe := regexp.MustCompile("(?m)^## explicit$"); !explicitRe.Match(modulesTxt) { + // and the modules.txt does not contain an explicit annotation + log.Println("Adding a version directive to the go.mod file as the modules.txt does not have explicit annotations") + if !addVersionToMod(goMod, "1.13") { + log.Println("Failed to add a version to the go.mod file to fix explicitly required package bug; not using vendored dependencies") + modMode = ModMod + } + } + } + } + } + + // Go 1.16 and later won't automatically attempt to update go.mod / go.sum during package loading, so try to update them here: + if depMode == GoGetWithModules && semver.Compare(getEnvGoSemVer(), "1.16") >= 0 { + // stat go.mod and go.sum + beforeGoModFileInfo, beforeGoModErr := os.Stat("go.mod") + if beforeGoModErr != nil { + log.Println("Failed 
to stat go.mod before running `go mod tidy -e`") + } + + beforeGoSumFileInfo, beforeGoSumErr := os.Stat("go.sum") + + // run `go mod tidy -e` + res := util.RunCmd(exec.Command("go", "mod", "tidy", "-e")) + + if !res { + log.Println("Failed to run `go mod tidy -e`") + } else { + if beforeGoModFileInfo != nil { + afterGoModFileInfo, afterGoModErr := os.Stat("go.mod") + if afterGoModErr != nil { + log.Println("Failed to stat go.mod after running `go mod tidy -e`") + } else if afterGoModFileInfo.ModTime().After(beforeGoModFileInfo.ModTime()) { + // if go.mod has been changed then notify the user + log.Println("We have run `go mod tidy -e` and it altered go.mod. You may wish to check these changes into version control. ") + } + } + + afterGoSumFileInfo, afterGoSumErr := os.Stat("go.sum") + if afterGoSumErr != nil { + log.Println("Failed to stat go.sum after running `go mod tidy -e`") + } else { + if beforeGoSumErr != nil || afterGoSumFileInfo.ModTime().After(beforeGoSumFileInfo.ModTime()) { + // if go.sum has been changed then notify the user + log.Println("We have run `go mod tidy -e` and it altered go.sum. You may wish to check these changes into version control. ") + } + } + } + } + + // if `LGTM_INDEX_NEED_GOPATH` is set, it overrides the value for `needGopath` inferred above + if needGopathOverride := os.Getenv("LGTM_INDEX_NEED_GOPATH"); needGopathOverride != "" { + inLGTM = true + if needGopathOverride == "true" { + needGopath = true + } else if needGopathOverride == "false" { + needGopath = false + } else { + log.Fatalf("Unexpected value for Boolean environment variable LGTM_NEED_GOPATH: %v.\n", needGopathOverride) + } + } + + importpath := getImportPath() + if needGopath && importpath == "" { + log.Printf("Failed to determine import path, not setting up GOPATH") + needGopath = false + } + + if inLGTM && needGopath { + // a temporary directory where everything is moved while the correct + // directory structure is created. 
+ scratch, err := ioutil.TempDir(srcdir, "scratch") + if err != nil { + log.Fatalf("Failed to create temporary directory %s in directory %s: %s\n", + scratch, srcdir, err.Error()) + } + log.Printf("Temporary directory is %s.\n", scratch) + + // move all files in `srcdir` to `scratch` + dir, err := os.Open(srcdir) + if err != nil { + log.Fatalf("Failed to open source directory %s for reading: %s\n", srcdir, err.Error()) + } + files, err := dir.Readdirnames(-1) + if err != nil { + log.Fatalf("Failed to read source directory %s: %s\n", srcdir, err.Error()) + } + for _, file := range files { + if file != filepath.Base(scratch) { + log.Printf("Moving %s/%s to %s/%s.\n", srcdir, file, scratch, file) + err := os.Rename(filepath.Join(srcdir, file), filepath.Join(scratch, file)) + if err != nil { + log.Fatalf("Failed to move file %s to the temporary directory: %s\n", file, err.Error()) + } + } + } + + // create a new folder which we will add to GOPATH below + // Note we evaluate all symlinks here for consistency: otherwise os.Chdir below + // will follow links but other references to the path may not, which can lead to + // disagreements between GOPATH and the working directory. 
+ realSrc, err := filepath.EvalSymlinks(srcdir) + if err != nil { + log.Fatalf("Failed to evaluate symlinks in %s: %s\n", srcdir, err.Error()) + } + + root := filepath.Join(realSrc, "root") + + // move source files to where Go expects them to be + newdir := filepath.Join(root, "src", importpath) + err = os.MkdirAll(filepath.Dir(newdir), 0755) + if err != nil { + log.Fatalf("Failed to create directory %s: %s\n", newdir, err.Error()) + } + log.Printf("Moving %s to %s.\n", scratch, newdir) + err = os.Rename(scratch, newdir) + if err != nil { + log.Fatalf("Failed to rename %s to %s: %s\n", scratch, newdir, err.Error()) + } + + // schedule restoring the contents of newdir to their original location after this function completes: + defer restoreRepoLayout(newdir, files, filepath.Base(scratch), srcdir) + + err = os.Chdir(newdir) + if err != nil { + log.Fatalf("Failed to chdir into %s: %s\n", newdir, err.Error()) + } + + // set up SEMMLE_PATH_TRANSFORMER to ensure paths in the source archive and the snapshot + // match the original source location, not the location we moved it to + pt, err := ioutil.TempFile("", "path-transformer") + if err != nil { + log.Fatalf("Unable to create path transformer file: %s.", err.Error()) + } + defer os.Remove(pt.Name()) + _, err = pt.WriteString("#" + realSrc + "\n" + newdir + "//\n") + if err != nil { + log.Fatalf("Unable to write path transformer file: %s.", err.Error()) + } + err = pt.Close() + if err != nil { + log.Fatalf("Unable to close path transformer file: %s.", err.Error()) + } + err = os.Setenv("SEMMLE_PATH_TRANSFORMER", pt.Name()) + if err != nil { + log.Fatalf("Unable to set SEMMLE_PATH_TRANSFORMER environment variable: %s.\n", err.Error()) + } + + // set/extend GOPATH + oldGopath := os.Getenv("GOPATH") + var newGopath string + if oldGopath != "" { + newGopath = strings.Join( + []string{root, oldGopath}, + string(os.PathListSeparator), + ) + } else { + newGopath = root + } + err = os.Setenv("GOPATH", newGopath) + if err != nil 
{ + log.Fatalf("Unable to set GOPATH to %s: %s\n", newGopath, err.Error()) + } + log.Printf("GOPATH set to %s.\n", newGopath) + } + + // check whether an explicit dependency installation command was provided + inst := util.Getenv("CODEQL_EXTRACTOR_GO_BUILD_COMMAND", "LGTM_INDEX_BUILD_COMMAND") + shouldInstallDependencies := false + if inst == "" { + // try to build the project + buildSucceeded := autobuilder.Autobuild() + + // Build failed or there are still dependency errors; we'll try to install dependencies + // ourselves + if !buildSucceeded { + log.Println("Build failed, continuing to install dependencies.") + + shouldInstallDependencies = true + } else if util.DepErrors("./...", modMode.argsForGoVersion(getEnvGoSemVer())...) { + log.Println("Dependencies are still not resolving after the build, continuing to install dependencies.") + + shouldInstallDependencies = true + } + } else { + // write custom build commands into a script, then run it + var ( + ext = "" + header = "" + footer = "" + ) + if runtime.GOOS == "windows" { + ext = ".cmd" + header = "@echo on\n@prompt +$S\n" + footer = "\nIF %ERRORLEVEL% NEQ 0 EXIT" + } else { + ext = ".sh" + header = "#! 
/bin/bash\nset -xe +u\n" + } + script, err := ioutil.TempFile("", "go-build-command-*"+ext) + if err != nil { + log.Fatalf("Unable to create temporary script holding custom build commands: %s\n", err.Error()) + } + defer os.Remove(script.Name()) + _, err = script.WriteString(header + inst + footer) + if err != nil { + log.Fatalf("Unable to write to temporary script holding custom build commands: %s\n", err.Error()) + } + err = script.Close() + if err != nil { + log.Fatalf("Unable to close temporary script holding custom build commands: %s\n", err.Error()) + } + os.Chmod(script.Name(), 0700) + log.Println("Installing dependencies using custom build command.") + util.RunCmd(exec.Command(script.Name())) + } + + if modMode == ModVendor { + // test if running `go` with -mod=vendor works, and if it doesn't, try to fallback to -mod=mod + // or not set if the go version < 1.14. Note we check this post-build in case the build brings + // the vendor directory up to date. + if !checkVendor() { + modMode = ModMod + log.Println("The vendor directory is not consistent with the go.mod; not using vendored dependencies.") + } + } + + if shouldInstallDependencies { + if modMode == ModVendor { + log.Printf("Skipping dependency installation because a Go vendor directory was found.") + } else { + // automatically determine command to install dependencies + var install *exec.Cmd + if depMode == Dep { + // set up the dep cache if SEMMLE_CACHE is set + cacheDir := os.Getenv("SEMMLE_CACHE") + if cacheDir != "" { + depCacheDir := filepath.Join(cacheDir, "go", "dep") + log.Printf("Attempting to create dep cache dir %s\n", depCacheDir) + err := os.MkdirAll(depCacheDir, 0755) + if err != nil { + log.Printf("Failed to create dep cache directory: %s\n", err.Error()) + } else { + log.Printf("Setting dep cache directory to %s\n", depCacheDir) + err = os.Setenv("DEPCACHEDIR", depCacheDir) + if err != nil { + log.Println("Failed to set dep cache directory") + } else { + err = 
os.Setenv("DEPCACHEAGE", "720h") // 30 days + if err != nil { + log.Println("Failed to set dep cache age") + } + } + } + } + + if util.FileExists("Gopkg.lock") { + // if Gopkg.lock exists, don't update it and only vendor dependencies + install = exec.Command("dep", "ensure", "-v", "-vendor-only") + } else { + install = exec.Command("dep", "ensure", "-v") + } + log.Println("Installing dependencies using `dep ensure`.") + } else if depMode == Glide { + install = exec.Command("glide", "install") + log.Println("Installing dependencies using `glide install`") + } else { + // explicitly set go module support + if depMode == GoGetWithModules { + os.Setenv("GO111MODULE", "on") + } else if depMode == GoGetNoModules { + os.Setenv("GO111MODULE", "off") + } + + // get dependencies + install = exec.Command("go", "get", "-v", "./...") + log.Println("Installing dependencies using `go get -v ./...`.") + } + util.RunCmd(install) + } + } + + // extract + extractor, err := util.GetExtractorPath() + if err != nil { + log.Fatalf("Could not determine path of extractor: %v.\n", err) + } + + cwd, err := os.Getwd() + if err != nil { + log.Fatalf("Unable to determine current directory: %s\n", err.Error()) + } + + extractorArgs := []string{} + if depMode == GoGetWithModules { + extractorArgs = append(extractorArgs, modMode.argsForGoVersion(getEnvGoSemVer())...) + } + extractorArgs = append(extractorArgs, "./...") + + log.Printf("Running extractor command '%s %v' from directory '%s'.\n", extractor, extractorArgs, cwd) + cmd := exec.Command(extractor, extractorArgs...) 
+ cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err = cmd.Run() + if err != nil { + log.Fatalf("Extraction failed: %s\n", err.Error()) + } +} diff --git a/go/extractor/cli/go-autobuilder/go-autobuilder_test.go b/go/extractor/cli/go-autobuilder/go-autobuilder_test.go new file mode 100644 index 00000000000..f4e8405fe36 --- /dev/null +++ b/go/extractor/cli/go-autobuilder/go-autobuilder_test.go @@ -0,0 +1,22 @@ +package main + +import "testing" + +func TestGetImportPathFromRepoURL(t *testing.T) { + tests := map[string]string{ + "git@github.com:github/codeql-go.git": "github.com/github/codeql-go", + "git@github.com:github/codeql-go": "github.com/github/codeql-go", + "https://github.com/github/codeql-go.git": "github.com/github/codeql-go", + "https://github.com:12345/github/codeql-go": "github.com/github/codeql-go", + "gitolite@some.url:some/repo": "some.url/some/repo", + "file:///C:/some/path": "", + "https:///no/hostname": "", + "https://hostnameonly": "", + } + for input, expected := range tests { + actual := getImportPathFromRepoURL(input) + if actual != expected { + t.Errorf("Expected getImportPathFromRepoURL(\"%s\") to be \"%s\", but got \"%s\".", input, expected, actual) + } + } +} diff --git a/go/extractor/cli/go-bootstrap/go-bootstrap.go b/go/extractor/cli/go-bootstrap/go-bootstrap.go new file mode 100644 index 00000000000..603da2b8027 --- /dev/null +++ b/go/extractor/cli/go-bootstrap/go-bootstrap.go @@ -0,0 +1,54 @@ +package main + +import ( + "fmt" + "io/ioutil" + "log" + "os" + "regexp" +) + +// A utility program for generating `project` and `variable` files for SemmleCore Go projects +// +// This program should not normally be run directly; it is usually executed as part of +// `odasa bootstrap`, and expects two files as arguments: a (partial) `variables` file and +// an empty file to be filled in with an `` element containing build steps. 
+// +// The `variables` file is extended with a definition of `LGTM_SRC` and, if it defines the +// `repository` variable, `SEMMLE_REPO_URL`. The only build step is an invocation of the +// Go autobuilder. +func main() { + vars := os.Args[1] + buildSteps := os.Args[2] + + haveRepo := false + content, err := ioutil.ReadFile(vars) + if err != nil { + log.Fatal(err) + } + re := regexp.MustCompile(`(^|\n)repository=`) + haveRepo = re.Find(content) != nil + + additionalVars := "LGTM_SRC=${src}\n" + if haveRepo { + additionalVars += "SEMMLE_REPO_URL=${repository}\n" + } + content = append(content, []byte(additionalVars)...) + err = ioutil.WriteFile(vars, content, 0644) + if err != nil { + log.Fatal(err) + } + + export := "LGTM_SRC" + if haveRepo { + export += ",SEMMLE_REPO_URL" + } + content = []byte(fmt.Sprintf(` + ${semmle_dist}/language-packs/go/tools/platform/${semmle_platform}/bin/go-autobuilder + +`, export)) + err = ioutil.WriteFile(buildSteps, content, 0644) + if err != nil { + log.Fatal(err) + } +} diff --git a/go/extractor/cli/go-build-runner/go-build-runner.go b/go/extractor/cli/go-build-runner/go-build-runner.go new file mode 100644 index 00000000000..118de5caf2e --- /dev/null +++ b/go/extractor/cli/go-build-runner/go-build-runner.go @@ -0,0 +1,36 @@ +package main + +import ( + "github.com/github/codeql-go/extractor/util" + "log" + "os" + "os/exec" + "path/filepath" + "runtime" + + "github.com/github/codeql-go/extractor/autobuilder" +) + +func main() { + // check if a build command has successfully extracted something + autobuilder.CheckExtracted = true + if autobuilder.Autobuild() { + return + } + + // if the autobuilder fails, invoke the extractor manually + // we cannot simply call `go build` here, because the tracer is not able to trace calls made by + // this binary + log.Printf("No build commands succeeded, falling back to go build ./...") + + mypath, err := os.Executable() + if err != nil { + log.Fatalf("Could not determine path of extractor: %v.\n", 
err) + } + extractor := filepath.Join(filepath.Dir(mypath), "go-extractor") + if runtime.GOOS == "windows" { + extractor = extractor + ".exe" + } + + util.RunCmd(exec.Command(extractor, "./...")) +} diff --git a/go/extractor/cli/go-extractor/go-extractor.go b/go/extractor/cli/go-extractor/go-extractor.go new file mode 100644 index 00000000000..59ab35da4a9 --- /dev/null +++ b/go/extractor/cli/go-extractor/go-extractor.go @@ -0,0 +1,132 @@ +package main + +import ( + "fmt" + "log" + "os" + "runtime" + "runtime/pprof" + "strings" + + "github.com/github/codeql-go/extractor" +) + +var cpuprofile, memprofile string + +func usage() { + fmt.Fprintf(os.Stderr, "%s is a program for building a snapshot of a Go code base.\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Usage:\n\n %s [...] [...] [--] ...\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Flags:\n\n") + fmt.Fprintf(os.Stderr, "--help Print this help.\n") +} + +func parseFlags(args []string, mimic bool) ([]string, []string) { + i := 0 + buildFlags := []string{} + for ; i < len(args) && strings.HasPrefix(args[i], "-"); i++ { + if args[i] == "--" { + i++ + break + } + + if !mimic { + // we're not in mimic mode, try to parse our arguments + switch args[i] { + case "--help": + usage() + os.Exit(0) + case "--mimic": + if i+1 < len(args) { + i++ + compiler := args[i] + log.Printf("Compiler: %s", compiler) + if i+1 < len(args) { + i++ + command := args[i] + if command == "build" || command == "install" || command == "run" { + log.Printf("Intercepting build") + return parseFlags(args[i+1:], true) + } else { + log.Printf("Non-build command '%s'; skipping", strings.Join(args[1:], " ")) + os.Exit(0) + } + } else { + log.Printf("Non-build command '%s'; skipping", strings.Join(args[1:], " ")) + os.Exit(0) + } + } else { + log.Fatalf("--mimic requires an argument, e.g. 
--mimic go") + } + } + } + + // parse go build flags + switch args[i] { + // skip `-o output` and `-i`, if applicable + case "-o": + if i+1 < len(args) { + i++ + } + case "-i": + case "-p", "-asmflags", "-buildmode", "-compiler", "-gccgoflags", "-gcflags", "-installsuffix", + "-ldflags", "-mod", "-modfile", "-pkgdir", "-tags", "-toolexec", "-overlay": + if i+1 < len(args) { + buildFlags = append(buildFlags, args[i], args[i+1]) + i++ + } else { + buildFlags = append(buildFlags, args[i]) + } + default: + if strings.HasPrefix(args[i], "-") { + buildFlags = append(buildFlags, args[i]) + } else { + // stop parsing if the argument is not a flag (and so is positional) + break + } + } + } + + cpuprofile = os.Getenv("CODEQL_EXTRACTOR_GO_CPU_PROFILE") + memprofile = os.Getenv("CODEQL_EXTRACTOR_GO_MEM_PROFILE") + + return buildFlags, args[i:] +} + +func main() { + buildFlags, patterns := parseFlags(os.Args[1:], false) + + if cpuprofile != "" { + f, err := os.Create(cpuprofile) + if err != nil { + log.Fatalf("Unable to create CPU profile: %v.", err) + } + defer f.Close() + if err := pprof.StartCPUProfile(f); err != nil { + log.Fatalf("Unable to start CPU profile: %v.", err) + } + defer pprof.StopCPUProfile() + } + + if len(patterns) == 0 { + log.Println("No packages explicitly provided, adding '.'") + patterns = []string{"."} + } + + log.Printf("Build flags: '%s'; patterns: '%s'\n", strings.Join(buildFlags, " "), strings.Join(patterns, " ")) + err := extractor.ExtractWithFlags(buildFlags, patterns) + if err != nil { + log.Fatalf("Error running go tooling: %s\n", err.Error()) + } + + if memprofile != "" { + f, err := os.Create(memprofile) + if err != nil { + log.Fatalf("Unable to create memory profile: %v", err) + } + defer f.Close() + runtime.GC() // get up-to-date statistics + if err := pprof.WriteHeapProfile(f); err != nil { + log.Fatal("Unable to write memory profile: ", err) + } + } +} diff --git a/go/extractor/cli/go-gen-dbscheme/go-gen-dbscheme.go 
b/go/extractor/cli/go-gen-dbscheme/go-gen-dbscheme.go new file mode 100644 index 00000000000..94831c2b451 --- /dev/null +++ b/go/extractor/cli/go-gen-dbscheme/go-gen-dbscheme.go @@ -0,0 +1,31 @@ +package main + +import ( + "fmt" + "os" + + "github.com/github/codeql-go/extractor/dbscheme" +) + +func usage() { + fmt.Fprintf(os.Stderr, "%s is a program for generating the dbscheme for CodeQL Go databases.\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Usage:\n\n %s \n\n", os.Args[0]) +} + +func main() { + if len(os.Args) != 2 { + usage() + os.Exit(2) + } + + out := os.Args[1] + + f, err := os.Create(out) + if err != nil { + fmt.Fprintf(os.Stderr, "Unable to open file %s for writing.\n", out) + os.Exit(1) + } + dbscheme.PrintDbScheme(f) + f.Close() + fmt.Printf("Dbscheme written to file %s.\n", out) +} diff --git a/go/extractor/cli/go-tokenizer/go-tokenizer.go b/go/extractor/cli/go-tokenizer/go-tokenizer.go new file mode 100644 index 00000000000..ec4a4057173 --- /dev/null +++ b/go/extractor/cli/go-tokenizer/go-tokenizer.go @@ -0,0 +1,57 @@ +package main + +import ( + "encoding/csv" + "flag" + "fmt" + "go/scanner" + "go/token" + "io/ioutil" + "log" + "os" + "strings" +) + +func main() { + flag.Parse() + + fs := token.NewFileSet() + csv := csv.NewWriter(os.Stdout) + defer csv.Flush() + + for _, fileName := range flag.Args() { + src, err := ioutil.ReadFile(fileName) + if err != nil { + log.Fatalf("Unable to read file %s.", fileName) + } + f := fs.AddFile(fileName, -1, len(src)) + + var s scanner.Scanner + s.Init(f, src, nil, 0) + for { + beginPos, tok, text := s.Scan() + + if strings.TrimSpace(text) != "" { + var fuzzyText string + if tok.IsLiteral() { + fuzzyText = tok.String() + } else { + fuzzyText = text + } + + endPos := f.Pos(f.Offset(beginPos) + len(text)) + beginLine := fmt.Sprintf("%d", f.Position(beginPos).Line) + beginColumn := fmt.Sprintf("%d", f.Position(beginPos).Column) + endLine := fmt.Sprintf("%d", f.Position(endPos).Line) + endColumn := fmt.Sprintf("%d", 
f.Position(endPos).Column) + err = csv.Write([]string{text, fuzzyText, beginLine, beginColumn, endLine, endColumn}) + if err != nil { + log.Fatalf("Unable to write CSV data: %v", err) + } + } + if tok == token.EOF { + break + } + } + } +} diff --git a/go/extractor/dbscheme/dbscheme.go b/go/extractor/dbscheme/dbscheme.go new file mode 100644 index 00000000000..923fda859db --- /dev/null +++ b/go/extractor/dbscheme/dbscheme.go @@ -0,0 +1,426 @@ +package dbscheme + +import ( + "fmt" + "io" + "log" + "reflect" + "strings" + + "github.com/github/codeql-go/extractor/trap" +) + +// A Type represents a database type +type Type interface { + def() string + ref() string + repr() string + valid(val interface{}) bool +} + +// A PrimitiveType represents a primitive dataase type +type PrimitiveType int + +const ( + // INT represents the primitive database type `int` + INT PrimitiveType = iota + // FLOAT represents the primitive database type `float` + FLOAT + // BOOLEAN represents the primitive database type `boolean` + BOOLEAN + // DATE represents the primitive database type `date` + DATE + // STRING represents the primitive database type `string` + STRING +) + +// A PrimaryKeyType represents a database type defined by a primary key column +type PrimaryKeyType struct { + name string +} + +// A UnionType represents a database type defined as the union of other database types +type UnionType struct { + name string + components []Type +} + +// An AliasType represents a database type which is an alias of another database type +type AliasType struct { + name string + underlying Type +} + +// A CaseType represents a database type defined by a primary key column with a supplementary kind column +type CaseType struct { + base Type + column string + branches []*BranchType +} + +// A BranchType represents one branch of a case type +type BranchType struct { + idx int + name string +} + +func (pt PrimitiveType) def() string { + return "" +} + +func (pt PrimitiveType) ref() string { + switch 
pt { + case INT: + return "int" + case FLOAT: + return "float" + case BOOLEAN: + return "boolean" + case DATE: + return "date" + case STRING: + return "string" + default: + panic(fmt.Sprintf("Unexpected primitive type %d", pt)) + } +} + +func (pt PrimitiveType) repr() string { + switch pt { + case INT: + return "int" + case FLOAT: + return "float" + case BOOLEAN: + return "boolean" + case DATE: + return "date" + case STRING: + return "string" + default: + panic(fmt.Sprintf("Unexpected primitive type %d", pt)) + } +} + +func (pt PrimitiveType) valid(value interface{}) bool { + switch value.(type) { + case int: + return pt == INT + case float64: + return pt == FLOAT + case bool: + return pt == BOOLEAN + case string: + return pt == STRING + } + return false +} + +func (pkt PrimaryKeyType) def() string { + return "" +} + +func (pkt PrimaryKeyType) ref() string { + return pkt.name +} + +func (pkt PrimaryKeyType) repr() string { + return "int" +} + +func (pkt PrimaryKeyType) valid(value interface{}) bool { + _, ok := value.(trap.Label) + return ok +} + +func (ut UnionType) def() string { + var b strings.Builder + nl := 0 + fmt.Fprintf(&b, "%s = ", ut.name) + for i, comp := range ut.components { + if i > 0 { + if i < len(ut.components)-1 && b.Len()-nl > 100 { + fmt.Fprintf(&b, "\n%s", strings.Repeat(" ", len(ut.name))) + nl = b.Len() + } + fmt.Fprint(&b, " | ") + } + fmt.Fprint(&b, comp.ref()) + } + fmt.Fprint(&b, ";") + return b.String() +} + +func (ut UnionType) ref() string { + return ut.name +} + +func (ut UnionType) repr() string { + return "int" +} + +func (ut UnionType) valid(value interface{}) bool { + _, ok := value.(trap.Label) + return ok +} + +func (at AliasType) def() string { + return at.name + " = " + at.underlying.ref() + ";" +} + +func (at AliasType) ref() string { + return at.name +} + +func (at AliasType) repr() string { + return at.underlying.repr() +} + +func (at AliasType) valid(value interface{}) bool { + return at.underlying.valid(value) +} + +func 
(ct CaseType) def() string { + var b strings.Builder + fmt.Fprintf(&b, "case %s.%s of", ct.base.ref(), ct.column) + sep := " " + for _, branch := range ct.branches { + fmt.Fprintf(&b, "\n%s%s", sep, branch.def()) + sep = "| " + } + fmt.Fprint(&b, ";") + return b.String() +} + +func (ct CaseType) ref() string { + panic("case types do not have a name") +} + +func (ct CaseType) repr() string { + return "int" +} + +func (ct CaseType) valid(value interface{}) bool { + _, ok := value.(trap.Label) + return ok +} + +func (bt BranchType) def() string { + return fmt.Sprintf("%d = %s", bt.idx, bt.name) +} + +func (bt BranchType) ref() string { + return bt.name +} + +func (bt BranchType) repr() string { + return "int" +} + +func (bt BranchType) valid(value interface{}) bool { + _, ok := value.(trap.Label) + return ok +} + +// Index returns the numeric index of this branch type +func (bt BranchType) Index() int { + return bt.idx +} + +// A Column represents a column in a database table +type Column struct { + columnName string + columnType Type + unique bool + ref bool +} + +func (col Column) String() string { + var b strings.Builder + if col.unique { + fmt.Fprint(&b, "unique ") + } + fmt.Fprintf(&b, "%s %s: %s", col.columnType.repr(), col.columnName, col.columnType.ref()) + if col.ref { + fmt.Fprint(&b, " ref") + } + return b.String() +} + +// Key returns a new column that is the same as this column, but has the `key` flag set to `true` +func (col Column) Key() Column { + return Column{col.columnName, col.columnType, true, false} +} + +// Unique returns a new column that is the same as this column, but has the `unique` flag set to `true` +func (col Column) Unique() Column { + return Column{col.columnName, col.columnType, true, col.ref} +} + +// EntityColumn constructs a column with name `columnName` holding entities of type `columnType` +func EntityColumn(columnType Type, columnName string) Column { + return Column{columnName, columnType, false, true} +} + +// StringColumn 
constructs a column with name `columnName` holding string values +func StringColumn(columnName string) Column { + return Column{columnName, STRING, false, true} +} + +// IntColumn constructs a column with name `columnName` holding integer values +func IntColumn(columnName string) Column { + return Column{columnName, INT, false, true} +} + +// FloatColumn constructs a column with name `columnName` holding floating point number values +func FloatColumn(columnName string) Column { + return Column{columnName, FLOAT, false, true} +} + +// A Table represents a database table +type Table struct { + name string + schema []Column + keysets [][]string +} + +// KeySet adds `keys` as a keyset to this table +func (tbl *Table) KeySet(keys ...string) *Table { + tbl.keysets = append(tbl.keysets, keys) + return tbl +} + +func (tbl Table) String() string { + var b strings.Builder + for _, keyset := range tbl.keysets { + fmt.Fprint(&b, "#keyset[") + sep := "" + for _, key := range keyset { + fmt.Fprintf(&b, "%s%s", sep, key) + sep = ", " + } + fmt.Fprint(&b, "]\n") + } + fmt.Fprint(&b, tbl.name) + fmt.Fprint(&b, "(") + nl := 0 + for i, column := range tbl.schema { + if i > 0 { + // wrap >100 char lines + if i < len(tbl.schema)-1 && b.Len()-nl > 100 { + fmt.Fprintf(&b, ",\n%s", strings.Repeat(" ", len(tbl.name)+1)) + nl = b.Len() + } else { + fmt.Fprint(&b, ", ") + } + } + fmt.Fprint(&b, column.String()) + } + fmt.Fprint(&b, ");") + return b.String() +} + +// Emit outputs a tuple of `values` for this table using trap writer `tw` +// and panicks if the tuple does not have the right schema +func (tbl Table) Emit(tw *trap.Writer, values ...interface{}) { + if ncol, nval := len(tbl.schema), len(values); ncol != nval { + log.Fatalf("wrong number of values for table %s; expected %d, but got %d", tbl.name, ncol, nval) + } + for i, col := range tbl.schema { + if !col.columnType.valid(values[i]) { + panic(fmt.Sprintf("Invalid value for column %d of table %s; expected a %s, but got %s which is 
a %s", i, tbl.name, col.columnType.ref(), values[i], reflect.TypeOf(values[i]))) + } + } + tw.Emit(tbl.name, values) +} + +var tables = []*Table{} +var types = []Type{} +var defaultSnippets = []string{} + +// NewTable constructs a new table with the given `name` and `columns` +func NewTable(name string, columns ...Column) *Table { + tbl := &Table{name, columns, [][]string{}} + tables = append(tables, tbl) + return tbl +} + +// NewPrimaryKeyType constructs a new primary key type with the given `name`, +// and adds it to the union types `parents` (if any) +func NewPrimaryKeyType(name string, parents ...*UnionType) *PrimaryKeyType { + tp := &PrimaryKeyType{name} + types = append(types, tp) + for _, parent := range parents { + parent.components = append(parent.components, tp) + } + return tp +} + +// NewUnionType constructs a new union type with the given `name`, +// and adds it to the union types `parents` (if any) +func NewUnionType(name string, parents ...*UnionType) *UnionType { + tp := &UnionType{name, []Type{}} + types = append(types, tp) + for _, parent := range parents { + parent.components = append(parent.components, tp) + } + return tp +} + +// AddChild adds the type with given `name` to the union type. +// This is useful if a type defined in a snippet should be a child of a type defined in Go. +func (parent *UnionType) AddChild(name string) bool { + tp := &PrimaryKeyType{name} + // don't add tp to types; it's expected that it's already in the db somehow. 
+ parent.components = append(parent.components, tp) + return true +} + +// NewAliasType constructs a new alias type with the given `name` that aliases `underlying` +func NewAliasType(name string, underlying Type) *AliasType { + tp := &AliasType{name, underlying} + types = append(types, tp) + return tp +} + +// NewCaseType constructs a new case type on the given `base` type whose discriminator values +// come from `column` +func NewCaseType(base Type, column string) *CaseType { + tp := &CaseType{base, column, []*BranchType{}} + types = append(types, tp) + return tp +} + +// NewBranch adds a new branch with the given `name` to this case type +// and adds it to the union types `parents` (if any) +func (ct *CaseType) NewBranch(name string, parents ...*UnionType) *BranchType { + tp := &BranchType{len(ct.branches), name} + ct.branches = append(ct.branches, tp) + for _, parent := range parents { + parent.components = append(parent.components, tp) + } + return tp +} + +// AddDefaultSnippet adds the given text `snippet` to the schema of this database +func AddDefaultSnippet(snippet string) bool { + defaultSnippets = append(defaultSnippets, snippet) + return true +} + +// PrintDbScheme prints the schema of this database to the writer `w` +func PrintDbScheme(w io.Writer) { + fmt.Fprintf(w, "/** Auto-generated dbscheme; do not edit. 
*/\n\n") + for _, snippet := range defaultSnippets { + fmt.Fprintf(w, "%s\n", snippet) + } + for _, table := range tables { + fmt.Fprintf(w, "%s\n\n", table.String()) + } + for _, tp := range types { + def := tp.def() + if def != "" { + fmt.Fprintf(w, "%s\n\n", def) + } + } +} diff --git a/go/extractor/dbscheme/tables.go b/go/extractor/dbscheme/tables.go new file mode 100644 index 00000000000..63332aa3b59 --- /dev/null +++ b/go/extractor/dbscheme/tables.go @@ -0,0 +1,1222 @@ +package dbscheme + +import ( + "go/ast" + "go/token" + gotypes "go/types" + + "golang.org/x/tools/go/packages" +) + +var defaultSnippet = AddDefaultSnippet(` +/** Duplicate code **/ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity; + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int ref, + int endColumn : int ref); + +/** External data **/ + +externalData( + int id : @externalDataElement, + varchar(900) path : string ref, + int column: int ref, + varchar(900) value : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); +`) + +// Copied directly from the XML dbscheme +var xmlSnippet = AddDefaultSnippet(` +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: 
@xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; +`) + +// Compiler diagnostic tables +var CompilationType = NewPrimaryKeyType("@compilation") + +/** + * An invocation of the compiler. Note that more than one file may be + * compiled per invocation. For example, this command compiles three + * source files: + * + * go build a.go b.go c.go + * + * The `id` simply identifies the invocation, while `cwd` is the working + * directory from which the compiler was invoked. + */ +var CompilationsTable = NewTable("compilations", + EntityColumn(CompilationType, "id").Key(), + StringColumn("cwd"), +) + +/** + * The arguments that were passed to the extractor for a compiler + * invocation. 
If `id` is for the compiler invocation + * + * go build a.go b.go c.go + * + * then typically there will be rows for + * + * num | arg + * --- | --- + * 0 | *path to extractor* + * 1 | `--` + * 2 | a.go + * 3 | b.go + * 4 | c.go + */ +var CompilationArgsTable = NewTable("compilation_args", + EntityColumn(CompilationType, "id"), + IntColumn("num"), + StringColumn("arg"), +).KeySet("id", "num") + +/** + * The source files that are compiled by a compiler invocation. + * If `id` is for the compiler invocation + * + * go build a.go b.go c.go + * + * then there will be rows for + * + * num | file + * --- | --- + * 0 | a.go + * 1 | b.go + * 2 | c.go + */ +var CompilationCompilingFilesTable = NewTable("compilation_compiling_files", + EntityColumn(CompilationType, "id"), + IntColumn("num"), + EntityColumn(FileType, "file"), +).KeySet("id", "num") + +type CompilationTypeKind int + +const ( + FRONTEND_CPU_SECONDS = iota + FRONTEND_ELAPSED_SECONDS + EXTRACTOR_CPU_SECONDS + EXTRACTOR_ELAPSED_SECONDS +) + +/** + * The time taken by the extractor for a compiler invocation. + * + * For each file `num`, there will be rows for + * + * kind | seconds + * ---- | --- + * 1 | CPU seconds used by the extractor frontend + * 2 | Elapsed seconds during the extractor frontend + * 3 | CPU seconds used by the extractor backend + * 4 | Elapsed seconds during the extractor backend + */ +var CompilationTimeTable = NewTable("compilation_time", + EntityColumn(CompilationType, "id"), + IntColumn("num"), + IntColumn("kind"), + FloatColumn("secs"), +).KeySet("id", "num", "kind") + +var DiagnosticType = NewPrimaryKeyType("@diagnostic") + +/** + * An error or warning generated by the extractor. + * The diagnostic message `diagnostic` was generated during compiler + * invocation `compilation`, and is the `file_number_diagnostic_number`th + * message generated while extracting the `file_number`th file of that + * invocation.
+ */ +var DiagnosticForTable = NewTable("diagnostic_for", + EntityColumn(DiagnosticType, "diagnostic").Unique(), + EntityColumn(CompilationType, "compilation"), + IntColumn("file_number"), + IntColumn("file_number_diagnostic_number"), +) + +/** + * If extraction was successful, then `cpu_seconds` and + * `elapsed_seconds` are the CPU time and elapsed time (respectively) + * that extraction took for compiler invocation `id`. + */ +var CompilationFinishedTable = NewTable("compilation_finished", + EntityColumn(CompilationType, "id").Unique(), + FloatColumn("cpu_seconds"), + FloatColumn("elapsed_seconds"), +) + +var DiagnosticsTable = NewTable("diagnostics", + EntityColumn(DiagnosticType, "id").Key(), + IntColumn("severity"), + StringColumn("error_tag"), + StringColumn("error_message"), + StringColumn("full_error_message"), + EntityColumn(LocationType, "location"), +) + +// ContainerType is the type of files and folders +var ContainerType = NewUnionType("@container") + +// LocatableType is the type of program entities that have locations +var LocatableType = NewUnionType("@locatable") + +// Adds xmllocatable as a locatable +var XmlLocatableAsLocatable = LocatableType.AddChild("@xmllocatable") + +// NodeType is the type of AST nodes +var NodeType = NewUnionType("@node", LocatableType) + +// DocumentableType is the type of AST nodes to which documentation can be attached +var DocumentableType = NewUnionType("@documentable", NodeType) + +// ExprParentType is the type of AST nodes that can have expressions as children +var ExprParentType = NewUnionType("@exprparent", NodeType) + +// ModExprParentType is the type of go.mod nodes that can have go.mod expressions as children +var ModExprParentType = NewUnionType("@modexprparent", NodeType) + +// FieldParentType is the type of AST nodes that can have fields as children +var FieldParentType = NewUnionType("@fieldparent", NodeType) + +// StmtParentType is the type of AST nodes that can have statements as children +var 
StmtParentType = NewUnionType("@stmtparent", NodeType) + +// DeclParentType is the type of AST nodes that can have declarations as children +var DeclParentType = NewUnionType("@declparent", NodeType) + +// TypeParamDeclParentType is the type of AST nodes that can have type parameter declarations as children +var TypeParamDeclParentType = NewUnionType("@typeparamdeclparent", NodeType) + +// FuncDefType is the type of AST nodes that define functions, that is, function +// declarations and function literals +var FuncDefType = NewUnionType("@funcdef", StmtParentType, ExprParentType) + +// ScopeNodeType is the type of AST nodes that may have a scope attached to them +var ScopeNodeType = NewUnionType("@scopenode", NodeType) + +// LocationDefaultType is the type of source locations +var LocationDefaultType = NewPrimaryKeyType("@location_default") + +// FileType is the type of file AST nodes +var FileType = NewPrimaryKeyType("@file", ContainerType, DocumentableType, ExprParentType, ModExprParentType, DeclParentType, ScopeNodeType) + +// FolderType is the type of folders +var FolderType = NewPrimaryKeyType("@folder", ContainerType) + +// CommentGroupType is the type of comment groups +var CommentGroupType = NewPrimaryKeyType("@comment_group", NodeType) + +// CommentType is the type of comments +var CommentType = NewPrimaryKeyType("@comment", NodeType) + +// ExprType is the type of expression AST nodes +var ExprType = NewPrimaryKeyType("@expr", ExprParentType) + +// FieldType is the type of field AST nodes +var FieldType = NewPrimaryKeyType("@field", DocumentableType, ExprParentType) + +// StmtType is the type of statement AST nodes +var StmtType = NewPrimaryKeyType("@stmt", ExprParentType, StmtParentType) + +// DeclType is the type of declaration AST nodes +var DeclType = NewPrimaryKeyType("@decl", ExprParentType, StmtParentType, FieldParentType) + +// TypeParamDeclType is the type of type parameter declaration AST nodes +var TypeParamDeclType = 
NewPrimaryKeyType("@typeparamdecl", DocumentableType, ExprParentType) + +// SpecType is the type of spec AST nodes +var SpecType = NewPrimaryKeyType("@spec", ExprParentType, DocumentableType) + +// TypeType is the type of types +var TypeType = NewPrimaryKeyType("@type") + +// LocationType is an alias for LocationDefaultType +var LocationType = NewAliasType("@location", LocationDefaultType) + +// SourceLineType is an alias for LocatableType +var SourceLineType = NewAliasType("@sourceline", LocatableType) + +// CommentKind is a case type for distinguishing different kinds of comments +var CommentKind = NewCaseType(CommentType, "kind") + +// SlashSlashComment is the type of single-line comments starting with a double slash +var SlashSlashComment = CommentKind.NewBranch("@slashslashcomment") + +// SlashStarComment is the type of block comments delimited by stars and slashes +var SlashStarComment = CommentKind.NewBranch("@slashstarcomment") + +// ExprKind is a case type for distinguishing different kinds of expression AST nodes +var ExprKind = NewCaseType(ExprType, "kind") + +// BadExpr is the type of bad (that is, unparseable) expression AST nodes +var BadExpr = ExprKind.NewBranch("@badexpr") + +// IdentExpr is the type of identifier expression AST nodes +var IdentExpr = ExprKind.NewBranch("@ident") + +// EllipsisExpr is the type of ellipsis expression AST nodes +var EllipsisExpr = ExprKind.NewBranch("@ellipsis") + +// BasicLitExpr is the type of basic (that is, primitive) literal expression AST nodes +var BasicLitExpr = NewUnionType("@basiclit") + +// IntLitExpr is the type of integer literal expression AST nodes +var IntLitExpr = ExprKind.NewBranch("@intlit", BasicLitExpr) + +// FloatLitExpr is the type of floating-point literal expression AST nodes +var FloatLitExpr = ExprKind.NewBranch("@floatlit", BasicLitExpr) + +// ImagLitExpr is the type of imaginary literal expression AST nodes +var ImagLitExpr = ExprKind.NewBranch("@imaglit",
BasicLitExpr) + +// CharLitExpr is the type of character literal expression AST nodes +var CharLitExpr = ExprKind.NewBranch("@charlit", BasicLitExpr) + +// StringLitExpr is the type of string literal expression AST nodes +var StringLitExpr = ExprKind.NewBranch("@stringlit", BasicLitExpr) + +// FuncLitExpr is the type of function literal expression AST nodes +var FuncLitExpr = ExprKind.NewBranch("@funclit", FuncDefType) + +// CompositeLitExpr is the type of composite literal expression AST nodes +var CompositeLitExpr = ExprKind.NewBranch("@compositelit") + +// ParenExpr is the type of parenthesis expression AST nodes +var ParenExpr = ExprKind.NewBranch("@parenexpr") + +// SelectorExpr is the type of selector expression AST nodes +var SelectorExpr = ExprKind.NewBranch("@selectorexpr") + +// IndexExpr is the type of AST nodes for index expressions and generic type +// instantiation expressions with one type argument. Note that syntactically +// unambiguous generic instantiations will be extracted as +// `GenericTypeInstantiationExpr`. +var IndexExpr = ExprKind.NewBranch("@indexexpr") + +// GenericFunctionInstantiationExpr is the type of AST nodes that represent an instantiation +// of a generic function. These correspond to some index expression AST nodes and all index +// list expression AST nodes. +var GenericFunctionInstantiationExpr = ExprKind.NewBranch("@genericfunctioninstantiationexpr") + +// GenericTypeInstantiationExpr is the type of AST nodes that represent an instantiation +// of a generic type. These correspond to some index expression AST nodes and all index +// list expression AST nodes. Note some syntactically ambiguous instantiations are +// extracted as an `IndexExpr` to be disambiguated in QL later.
+var GenericTypeInstantiationExpr = ExprKind.NewBranch("@generictypeinstantiationexpr") + +// SliceExpr is the type of slice expression AST nodes +var SliceExpr = ExprKind.NewBranch("@sliceexpr") + +// TypeAssertExpr is the type of type assertion expression AST nodes +var TypeAssertExpr = ExprKind.NewBranch("@typeassertexpr") + +// CallOrConversionExpr is the type of call and conversion expression AST nodes +// (which cannot be distinguished by purely syntactic criteria) +var CallOrConversionExpr = ExprKind.NewBranch("@callorconversionexpr") + +// StarExpr is the type of star expression AST nodes +var StarExpr = ExprKind.NewBranch("@starexpr") + +// OperatorExpr is the type of operator expression AST nodes +var OperatorExpr = NewUnionType("@operatorexpr") + +// LogicalExpr is the type of logical operator expression AST nodes +var LogicalExpr = NewUnionType("@logicalexpr", OperatorExpr) + +// ArithmeticExpr is the type of arithmetic operator expression AST nodes +var ArithmeticExpr = NewUnionType("@arithmeticexpr", OperatorExpr) + +// BitwiseExpr is the type of bitwise operator expression AST nodes +var BitwiseExpr = NewUnionType("@bitwiseexpr", OperatorExpr) + +// UnaryExpr is the type of unary operator expression AST nodes +var UnaryExpr = NewUnionType("@unaryexpr", OperatorExpr) + +// LogicalUnaryExpr is the type of logical unary operator expression AST nodes +var LogicalUnaryExpr = NewUnionType("@logicalunaryexpr", UnaryExpr, LogicalExpr) + +// BitwiseUnaryExpr is the type of bitwise unary operator expression AST nodes +var BitwiseUnaryExpr = NewUnionType("@bitwiseunaryexpr", UnaryExpr, BitwiseExpr) + +// ArithmeticUnaryExpr is the type of arithmetic unary operator expression AST nodes +var ArithmeticUnaryExpr = NewUnionType("@arithmeticunaryexpr", UnaryExpr, ArithmeticExpr) + +// BinaryExpr is the type of binary operator expression AST nodes +var BinaryExpr = NewUnionType("@binaryexpr", OperatorExpr) + +// LogicalBinaryExpr is the type of logical binary 
operator expression AST nodes +var LogicalBinaryExpr = NewUnionType("@logicalbinaryexpr", BinaryExpr, LogicalExpr) + +// BitwiseBinaryExpr is the type of bitwise binary operator expression AST nodes +var BitwiseBinaryExpr = NewUnionType("@bitwisebinaryexpr", BinaryExpr, BitwiseExpr) + +// ArithmeticBinaryExpr is the type of arithmetic binary operator expression AST nodes +var ArithmeticBinaryExpr = NewUnionType("@arithmeticbinaryexpr", BinaryExpr, ArithmeticExpr) + +// ShiftExpr is the type of shift operator expression AST nodes +var ShiftExpr = NewUnionType("@shiftexpr", BitwiseBinaryExpr) + +// Comparison is the type of comparison operator expression AST nodes +var Comparison = NewUnionType("@comparison", BinaryExpr) + +// EqualityTest is the type of equality operator expression AST nodes +var EqualityTest = NewUnionType("@equalitytest", Comparison) + +// RelationalComparison is the type of relational operator expression AST nodes +var RelationalComparison = NewUnionType("@relationalcomparison", Comparison) + +// KeyValueExpr is the type of key-value expression AST nodes +var KeyValueExpr = ExprKind.NewBranch("@keyvalueexpr") + +// ArrayTypeExpr is the type of array type AST nodes +var ArrayTypeExpr = ExprKind.NewBranch("@arraytypeexpr") + +// StructTypeExpr is the type of struct type AST nodes +var StructTypeExpr = ExprKind.NewBranch("@structtypeexpr", FieldParentType) + +// FuncTypeExpr is the type of function type AST nodes +var FuncTypeExpr = ExprKind.NewBranch("@functypeexpr", FieldParentType, ScopeNodeType) + +// InterfaceTypeExpr is the type of interface type AST nodes +var InterfaceTypeExpr = ExprKind.NewBranch("@interfacetypeexpr", FieldParentType) + +// MapTypeExpr is the type of map type AST nodes +var MapTypeExpr = ExprKind.NewBranch("@maptypeexpr") + +// TypeSetLiteralExpr is the type of type set literal type AST nodes +var TypeSetLiteralExpr = ExprKind.NewBranch("@typesetliteralexpr") + +// ChanTypeExpr is the type of channel type AST nodes +var 
ChanTypeExpr = NewUnionType("@chantypeexpr") + +// UnaryExprs is a map from unary operator tokens to the corresponding AST node type +var UnaryExprs = map[token.Token]*BranchType{ + token.ADD: ExprKind.NewBranch("@plusexpr", ArithmeticUnaryExpr), + token.SUB: ExprKind.NewBranch("@minusexpr", ArithmeticUnaryExpr), + token.NOT: ExprKind.NewBranch("@notexpr", LogicalUnaryExpr), + token.XOR: ExprKind.NewBranch("@complementexpr", BitwiseUnaryExpr), + token.MUL: ExprKind.NewBranch("@derefexpr", UnaryExpr), + token.AND: ExprKind.NewBranch("@addressexpr", UnaryExpr), + token.ARROW: ExprKind.NewBranch("@arrowexpr", UnaryExpr), +} + +// BinaryExprs is a map from binary operator tokens to the corresponding AST node type +var BinaryExprs = map[token.Token]*BranchType{ + token.LOR: ExprKind.NewBranch("@lorexpr", LogicalBinaryExpr), + token.LAND: ExprKind.NewBranch("@landexpr", LogicalBinaryExpr), + token.EQL: ExprKind.NewBranch("@eqlexpr", EqualityTest), + token.NEQ: ExprKind.NewBranch("@neqexpr", EqualityTest), + token.LSS: ExprKind.NewBranch("@lssexpr", RelationalComparison), + token.LEQ: ExprKind.NewBranch("@leqexpr", RelationalComparison), + token.GTR: ExprKind.NewBranch("@gtrexpr", RelationalComparison), + token.GEQ: ExprKind.NewBranch("@geqexpr", RelationalComparison), + token.ADD: ExprKind.NewBranch("@addexpr", ArithmeticBinaryExpr), + token.SUB: ExprKind.NewBranch("@subexpr", ArithmeticBinaryExpr), + token.OR: ExprKind.NewBranch("@orexpr", BitwiseBinaryExpr), + token.XOR: ExprKind.NewBranch("@xorexpr", BitwiseBinaryExpr), + token.MUL: ExprKind.NewBranch("@mulexpr", ArithmeticBinaryExpr), + token.QUO: ExprKind.NewBranch("@quoexpr", ArithmeticBinaryExpr), + token.REM: ExprKind.NewBranch("@remexpr", ArithmeticBinaryExpr), + token.SHL: ExprKind.NewBranch("@shlexpr", ShiftExpr), + token.SHR: ExprKind.NewBranch("@shrexpr", ShiftExpr), + token.AND: ExprKind.NewBranch("@andexpr", BitwiseBinaryExpr), + token.AND_NOT: ExprKind.NewBranch("@andnotexpr", BitwiseBinaryExpr), +} + +// 
ChanTypeExprs is a map from channel type expressions to the corresponding AST node type +var ChanTypeExprs = map[ast.ChanDir]*BranchType{ + ast.SEND: ExprKind.NewBranch("@sendchantypeexpr", ChanTypeExpr), + ast.RECV: ExprKind.NewBranch("@recvchantypeexpr", ChanTypeExpr), + ast.SEND | ast.RECV: ExprKind.NewBranch("@sendrcvchantypeexpr", ChanTypeExpr), +} + +// ErrorExpr is an AST node type that is not used anywhere +var ErrorExpr = ExprKind.NewBranch("@errorexpr") + +// StmtKind is a case type for distinguishing different kinds of statement AST nodes +var StmtKind = NewCaseType(StmtType, "kind") + +// BadStmtType is the type of bad (that is, unparseable) statement AST nodes +var BadStmtType = StmtKind.NewBranch("@badstmt") + +// DeclStmtType is the type of declaration statement AST nodes +var DeclStmtType = StmtKind.NewBranch("@declstmt", DeclParentType) + +// EmptyStmtType is the type of empty statement AST nodes +var EmptyStmtType = StmtKind.NewBranch("@emptystmt") + +// LabeledStmtType is the type of labeled statement AST nodes +var LabeledStmtType = StmtKind.NewBranch("@labeledstmt") + +// ExprStmtType is the type of expression statement AST nodes +var ExprStmtType = StmtKind.NewBranch("@exprstmt") + +// SendStmtType is the type of send statement AST nodes +var SendStmtType = StmtKind.NewBranch("@sendstmt") + +// IncDecStmtType is the type of increment/decrement statement AST nodes +var IncDecStmtType = NewUnionType("@incdecstmt") + +// IncStmtType is the type of increment statement AST nodes +var IncStmtType = StmtKind.NewBranch("@incstmt", IncDecStmtType) + +// DecStmtType is the type of decrement statement AST nodes +var DecStmtType = StmtKind.NewBranch("@decstmt", IncDecStmtType) + +// AssignmentType is the type of assignment statement AST nodes +var AssignmentType = NewUnionType("@assignment") + +// SimpleAssignStmtType is the type of simple (i.e., non-compound) assignment statement AST nodes +var SimpleAssignStmtType = NewUnionType("@simpleassignstmt",
AssignmentType) + +// CompoundAssignStmtType is the type of compound assignment statement AST nodes +var CompoundAssignStmtType = NewUnionType("@compoundassignstmt", AssignmentType) + +// GoStmtType is the type of go statement AST nodes +var GoStmtType = StmtKind.NewBranch("@gostmt") + +// DeferStmtType is the type of defer statement AST nodes +var DeferStmtType = StmtKind.NewBranch("@deferstmt") + +// ReturnStmtType is the type of return statement AST nodes +var ReturnStmtType = StmtKind.NewBranch("@returnstmt") + +// BranchStmtType is the type of branch statement AST nodes +var BranchStmtType = NewUnionType("@branchstmt") + +// BreakStmtType is the type of break statement AST nodes +var BreakStmtType = StmtKind.NewBranch("@breakstmt", BranchStmtType) + +// ContinueStmtType is the type of continue statement AST nodes +var ContinueStmtType = StmtKind.NewBranch("@continuestmt", BranchStmtType) + +// GotoStmtType is the type of goto statement AST nodes +var GotoStmtType = StmtKind.NewBranch("@gotostmt", BranchStmtType) + +// FallthroughStmtType is the type of fallthrough statement AST nodes +var FallthroughStmtType = StmtKind.NewBranch("@fallthroughstmt", BranchStmtType) + +// BlockStmtType is the type of block statement AST nodes +var BlockStmtType = StmtKind.NewBranch("@blockstmt", ScopeNodeType) + +// IfStmtType is the type of if statement AST nodes +var IfStmtType = StmtKind.NewBranch("@ifstmt", ScopeNodeType) + +// CaseClauseType is the type of case clause AST nodes +var CaseClauseType = StmtKind.NewBranch("@caseclause", ScopeNodeType) + +// SwitchStmtType is the type of switch statement AST nodes, covering both expression switch and type switch +var SwitchStmtType = NewUnionType("@switchstmt", ScopeNodeType) + +// ExprSwitchStmtType is the type of expression-switch statement AST nodes +var ExprSwitchStmtType = StmtKind.NewBranch("@exprswitchstmt", SwitchStmtType) + +// TypeSwitchStmtType is the type of type-switch statement AST nodes +var TypeSwitchStmtType = 
StmtKind.NewBranch("@typeswitchstmt", SwitchStmtType) + +// CommClauseType is the type of comm clause AST nodes +var CommClauseType = StmtKind.NewBranch("@commclause", ScopeNodeType) + +// SelectStmtType is the type of select statement AST nodes +var SelectStmtType = StmtKind.NewBranch("@selectstmt") + +// LoopStmtType is the type of loop statement AST nodes (including for statements and range statements) +var LoopStmtType = NewUnionType("@loopstmt", ScopeNodeType) + +// ForStmtType is the type of for statement AST nodes +var ForStmtType = StmtKind.NewBranch("@forstmt", LoopStmtType) + +// RangeStmtType is the type of range statement AST nodes +var RangeStmtType = StmtKind.NewBranch("@rangestmt", LoopStmtType) + +// AssignStmtTypes is a map from assignment operator tokens to corresponding AST node types +var AssignStmtTypes = map[token.Token]*BranchType{ + token.ASSIGN: StmtKind.NewBranch("@assignstmt", SimpleAssignStmtType), + token.DEFINE: StmtKind.NewBranch("@definestmt", SimpleAssignStmtType), + token.ADD_ASSIGN: StmtKind.NewBranch("@addassignstmt", CompoundAssignStmtType), + token.SUB_ASSIGN: StmtKind.NewBranch("@subassignstmt", CompoundAssignStmtType), + token.MUL_ASSIGN: StmtKind.NewBranch("@mulassignstmt", CompoundAssignStmtType), + token.QUO_ASSIGN: StmtKind.NewBranch("@quoassignstmt", CompoundAssignStmtType), + token.REM_ASSIGN: StmtKind.NewBranch("@remassignstmt", CompoundAssignStmtType), + token.AND_ASSIGN: StmtKind.NewBranch("@andassignstmt", CompoundAssignStmtType), + token.OR_ASSIGN: StmtKind.NewBranch("@orassignstmt", CompoundAssignStmtType), + token.XOR_ASSIGN: StmtKind.NewBranch("@xorassignstmt", CompoundAssignStmtType), + token.SHL_ASSIGN: StmtKind.NewBranch("@shlassignstmt", CompoundAssignStmtType), + token.SHR_ASSIGN: StmtKind.NewBranch("@shrassignstmt", CompoundAssignStmtType), + token.AND_NOT_ASSIGN: StmtKind.NewBranch("@andnotassignstmt", CompoundAssignStmtType), +} + +// DeclKind is a case type for distinguishing different kinds of
declaration AST nodes +var DeclKind = NewCaseType(DeclType, "kind") + +// BadDeclType is the type of bad (that is, unparseable) declaration AST nodes +var BadDeclType = DeclKind.NewBranch("@baddecl") + +// GenDeclType is the type of generic declaration AST nodes +var GenDeclType = NewUnionType("@gendecl", DocumentableType) + +// ImportDeclType is the type of import declaration AST nodes +var ImportDeclType = DeclKind.NewBranch("@importdecl", GenDeclType) + +// ConstDeclType is the type of constant declaration AST nodes +var ConstDeclType = DeclKind.NewBranch("@constdecl", GenDeclType) + +// TypeDeclType is the type of type declaration AST nodes +var TypeDeclType = DeclKind.NewBranch("@typedecl", GenDeclType) + +// VarDeclType is the type of variable declaration AST nodes +var VarDeclType = DeclKind.NewBranch("@vardecl", GenDeclType) + +// FuncDeclType is the type of function declaration AST nodes +var FuncDeclType = DeclKind.NewBranch("@funcdecl", DocumentableType, FuncDefType, TypeParamDeclParentType) + +// SpecKind is a case type for distinguishing different kinds of declaration specification nodes +var SpecKind = NewCaseType(SpecType, "kind") + +// ImportSpecType is the type of import declaration specification nodes +var ImportSpecType = SpecKind.NewBranch("@importspec") + +// ValueSpecType is the type of value declaration specification nodes +var ValueSpecType = SpecKind.NewBranch("@valuespec") + +// TypeSpecType is the type of type declaration specification nodes +var TypeSpecType = NewUnionType("@typespec", TypeParamDeclParentType) + +// TypeDefSpecType is the type of type declaration specification nodes corresponding to type definitions +var TypeDefSpecType = SpecKind.NewBranch("@typedefspec", TypeSpecType) + +// AliasSpecType is the type of type declaration specification nodes corresponding to alias declarations +var AliasSpecType = SpecKind.NewBranch("@aliasspec", TypeSpecType) + +// ObjectType is the type of objects (that is, declared entities) +var 
ObjectType = NewPrimaryKeyType("@object") + +// ObjectKind is a case type for distinguishing different kinds of built-in and declared objects +var ObjectKind = NewCaseType(ObjectType, "kind") + +// TypeParamParentObjectType is the type of objects that can have type parameters as children +var TypeParamParentObjectType = NewUnionType("@typeparamparentobject") + +// DeclObjectType is the type of declared objects +var DeclObjectType = NewUnionType("@declobject") + +// BuiltinObjectType is the type of built-in objects +var BuiltinObjectType = NewUnionType("@builtinobject") + +// PkgObjectType is the type of imported packages +var PkgObjectType = ObjectKind.NewBranch("@pkgobject") + +// TypeObjectType is the type of declared or built-in named types +var TypeObjectType = NewUnionType("@typeobject") + +// DeclTypeObjectType is the type of declared named types +var DeclTypeObjectType = ObjectKind.NewBranch("@decltypeobject", TypeObjectType, DeclObjectType, TypeParamParentObjectType) + +// BuiltinTypeObjectType is the type of built-in named types +var BuiltinTypeObjectType = ObjectKind.NewBranch("@builtintypeobject", TypeObjectType, BuiltinObjectType) + +// ValueObjectType is the type of declared or built-in variables or constants +var ValueObjectType = NewUnionType("@valueobject") + +// ConstObjectType is the type of declared or built-in constants +var ConstObjectType = NewUnionType("@constobject", ValueObjectType) + +// DeclConstObjectType is the type of declared constants +var DeclConstObjectType = ObjectKind.NewBranch("@declconstobject", ConstObjectType, DeclObjectType) + +// BuiltinConstObjectType is the type of built-in constants +var BuiltinConstObjectType = ObjectKind.NewBranch("@builtinconstobject", ConstObjectType, BuiltinObjectType) + +// VarObjectType is the type of declared or built-in variables (the latter do not currently exist) +var VarObjectType = NewUnionType("@varobject", ValueObjectType) + +// DeclVarObjectType is the type of declared variables including 
function parameters, results and struct fields +var DeclVarObjectType = ObjectKind.NewBranch("@declvarobject", VarObjectType, DeclObjectType) + +// FunctionObjectType is the type of declared or built-in functions +var FunctionObjectType = NewUnionType("@functionobject", ValueObjectType) + +// DeclFuncObjectType is the type of declared functions, including (abstract and concrete) methods +var DeclFuncObjectType = ObjectKind.NewBranch("@declfunctionobject", FunctionObjectType, DeclObjectType, TypeParamParentObjectType) + +// BuiltinFuncObjectType is the type of built-in functions +var BuiltinFuncObjectType = ObjectKind.NewBranch("@builtinfunctionobject", FunctionObjectType, BuiltinObjectType) + +// LabelObjectType is the type of statement labels +var LabelObjectType = ObjectKind.NewBranch("@labelobject") + +// ScopeType is the type of scopes +var ScopeType = NewPrimaryKeyType("@scope") + +// ScopeKind is a case type for distinguishing different kinds of scopes +var ScopeKind = NewCaseType(ScopeType, "kind") + +// UniverseScopeType is the type of the universe scope +var UniverseScopeType = ScopeKind.NewBranch("@universescope") + +// PackageScopeType is the type of package scopes +var PackageScopeType = ScopeKind.NewBranch("@packagescope") + +// LocalScopeType is the type of local (that is, non-universe, non-package) scopes +var LocalScopeType = ScopeKind.NewBranch("@localscope", LocatableType) + +// TypeKind is a case type for distinguishing different kinds of types +var TypeKind = NewCaseType(TypeType, "kind") + +// BasicType is the union of all basic types +var BasicType = NewUnionType("@basictype") + +// BoolType is the union of the normal and literal bool types +var BoolType = NewUnionType("@booltype", BasicType) + +// NumericType is the union of numeric types +var NumericType = NewUnionType("@numerictype", BasicType) + +// IntegerType is the union of integer types +var IntegerType = NewUnionType("@integertype", NumericType) + +// SignedIntegerType is the union of 
signed integer types +var SignedIntegerType = NewUnionType("@signedintegertype", IntegerType) + +// UnsignedIntegerType is the union of unsigned integer types +var UnsignedIntegerType = NewUnionType("@unsignedintegertype", IntegerType) + +// FloatType is the union of floating-point types +var FloatType = NewUnionType("@floattype", NumericType) + +// ComplexType is the union of complex types +var ComplexType = NewUnionType("@complextype", NumericType) + +// StringType is the union of the normal and literal string types +var StringType = NewUnionType("@stringtype", BasicType) + +// LiteralType is the union of literal types +var LiteralType = NewUnionType("@literaltype", BasicType) + +// BasicTypes is a map from basic type kinds to the corresponding entity types +var BasicTypes = map[gotypes.BasicKind]*BranchType{ + gotypes.Invalid: TypeKind.NewBranch("@invalidtype", BasicType), + gotypes.Bool: TypeKind.NewBranch("@boolexprtype", BoolType), + gotypes.Int: TypeKind.NewBranch("@inttype", SignedIntegerType), + gotypes.Int8: TypeKind.NewBranch("@int8type", SignedIntegerType), + gotypes.Int16: TypeKind.NewBranch("@int16type", SignedIntegerType), + gotypes.Int32: TypeKind.NewBranch("@int32type", SignedIntegerType), + gotypes.Int64: TypeKind.NewBranch("@int64type", SignedIntegerType), + gotypes.Uint: TypeKind.NewBranch("@uinttype", UnsignedIntegerType), + gotypes.Uint8: TypeKind.NewBranch("@uint8type", UnsignedIntegerType), + gotypes.Uint16: TypeKind.NewBranch("@uint16type", UnsignedIntegerType), + gotypes.Uint32: TypeKind.NewBranch("@uint32type", UnsignedIntegerType), + gotypes.Uint64: TypeKind.NewBranch("@uint64type", UnsignedIntegerType), + gotypes.Uintptr: TypeKind.NewBranch("@uintptrtype", UnsignedIntegerType), + gotypes.Float32: TypeKind.NewBranch("@float32type", FloatType), + gotypes.Float64: TypeKind.NewBranch("@float64type", FloatType), + gotypes.Complex64: TypeKind.NewBranch("@complex64type", ComplexType), + gotypes.Complex128: TypeKind.NewBranch("@complex128type", 
ComplexType), + gotypes.String: TypeKind.NewBranch("@stringexprtype", StringType), + gotypes.UnsafePointer: TypeKind.NewBranch("@unsafepointertype", BasicType), + gotypes.UntypedBool: TypeKind.NewBranch("@boolliteraltype", LiteralType, BoolType), + gotypes.UntypedInt: TypeKind.NewBranch("@intliteraltype", LiteralType, SignedIntegerType), + gotypes.UntypedRune: TypeKind.NewBranch("@runeliteraltype", LiteralType, SignedIntegerType), + gotypes.UntypedFloat: TypeKind.NewBranch("@floatliteraltype", LiteralType, FloatType), + gotypes.UntypedComplex: TypeKind.NewBranch("@complexliteraltype", LiteralType, ComplexType), + gotypes.UntypedString: TypeKind.NewBranch("@stringliteraltype", LiteralType, StringType), + gotypes.UntypedNil: TypeKind.NewBranch("@nilliteraltype", LiteralType), +} + +// CompositeType is the type of all composite (that is, non-basic) types +var CompositeType = NewUnionType("@compositetype") + +// TypeParamType is the type of type parameter types +var TypeParamType = TypeKind.NewBranch("@typeparamtype", CompositeType) + +// ElementContainerType is the type of types that have elements, such as arrays +// and channels +var ElementContainerType = NewUnionType("@containertype", CompositeType) + +// ArrayType is the type of array types +var ArrayType = TypeKind.NewBranch("@arraytype", ElementContainerType) + +// SliceType is the type of slice types +var SliceType = TypeKind.NewBranch("@slicetype", ElementContainerType) + +// StructType is the type of struct types +var StructType = TypeKind.NewBranch("@structtype", CompositeType) + +// PointerType is the type of pointer types +var PointerType = TypeKind.NewBranch("@pointertype", CompositeType) + +// InterfaceType is the type of interface types +var InterfaceType = TypeKind.NewBranch("@interfacetype", CompositeType) + +// TupleType is the type of tuple types +var TupleType = TypeKind.NewBranch("@tupletype", CompositeType) + +// SignatureType is the type of signature types +var SignatureType = 
TypeKind.NewBranch("@signaturetype", CompositeType) + +// MapType is the type of map types +var MapType = TypeKind.NewBranch("@maptype", ElementContainerType) + +// ChanType is the type of channel types +var ChanType = NewUnionType("@chantype", ElementContainerType) + +// ChanTypes is a map from channel type directions to the corresponding type +var ChanTypes = map[gotypes.ChanDir]*BranchType{ + gotypes.SendOnly: TypeKind.NewBranch("@sendchantype", ChanType), + gotypes.RecvOnly: TypeKind.NewBranch("@recvchantype", ChanType), + gotypes.SendRecv: TypeKind.NewBranch("@sendrcvchantype", ChanType), +} + +// NamedType is the type of named types +var NamedType = TypeKind.NewBranch("@namedtype", CompositeType) + +// TypeSetLiteral is the type of type set literals +var TypeSetLiteral = TypeKind.NewBranch("@typesetliteraltype", CompositeType) + +// PackageType is the type of packages +var PackageType = NewPrimaryKeyType("@package") + +// ModExprType is the type of go.mod expression nodes +var ModExprType = NewPrimaryKeyType("@modexpr", ModExprParentType, DocumentableType) + +// ModExprKind is a case type for distinguishing different kinds of go.mod expression nodes +var ModExprKind = NewCaseType(ModExprType, "kind") + +// ModCommentBlockType is the type of go.mod comment block AST nodes +var ModCommentBlockType = ModExprKind.NewBranch("@modcommentblock") + +// ModLineType is the type of go.mod line AST nodes +var ModLineType = ModExprKind.NewBranch("@modline") + +// ModLineBlockType is the type of go.mod line block AST nodes +var ModLineBlockType = ModExprKind.NewBranch("@modlineblock") + +// ModLParenType is the type of go.mod line block start AST nodes +var ModLParenType = ModExprKind.NewBranch("@modlparen") + +// ModRParenType is the type of go.mod line block end AST nodes +var ModRParenType = ModExprKind.NewBranch("@modrparen") + +// ErrorType is the type of frontend errors +var ErrorType = NewPrimaryKeyType("@error") + +// ErrorKind is a case type for distinguishing 
different kinds of frontend errors +var ErrorKind = NewCaseType(ErrorType, "kind") + +// ErrorTypes is a map from error kinds to the corresponding type +var ErrorTypes = map[packages.ErrorKind]*BranchType{ + packages.UnknownError: ErrorKind.NewBranch("@unknownerror"), + packages.ListError: ErrorKind.NewBranch("@listerror"), + packages.ParseError: ErrorKind.NewBranch("@parseerror"), + packages.TypeError: ErrorKind.NewBranch("@typeerror"), +} + +// ErrorTags is a map from error kinds to the corresponding tag +var ErrorTags = map[packages.ErrorKind]string{ + packages.UnknownError: "@unknownerror", + packages.ListError: "@listerror", + packages.ParseError: "@parseerror", + packages.TypeError: "@typeerror", +} + +// LocationsDefaultTable is the table defining location objects +var LocationsDefaultTable = NewTable("locations_default", + EntityColumn(LocationDefaultType, "id").Key(), + EntityColumn(FileType, "file"), + IntColumn("beginLine"), + IntColumn("beginColumn"), + IntColumn("endLine"), + IntColumn("endColumn"), +) + +// NumlinesTable is the table containing LoC information +var NumlinesTable = NewTable("numlines", + EntityColumn(SourceLineType, "element_id"), + IntColumn("num_lines"), + IntColumn("num_code"), + IntColumn("num_comment"), +) + +// FilesTable is the table defining file nodes +var FilesTable = NewTable("files", + EntityColumn(FileType, "id").Key(), + StringColumn("name"), +) + +// FoldersTable is the table defining folder entities +var FoldersTable = NewTable("folders", + EntityColumn(FolderType, "id").Key(), + StringColumn("name"), +) + +// ContainerParentTable is the table defining the parent-child relation among container entities +var ContainerParentTable = NewTable("containerparent", + EntityColumn(ContainerType, "parent"), + EntityColumn(ContainerType, "child").Unique(), +) + +// HasLocationTable is the table associating entities with their locations +var HasLocationTable = NewTable("has_location", + EntityColumn(LocatableType,
"locatable").Unique(), + EntityColumn(LocationType, "location"), +) + +// CommentGroupsTable is the table defining comment group entities +var CommentGroupsTable = NewTable("comment_groups", + EntityColumn(CommentGroupType, "id").Key(), + EntityColumn(FileType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// CommentsTable is the table defining comment entities +var CommentsTable = NewTable("comments", + EntityColumn(CommentType, "id").Key(), + IntColumn("kind"), + EntityColumn(CommentGroupType, "parent"), + IntColumn("idx"), + StringColumn("text"), +) + +// DocCommentsTable is the table associating doc comments with the nodes they document +var DocCommentsTable = NewTable("doc_comments", + EntityColumn(DocumentableType, "node").Unique(), + EntityColumn(CommentGroupType, "comment"), +) + +// ExprsTable is the table defining expression AST nodes +var ExprsTable = NewTable("exprs", + EntityColumn(ExprType, "id").Key(), + IntColumn("kind"), + EntityColumn(ExprParentType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// LiteralsTable is the table associating literal expression AST nodes with their values +var LiteralsTable = NewTable("literals", + EntityColumn(ExprType, "expr").Unique(), + StringColumn("value"), + StringColumn("raw"), +) + +// ConstValuesTable is the table associating constant expressions with their values +var ConstValuesTable = NewTable("constvalues", + EntityColumn(ExprType, "expr").Unique(), + StringColumn("value"), + StringColumn("exact"), +) + +// FieldsTable is the table defining field AST nodes +var FieldsTable = NewTable("fields", + EntityColumn(FieldType, "id").Key(), + EntityColumn(FieldParentType, "parent"), + IntColumn("idx"), +) + +// TypeParamDeclsTable is the table defining type param declaration AST nodes +var TypeParamDeclsTable = NewTable("typeparamdecls", + EntityColumn(TypeParamDeclType, "id").Key(), + EntityColumn(TypeParamDeclParentType, "parent"), + IntColumn("idx"), +) + +// StmtsTable is the table 
defining statement AST nodes +var StmtsTable = NewTable("stmts", + EntityColumn(StmtType, "id").Key(), + IntColumn("kind"), + EntityColumn(StmtParentType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// DeclsTable is the table defining declaration AST nodes +var DeclsTable = NewTable("decls", + EntityColumn(DeclType, "id").Key(), + IntColumn("kind"), + EntityColumn(DeclParentType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// SpecsTable is the table defining declaration specification AST nodes +var SpecsTable = NewTable("specs", + EntityColumn(SpecType, "id").Key(), + IntColumn("kind"), + EntityColumn(GenDeclType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// ScopesTable is the table defining scopes +var ScopesTable = NewTable("scopes", + EntityColumn(ScopeType, "id").Key(), + IntColumn("kind"), +) + +// ScopeNestingTable is the table describing scope nesting +var ScopeNestingTable = NewTable("scopenesting", + EntityColumn(ScopeType, "inner").Unique(), + EntityColumn(ScopeType, "outer"), +) + +// ScopeNodesTable is the table associating local scopes with the AST nodes that induce them +var ScopeNodesTable = NewTable("scopenodes", + EntityColumn(ScopeNodeType, "node").Unique(), + EntityColumn(LocalScopeType, "scope"), +) + +// ObjectsTable is the table describing objects (that is, declared entities) +var ObjectsTable = NewTable("objects", + EntityColumn(ObjectType, "id").Key(), + IntColumn("kind"), + StringColumn("name"), +) + +// ObjectScopesTable is the table describing the scope to which an object belongs (if any) +var ObjectScopesTable = NewTable("objectscopes", + EntityColumn(ObjectType, "object").Unique(), + EntityColumn(ScopeType, "scope"), +) + +// ObjectTypesTable is the table describing the type of an object (if any) +var ObjectTypesTable = NewTable("objecttypes", + EntityColumn(ObjectType, "object").Unique(), + EntityColumn(TypeType, "tp"), +) + +// MethodReceiversTable maps methods to their receiver 
+var MethodReceiversTable = NewTable("methodreceivers", + EntityColumn(ObjectType, "method").Unique(), + EntityColumn(ObjectType, "receiver"), +) + +// FieldStructsTable maps fields to the structs they are in +var FieldStructsTable = NewTable("fieldstructs", + EntityColumn(ObjectType, "field").Unique(), + EntityColumn(StructType, "struct"), +) + +// MethodHostsTable maps interface methods to the named type they belong to +var MethodHostsTable = NewTable("methodhosts", + EntityColumn(ObjectType, "method"), + EntityColumn(NamedType, "host"), +) + +// DefsTable maps identifiers to the objects they define +var DefsTable = NewTable("defs", + EntityColumn(IdentExpr, "ident"), + EntityColumn(ObjectType, "object"), +) + +// UsesTable maps identifiers to the objects they denote +var UsesTable = NewTable("uses", + EntityColumn(IdentExpr, "ident"), + EntityColumn(ObjectType, "object"), +) + +// TypesTable is the table describing types +var TypesTable = NewTable("types", + EntityColumn(TypeType, "id").Key(), + IntColumn("kind"), +) + +// TypeOfTable is the table associating expressions with their types (if known) +var TypeOfTable = NewTable("type_of", + EntityColumn(ExprType, "expr").Unique(), + EntityColumn(TypeType, "tp"), +) + +// TypeNameTable is the table associating named types with their names +var TypeNameTable = NewTable("typename", + EntityColumn(TypeType, "tp").Unique(), + StringColumn("name"), +) + +// KeyTypeTable is the table associating maps with their key type +var KeyTypeTable = NewTable("key_type", + EntityColumn(MapType, "map").Unique(), + EntityColumn(TypeType, "tp"), +) + +// ElementTypeTable is the table associating container types with their element +// type +var ElementTypeTable = NewTable("element_type", + EntityColumn(ElementContainerType, "container").Unique(), + EntityColumn(TypeType, "tp"), +) + +// BaseTypeTable is the table associating pointer types with their base type +var BaseTypeTable = NewTable("base_type", + EntityColumn(PointerType, 
"ptr").Unique(), + EntityColumn(TypeType, "tp"), +) + +// UnderlyingTypeTable is the table associating named types with their +// underlying type +var UnderlyingTypeTable = NewTable("underlying_type", + EntityColumn(NamedType, "named").Unique(), + EntityColumn(TypeType, "tp"), +) + +// ComponentTypesTable is the table associating composite types with their component types +var ComponentTypesTable = NewTable("component_types", + EntityColumn(CompositeType, "parent"), + IntColumn("index"), + StringColumn("name"), + EntityColumn(TypeType, "tp"), +).KeySet("parent", "index") + +// ArrayLengthTable is the table associating array types with their length (represented as a string +// since Go array lengths are 64-bit and hence do not always fit into a QL integer) +var ArrayLengthTable = NewTable("array_length", + EntityColumn(ArrayType, "tp").Unique(), + StringColumn("len"), +) + +// TypeObjectTable maps types to their corresponding objects, if any +var TypeObjectTable = NewTable("type_objects", + EntityColumn(TypeType, "tp").Unique(), + EntityColumn(ObjectType, "object"), +) + +// PackagesTable is the table describing packages +var PackagesTable = NewTable("packages", + EntityColumn(PackageType, "id").Key(), + StringColumn("name"), + StringColumn("path"), + EntityColumn(PackageScopeType, "scope"), +) + +// ModExprsTable is the table defining expression AST nodes for go.mod files +var ModExprsTable = NewTable("modexprs", + EntityColumn(ModExprType, "id").Key(), + IntColumn("kind"), + EntityColumn(ModExprParentType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// ModTokensTable is the table associating go.mod tokens with their Line or LineBlock +var ModTokensTable = NewTable("modtokens", + StringColumn("token"), + EntityColumn(ModExprType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") + +// ErrorsTable is the table describing frontend errors +var ErrorsTable = NewTable("errors", + EntityColumn(ErrorType, "id").Key(), + IntColumn("kind"), + 
StringColumn("msg"), + StringColumn("rawpos"), + StringColumn("file"), + IntColumn("line"), + IntColumn("col"), + EntityColumn(PackageType, "package"), + IntColumn("idx"), +).KeySet("package", "idx") + +// HasEllipsisTable is the table containing all call expressions that have ellipses +var HasEllipsisTable = NewTable("has_ellipsis", + EntityColumn(CallOrConversionExpr, "id"), +) + +// VariadicTable is the table describing which functions are variadic +var VariadicTable = NewTable("variadic", + EntityColumn(SignatureType, "id"), +) + +// TypeParamTable is the table describing type parameter types +var TypeParamTable = NewTable("typeparam", + EntityColumn(TypeParamType, "tp").Unique(), + StringColumn("name"), + EntityColumn(CompositeType, "bound"), + EntityColumn(TypeParamParentObjectType, "parent"), + IntColumn("idx"), +).KeySet("parent", "idx") diff --git a/go/extractor/extractor.go b/go/extractor/extractor.go new file mode 100644 index 00000000000..1be2bfef224 --- /dev/null +++ b/go/extractor/extractor.go @@ -0,0 +1,2021 @@ +package extractor + +import ( + "crypto/md5" + "encoding/hex" + "fmt" + "go/ast" + "go/constant" + "go/scanner" + "go/token" + "go/types" + "io" + "io/ioutil" + "log" + "os" + "path/filepath" + "regexp" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "github.com/github/codeql-go/extractor/dbscheme" + "github.com/github/codeql-go/extractor/srcarchive" + "github.com/github/codeql-go/extractor/trap" + "github.com/github/codeql-go/extractor/util" + "golang.org/x/tools/go/packages" +) + +var MaxGoRoutines int +var typeParamParent map[*types.TypeParam]types.Object = make(map[*types.TypeParam]types.Object) + +func init() { + // this sets the number of threads that the Go runtime will spawn; this is separate + // from the number of goroutines that the program spawns, which are scheduled into + // the system threads by the Go runtime scheduler + threads := os.Getenv("LGTM_THREADS") + if maxprocs, err := strconv.Atoi(threads); err == nil && 
maxprocs > 0 { + log.Printf("Max threads set to %d", maxprocs) + runtime.GOMAXPROCS(maxprocs) + } else if threads != "" { + log.Printf("Warning: LGTM_THREADS value %s is not valid, defaulting to using all available threads.", threads) + } + // if the value is empty or not set, use the Go default, which is the number of cores + // available since Go 1.5, but is subject to change + + var err error + if MaxGoRoutines, err = strconv.Atoi(util.Getenv( + "CODEQL_EXTRACTOR_GO_MAX_GOROUTINES", + "SEMMLE_MAX_GOROUTINES", + )); err != nil { + MaxGoRoutines = 32 + } else { + log.Printf("Max goroutines set to %d", MaxGoRoutines) + } +} + +// Extract extracts the packages specified by the given patterns +func Extract(patterns []string) error { + return ExtractWithFlags(nil, patterns) +} + +// ExtractWithFlags extracts the packages specified by the given patterns and build flags +func ExtractWithFlags(buildFlags []string, patterns []string) error { + startTime := time.Now() + + extraction := NewExtraction(buildFlags, patterns) + defer extraction.StatWriter.Close() + + modEnabled := os.Getenv("GO111MODULE") != "off" + if !modEnabled { + log.Println("Go module mode disabled.") + } + + modFlags := make([]string, 0, 1) + for _, flag := range buildFlags { + if strings.HasPrefix(flag, "-mod=") { + modFlags = append(modFlags, flag) + } + } + + log.Println("Running packages.Load.") + cfg := &packages.Config{ + Mode: packages.NeedName | packages.NeedFiles | + packages.NeedCompiledGoFiles | + packages.NeedImports | packages.NeedDeps | + packages.NeedTypes | packages.NeedTypesSizes | + packages.NeedTypesInfo | packages.NeedSyntax, + BuildFlags: buildFlags, + } + pkgs, err := packages.Load(cfg, patterns...) 
+ if err != nil { + return err + } + log.Println("Done running packages.Load.") + + if len(pkgs) == 0 { + log.Println("No packages found.") + } + + log.Println("Extracting universe scope.") + extractUniverseScope() + log.Println("Done extracting universe scope.") + + // a map of package path to package root directory (currently the module root or the source directory) + pkgRoots := make(map[string]string) + // a map of package path to source code directory + pkgDirs := make(map[string]string) + // root directories of packages that we want to extract + wantedRoots := make(map[string]bool) + + // Do a post-order traversal and extract the package scope of each package + packages.Visit(pkgs, func(pkg *packages.Package) bool { + return true + }, func(pkg *packages.Package) { + log.Printf("Processing package %s.", pkg.PkgPath) + + if _, ok := pkgRoots[pkg.PkgPath]; !ok { + mdir := util.GetModDir(pkg.PkgPath, modFlags...) + pdir := util.GetPkgDir(pkg.PkgPath, modFlags...) + // GetModDir returns the empty string if the module directory cannot be determined, e.g. if the package + // is not using modules. 
If this is the case, fall back to the package directory + if mdir == "" { + mdir = pdir + } + pkgRoots[pkg.PkgPath] = mdir + pkgDirs[pkg.PkgPath] = pdir + } + + log.Printf("Extracting types for package %s.", pkg.PkgPath) + + tw, err := trap.NewWriter(pkg.PkgPath, pkg) + if err != nil { + log.Fatal(err) + } + defer tw.Close() + + scope := extractPackageScope(tw, pkg) + extractObjectTypes(tw) + lbl := tw.Labeler.GlobalID(util.EscapeTrapSpecialChars(pkg.PkgPath) + ";pkg") + dbscheme.PackagesTable.Emit(tw, lbl, pkg.Name, pkg.PkgPath, scope) + + if len(pkg.Errors) != 0 { + log.Printf("Warning: encountered errors extracting package `%s`:", pkg.PkgPath) + for i, err := range pkg.Errors { + log.Printf(" %s", err.Error()) + extraction.extractError(tw, err, lbl, i) + } + } + log.Printf("Done extracting types for package %s.", pkg.PkgPath) + }) + + for _, pkg := range pkgs { + if pkgRoots[pkg.PkgPath] == "" { + log.Fatalf("Unable to get a source directory for input package %s.", pkg.PkgPath) + } + wantedRoots[pkgRoots[pkg.PkgPath]] = true + wantedRoots[pkgDirs[pkg.PkgPath]] = true + } + + log.Println("Done processing dependencies.") + + log.Println("Starting to extract packages.") + + sep := regexp.QuoteMeta(string(filepath.Separator)) + // if a path matches this regexp, we don't want to extract this package. Currently, it checks + // - that the path does not contain a `..` segment, and + // - the path does not contain a `vendor` directory. 
+ noExtractRe := regexp.MustCompile(`.*(^|` + sep + `)(\.\.|vendor)($|` + sep + `).*`) + + // extract AST information for all packages + packages.Visit(pkgs, func(pkg *packages.Package) bool { + return true + }, func(pkg *packages.Package) { + for root, _ := range wantedRoots { + relDir, err := filepath.Rel(root, pkgDirs[pkg.PkgPath]) + if err != nil || noExtractRe.MatchString(relDir) { + // if the path can't be made relative or matches the noExtract regexp skip it + continue + } + + extraction.extractPackage(pkg) + + if pkgRoots[pkg.PkgPath] != "" { + modPath := filepath.Join(pkgRoots[pkg.PkgPath], "go.mod") + if util.FileExists(modPath) { + log.Printf("Extracting %s", modPath) + start := time.Now() + + err := extraction.extractGoMod(modPath) + if err != nil { + log.Printf("Failed to extract go.mod: %s", err.Error()) + } + + end := time.Since(start) + log.Printf("Done extracting %s (%dms)", modPath, end.Nanoseconds()/1000000) + } + } + + return + } + + log.Printf("Skipping dependency package %s.", pkg.PkgPath) + }) + + extraction.WaitGroup.Wait() + + log.Println("Done extracting packages.") + + t := time.Now() + elapsed := t.Sub(startTime) + dbscheme.CompilationFinishedTable.Emit(extraction.StatWriter, extraction.Label, 0.0, elapsed.Seconds()) + + return nil +} + +type Extraction struct { + // A lock for preventing concurrent writes to maps and the stat trap writer, as they are not + // thread-safe + Lock sync.Mutex + LabelKey string + Label trap.Label + StatWriter *trap.Writer + WaitGroup sync.WaitGroup + GoroutineSem *semaphore + FdSem *semaphore + NextFileId int + FileInfo map[string]*FileInfo + SeenGoMods map[string]bool +} + +type FileInfo struct { + Idx int + NextErr int +} + +func (extraction *Extraction) SeenFile(path string) bool { + _, ok := extraction.FileInfo[path] + return ok +} + +func (extraction *Extraction) GetFileInfo(path string) *FileInfo { + if fileInfo, ok := extraction.FileInfo[path]; ok { + return fileInfo + } + + extraction.FileInfo[path] 
= &FileInfo{extraction.NextFileId, 0} + extraction.NextFileId += 1 + + return extraction.FileInfo[path] +} + +func (extraction *Extraction) GetFileIdx(path string) int { + return extraction.GetFileInfo(path).Idx +} + +func (extraction *Extraction) GetNextErr(path string) int { + finfo := extraction.GetFileInfo(path) + res := finfo.NextErr + finfo.NextErr += 1 + return res +} + +func NewExtraction(buildFlags []string, patterns []string) *Extraction { + hash := md5.New() + io.WriteString(hash, "go") + for _, buildFlag := range buildFlags { + io.WriteString(hash, " "+buildFlag) + } + io.WriteString(hash, " --") + for _, pattern := range patterns { + io.WriteString(hash, " "+pattern) + } + sum := hash.Sum(nil) + + i := 0 + var path string + // split compilation files into directories to avoid filling a single directory with too many files + pathFmt := fmt.Sprintf("compilations/%s/%s_%%d", hex.EncodeToString(sum[:1]), hex.EncodeToString(sum[1:])) + for { + path = fmt.Sprintf(pathFmt, i) + file, err := trap.FileFor(path) + if err != nil { + log.Fatalf("Error creating trap file: %s\n", err.Error()) + } + i++ + + if !util.FileExists(file) { + break + } + } + + statWriter, err := trap.NewWriter(path, nil) + if err != nil { + log.Fatal(err) + } + lblKey := fmt.Sprintf("%s_%d;compilation", hex.EncodeToString(sum), i) + lbl := statWriter.Labeler.GlobalID(lblKey) + + wd, err := os.Getwd() + if err != nil { + log.Fatalf("Unable to determine current directory: %s\n", err.Error()) + } + + dbscheme.CompilationsTable.Emit(statWriter, lbl, wd) + i = 0 + extractorPath, err := util.GetExtractorPath() + if err != nil { + log.Fatalf("Unable to get extractor path: %s\n", err.Error()) + } + dbscheme.CompilationArgsTable.Emit(statWriter, lbl, 0, extractorPath) + i++ + for _, flag := range buildFlags { + dbscheme.CompilationArgsTable.Emit(statWriter, lbl, i, flag) + i++ + } + // emit a fake "--" argument to make it clear that what comes after it are patterns + 
dbscheme.CompilationArgsTable.Emit(statWriter, lbl, i, "--") + i++ + for _, pattern := range patterns { + dbscheme.CompilationArgsTable.Emit(statWriter, lbl, i, pattern) + i++ + } + + return &Extraction{ + LabelKey: lblKey, + Label: lbl, + StatWriter: statWriter, + // this semaphore is used to limit the number of files that are open at once; + // this is to prevent the extractor from running into issues with caps on the + // number of open files that can be held by one process + FdSem: newSemaphore(100), + // this semaphore is used to limit the number of goroutines spawned, so we + // don't run into memory issues + GoroutineSem: newSemaphore(MaxGoRoutines), + NextFileId: 0, + FileInfo: make(map[string]*FileInfo), + SeenGoMods: make(map[string]bool), + } +} + +// extractUniverseScope extracts symbol table information for the universe scope +func extractUniverseScope() { + tw, err := trap.NewWriter("universe", nil) + if err != nil { + log.Fatal(err) + } + defer tw.Close() + + lbl := tw.Labeler.ScopeID(types.Universe, nil) + dbscheme.ScopesTable.Emit(tw, lbl, dbscheme.UniverseScopeType.Index()) + extractObjects(tw, types.Universe, lbl) + + // Always extract an empty interface type + extractType(tw, types.NewInterfaceType([]*types.Func{}, []types.Type{})) +} + +// extractObjects extracts all objects declared in the given scope +func extractObjects(tw *trap.Writer, scope *types.Scope, scopeLabel trap.Label) { + for _, name := range scope.Names() { + obj := scope.Lookup(name) + lbl, exists := tw.Labeler.ScopedObjectID(obj, func() trap.Label { return extractType(tw, obj.Type()) }) + if !exists { + // Populate type parameter parents for functions. Note that methods + // do not appear as objects in any scope, so they have to be dealt + // with separately in extractMethods. 
+ if funcObj, ok := obj.(*types.Func); ok { + populateTypeParamParents(tw, funcObj.Type().(*types.Signature).TypeParams(), obj) + populateTypeParamParents(tw, funcObj.Type().(*types.Signature).RecvTypeParams(), obj) + } + // Populate type parameter parents for named types. Note that we + // skip type aliases as the original type should be the parent + // of any type parameters. + if typeNameObj, ok := obj.(*types.TypeName); ok && !typeNameObj.IsAlias() { + if tp, ok := typeNameObj.Type().(*types.Named); ok { + populateTypeParamParents(tw, tp.TypeParams(), obj) + } + } + extractObject(tw, obj, lbl) + } + + if obj.Parent() != scope { + // this can happen if a scope is embedded into another with a `.` import. + continue + } + dbscheme.ObjectScopesTable.Emit(tw, lbl, scopeLabel) + } +} + +// extractMethod extracts a method `meth` and emits it to the objects table, then returns its label +func extractMethod(tw *trap.Writer, meth *types.Func) trap.Label { + // get the receiver type of the method + recvtyp := meth.Type().(*types.Signature).Recv().Type() + // ensure receiver type has been extracted + recvtyplbl := extractType(tw, recvtyp) + + // if the method label does not exist, extract it + methlbl, exists := tw.Labeler.MethodID(meth, recvtyplbl) + if !exists { + // Populate type parameter parents for methods. They do not appear as + // objects in any scope, so they have to be dealt with separately here. + populateTypeParamParents(tw, meth.Type().(*types.Signature).TypeParams(), meth) + populateTypeParamParents(tw, meth.Type().(*types.Signature).RecvTypeParams(), meth) + extractObject(tw, meth, methlbl) + } + + return methlbl +} + +// extractObject extracts a single object and emits it to the objects table. 
+func extractObject(tw *trap.Writer, obj types.Object, lbl trap.Label) { + name := obj.Name() + isBuiltin := obj.Parent() == types.Universe + var kind int + switch obj.(type) { + case *types.PkgName: + kind = dbscheme.PkgObjectType.Index() + case *types.TypeName: + if isBuiltin { + kind = dbscheme.BuiltinTypeObjectType.Index() + } else { + kind = dbscheme.DeclTypeObjectType.Index() + } + case *types.Const: + if isBuiltin { + kind = dbscheme.BuiltinConstObjectType.Index() + } else { + kind = dbscheme.DeclConstObjectType.Index() + } + case *types.Nil: + kind = dbscheme.BuiltinConstObjectType.Index() + case *types.Var: + kind = dbscheme.DeclVarObjectType.Index() + case *types.Builtin: + kind = dbscheme.BuiltinFuncObjectType.Index() + case *types.Func: + kind = dbscheme.DeclFuncObjectType.Index() + case *types.Label: + kind = dbscheme.LabelObjectType.Index() + default: + log.Fatalf("unknown object of type %T", obj) + } + dbscheme.ObjectsTable.Emit(tw, lbl, kind, name) + + // for methods, additionally extract information about the receiver + if sig, ok := obj.Type().(*types.Signature); ok { + if recv := sig.Recv(); recv != nil { + recvlbl, exists := tw.Labeler.ReceiverObjectID(recv, lbl) + if !exists { + extractObject(tw, recv, recvlbl) + } + dbscheme.MethodReceiversTable.Emit(tw, lbl, recvlbl) + } + } +} + +// extractObjectTypes extracts type and receiver information for all objects +func extractObjectTypes(tw *trap.Writer) { + // calling `extractType` on a named type will extract all methods defined + // on it, which will add new objects. Therefore we need to do this first + // before we loop over all objects and emit them.
+ changed := true + for changed { + changed = tw.ForEachObject(extractObjectType) + } + changed = tw.ForEachObject(emitObjectType) + if changed { + log.Printf("Warning: more objects were labeled while emitted object types") + } +} + +// extractObjectType extracts type and receiver information for a given object +func extractObjectType(tw *trap.Writer, obj types.Object, lbl trap.Label) { + if tp := obj.Type(); tp != nil { + extractType(tw, tp) + } +} + +// emitObjectType emits the type information for a given object +func emitObjectType(tw *trap.Writer, obj types.Object, lbl trap.Label) { + if tp := obj.Type(); tp != nil { + dbscheme.ObjectTypesTable.Emit(tw, lbl, extractType(tw, tp)) + } +} + +var ( + // file:line:col + threePartPos = regexp.MustCompile(`^(.+):(\d+):(\d+)$`) + // file:line + twoPartPos = regexp.MustCompile(`^(.+):(\d+)$`) +) + +// extractError extracts the message and location of a frontend error, emitting a diagnostic via the stat writer and a row in the errors table +func (extraction *Extraction) extractError(tw *trap.Writer, err packages.Error, pkglbl trap.Label, idx int) { + var ( + lbl = tw.Labeler.FreshID() + tag = dbscheme.ErrorTags[err.Kind] + kind = dbscheme.ErrorTypes[err.Kind].Index() + pos = err.Pos + file = "" + line, col int + e error + ) + + if pos == "" || pos == "-" { + // extract a dummy file + wd, e := os.Getwd() + if e != nil { + wd = "."
+ log.Printf("Warning: failed to get working directory") + } + ewd, e := filepath.EvalSymlinks(wd) + if e != nil { + ewd = wd + log.Printf("Warning: failed to evaluate symlinks for %s", wd) + } + file = filepath.Join(ewd, "-") + } else { + var rawfile string + if parts := threePartPos.FindStringSubmatch(pos); parts != nil { + // "file:line:col" + col, e = strconv.Atoi(parts[3]) + if e != nil { + log.Printf("Warning: malformed column number `%s`: %v", parts[3], e) + } + line, e = strconv.Atoi(parts[2]) + if e != nil { + log.Printf("Warning: malformed line number `%s`: %v", parts[2], e) + } + rawfile = parts[1] + } else if parts := twoPartPos.FindStringSubmatch(pos); parts != nil { + // "file:line" + line, e = strconv.Atoi(parts[2]) + if e != nil { + log.Printf("Warning: malformed line number `%s`: %v", parts[2], e) + } + rawfile = parts[1] + } else if pos != "" && pos != "-" { + log.Printf("Warning: malformed error position `%s`", pos) + } + afile, e := filepath.Abs(rawfile) + if e != nil { + log.Printf("Warning: failed to get absolute path for %s", rawfile) + afile = rawfile // `file` is still unset here, so fall back to the raw path from the error position + } + file, e = filepath.EvalSymlinks(afile) + if e != nil { + log.Printf("Warning: failed to evaluate symlinks for %s", afile) + file = afile + } + + extraction.extractFileInfo(tw, file) + } + + extraction.Lock.Lock() + flbl := extraction.StatWriter.Labeler.FileLabelFor(file) + diagLbl := extraction.StatWriter.Labeler.FreshID() + dbscheme.DiagnosticsTable.Emit( + extraction.StatWriter, diagLbl, 1, tag, err.Msg, err.Msg, + emitLocation(extraction.StatWriter, flbl, line, col, line, col)) + dbscheme.DiagnosticForTable.Emit(extraction.StatWriter, diagLbl, extraction.Label, extraction.GetFileIdx(file), extraction.GetNextErr(file)) + extraction.Lock.Unlock() + transformed := filepath.ToSlash(srcarchive.TransformPath(file)) + dbscheme.ErrorsTable.Emit(tw, lbl, kind, err.Msg, pos, transformed, line, col, pkglbl, idx) +} + +// extractPackage extracts AST information for all files in the given package
+func (extraction *Extraction) extractPackage(pkg *packages.Package) { + for _, astFile := range pkg.Syntax { + extraction.WaitGroup.Add(1) + extraction.GoroutineSem.acquire(1) + go func(astFile *ast.File) { + err := extraction.extractFile(astFile, pkg) + if err != nil { + log.Fatal(err) + } + extraction.GoroutineSem.release(1) + extraction.WaitGroup.Done() + }(astFile) + } +} + +// normalizedPath computes the normalized path (with symlinks resolved) for the given file +func normalizedPath(ast *ast.File, fset *token.FileSet) string { + file := fset.File(ast.Package).Name() + path, err := filepath.EvalSymlinks(file) + if err != nil { + return file + } + return path +} + +// extractFile extracts AST information for the given file +func (extraction *Extraction) extractFile(ast *ast.File, pkg *packages.Package) error { + fset := pkg.Fset + if ast.Package == token.NoPos { + log.Printf("Skipping extracting a file without a 'package' declaration") + return nil + } + path := normalizedPath(ast, fset) + + extraction.FdSem.acquire(3) + + log.Printf("Extracting %s", path) + start := time.Now() + + defer extraction.FdSem.release(1) + tw, err := trap.NewWriter(path, pkg) + if err != nil { + extraction.FdSem.release(2) + return err + } + defer tw.Close() + + err = srcarchive.Add(path) + extraction.FdSem.release(2) + if err != nil { + return err + } + + extraction.extractFileInfo(tw, path) + + extractScopes(tw, ast, pkg) + + extractFileNode(tw, ast) + + extractObjectTypes(tw) + + extractNumLines(tw, path, ast) + + end := time.Since(start) + log.Printf("Done extracting %s (%dms)", path, end.Nanoseconds()/1000000) + + return nil +} + +// extractFileInfo extracts file-system level information for the given file, populating +// the `files` and `containerparent` tables +func (extraction *Extraction) extractFileInfo(tw *trap.Writer, file string) { + // We may visit the same file twice because `extractError` calls this function to describe files containing + // compilation errors. 
It is also called for user source files being extracted. + extraction.Lock.Lock() + if extraction.SeenFile(file) { + extraction.Lock.Unlock() + return + } + extraction.Lock.Unlock() + + path := filepath.ToSlash(srcarchive.TransformPath(file)) + components := strings.Split(path, "/") + parentPath := "" + var parentLbl trap.Label + + for i, component := range components { + if i == 0 { + if component == "" { + path = "/" + } else { + path = component + } + } else { + path = parentPath + "/" + component + } + if i == len(components)-1 { + lbl := tw.Labeler.FileLabelFor(file) + dbscheme.FilesTable.Emit(tw, lbl, path) + dbscheme.ContainerParentTable.Emit(tw, parentLbl, lbl) + dbscheme.HasLocationTable.Emit(tw, lbl, emitLocation(tw, lbl, 0, 0, 0, 0)) + extraction.Lock.Lock() + slbl := extraction.StatWriter.Labeler.FileLabelFor(file) + dbscheme.CompilationCompilingFilesTable.Emit(extraction.StatWriter, extraction.Label, extraction.GetFileIdx(file), slbl) + extraction.Lock.Unlock() + break + } + lbl := tw.Labeler.GlobalID(util.EscapeTrapSpecialChars(path) + ";folder") + dbscheme.FoldersTable.Emit(tw, lbl, path) + if i > 0 { + dbscheme.ContainerParentTable.Emit(tw, parentLbl, lbl) + } + if path != "/" { + parentPath = path + } + parentLbl = lbl + } +} + +// extractLocation emits a location entity for the given entity +func extractLocation(tw *trap.Writer, entity trap.Label, sl int, sc int, el int, ec int) { + filelbl := tw.Labeler.FileLabel() + dbscheme.HasLocationTable.Emit(tw, entity, emitLocation(tw, filelbl, sl, sc, el, ec)) +} + +// emitLocation emits a location entity +func emitLocation(tw *trap.Writer, filelbl trap.Label, sl int, sc int, el int, ec int) trap.Label { + locLbl := tw.Labeler.GlobalID(fmt.Sprintf("loc,{%s},%d,%d,%d,%d", filelbl, sl, sc, el, ec)) + dbscheme.LocationsDefaultTable.Emit(tw, locLbl, filelbl, sl, sc, el, ec) + + return locLbl +} + +// extractNodeLocation extracts location information for the given node +func extractNodeLocation(tw 
*trap.Writer, nd ast.Node, lbl trap.Label) { + if nd == nil { + return + } + fset := tw.Package.Fset + start, end := fset.Position(nd.Pos()), fset.Position(nd.End()) + extractLocation(tw, lbl, start.Line, start.Column, end.Line, end.Column-1) +} + +// extractPackageScope extracts symbol table information for the given package +func extractPackageScope(tw *trap.Writer, pkg *packages.Package) trap.Label { + pkgScope := pkg.Types.Scope() + pkgScopeLabel := tw.Labeler.ScopeID(pkgScope, pkg.Types) + dbscheme.ScopesTable.Emit(tw, pkgScopeLabel, dbscheme.PackageScopeType.Index()) + dbscheme.ScopeNestingTable.Emit(tw, pkgScopeLabel, tw.Labeler.ScopeID(types.Universe, nil)) + extractObjects(tw, pkgScope, pkgScopeLabel) + return pkgScopeLabel +} + +// extractScopeLocation extracts location information for the given scope +func extractScopeLocation(tw *trap.Writer, scope *types.Scope, lbl trap.Label) { + fset := tw.Package.Fset + start, end := fset.Position(scope.Pos()), fset.Position(scope.End()) + extractLocation(tw, lbl, start.Line, start.Column, end.Line, end.Column-1) +} + +// extractScopes extracts symbol table information for the package scope and all local scopes +// of the given package. Note that this will not encounter methods or struct fields as +// they do not have a parent scope. 
func extractScopes(tw *trap.Writer, nd *ast.File, pkg *packages.Package) {
	pkgScopeLabel := extractPackageScope(tw, pkg)
	// the file scope is nested directly in the package scope; if no scope is
	// recorded for this file, only the package scope is extracted
	fileScope := pkg.TypesInfo.Scopes[nd]
	if fileScope != nil {
		extractLocalScope(tw, fileScope, pkgScopeLabel)
	}
}

// extractLocalScope extracts symbol table information for the given scope and all its nested scopes
//
// The scope is recorded as a local scope nested in `parentScopeLabel`; child scopes
// are extracted (recursively) before the objects declared directly in `scope`.
func extractLocalScope(tw *trap.Writer, scope *types.Scope, parentScopeLabel trap.Label) {
	scopeLabel := tw.Labeler.ScopeID(scope, nil)
	dbscheme.ScopesTable.Emit(tw, scopeLabel, dbscheme.LocalScopeType.Index())
	extractScopeLocation(tw, scope, scopeLabel)
	dbscheme.ScopeNestingTable.Emit(tw, scopeLabel, parentScopeLabel)

	for i := 0; i < scope.NumChildren(); i++ {
		childScope := scope.Child(i)
		extractLocalScope(tw, childScope, scopeLabel)
	}

	extractObjects(tw, scope, scopeLabel)
}

// extractFileNode extracts AST information for the given file and all nodes contained in it
//
// The file's own label is the parent of the package name identifier (index 0), of
// every top-level declaration and of every comment group (both indexed by their
// position in the respective list).
func extractFileNode(tw *trap.Writer, nd *ast.File) {
	lbl := tw.Labeler.FileLabel()

	extractExpr(tw, nd.Name, lbl, 0)

	for i, decl := range nd.Decls {
		extractDecl(tw, decl, lbl, i)
	}

	for i, cg := range nd.Comments {
		extractCommentGroup(tw, cg, lbl, i)
	}

	extractDoc(tw, nd.Doc, lbl)
	emitScopeNodeInfo(tw, nd, lbl)
}

// extractDoc extracts information about a doc comment group associated with a given element
//
// Only the association between `elt` and the comment group is emitted here; nothing
// is emitted if `doc` is nil.
func extractDoc(tw *trap.Writer, doc *ast.CommentGroup, elt trap.Label) {
	if doc != nil {
		dbscheme.DocCommentsTable.Emit(tw, elt, tw.Labeler.LocalID(doc))
	}
}

// extractCommentGroup extracts information about a comment group, its location and
// all comments contained in it; `idx` is the index of the group within `parent`
func extractCommentGroup(tw *trap.Writer, cg *ast.CommentGroup, parent trap.Label, idx int) {
	lbl := tw.Labeler.LocalID(cg)
	dbscheme.CommentGroupsTable.Emit(tw, lbl, parent, idx)
	extractNodeLocation(tw, cg, lbl)
	for i, c := range cg.List {
		extractComment(tw, c, lbl, i)
	}
}

// extractComment extracts information about a given comment
//
// The emitted text has the comment delimiters removed: the leading `//` for
// line comments, and the first and last two characters otherwise.
func extractComment(tw *trap.Writer, c *ast.Comment, parent trap.Label, idx int) {
	lbl := tw.Labeler.LocalID(c)
	rawText := c.Text
	var kind int
	var text string
	if rawText[:2] == "//" {
		kind = dbscheme.SlashSlashComment.Index()
		text = rawText[2:]
	} else {
		kind = dbscheme.SlashStarComment.Index()
		text = rawText[2 : len(rawText)-2]
	}
	dbscheme.CommentsTable.Emit(tw, lbl, kind, parent, idx, text)
	extractNodeLocation(tw, c, lbl)
}

// emitScopeNodeInfo associates an AST node with its induced scope, if any
func emitScopeNodeInfo(tw *trap.Writer, nd ast.Node, lbl trap.Label) {
	scope, exists := tw.Package.TypesInfo.Scopes[nd]
	if exists {
		dbscheme.ScopeNodesTable.Emit(tw, lbl, tw.Labeler.ScopeID(scope, tw.Package.Types))
	}
}

// extractExpr extracts AST information for the given expression and all its subexpressions
//
// `parent` is the label of the enclosing node and `idx` the index of `expr` among
// the children of that node. For every expression its type (if known), kind,
// location and constant value (if any) are emitted.
//
// Most cases of the type switch start with a check of the concrete pointer against
// nil: callers pass possibly-nil pointers such as `decl.Name` directly, which yields
// a non-nil interface value that the initial `expr == nil` check does not catch.
func extractExpr(tw *trap.Writer, expr ast.Expr, parent trap.Label, idx int) {
	if expr == nil {
		return
	}

	lbl := tw.Labeler.LocalID(expr)
	extractTypeOf(tw, expr, lbl)

	var kind int
	switch expr := expr.(type) {
	case *ast.BadExpr:
		kind = dbscheme.BadExpr.Index()
	case *ast.Ident:
		if expr == nil {
			return
		}
		kind = dbscheme.IdentExpr.Index()
		dbscheme.LiteralsTable.Emit(tw, lbl, expr.Name, expr.Name)
		// bind the identifier to the object it defines, if any
		def := tw.Package.TypesInfo.Defs[expr]
		if def != nil {
			defTyp := extractType(tw, def.Type())
			objlbl, exists := tw.Labeler.LookupObjectID(def, defTyp)
			if objlbl == trap.InvalidLabel {
				log.Printf("Omitting def binding to unknown object %v", def)
			} else {
				if !exists {
					extractObject(tw, def, objlbl)
				}
				dbscheme.DefsTable.Emit(tw, lbl, objlbl)
			}
		}
		// bind the identifier to the object it uses, if any
		use := getObjectBeingUsed(tw, expr)
		if use != nil {
			useTyp := extractType(tw, use.Type())
			objlbl, exists := tw.Labeler.LookupObjectID(use, useTyp)
			if objlbl == trap.InvalidLabel {
				log.Printf("Omitting use binding to unknown object %v", use)
			} else {
				if !exists {
					extractObject(tw, use, objlbl)
				}
				dbscheme.UsesTable.Emit(tw, lbl, objlbl)
			}
		}
	case *ast.Ellipsis:
		if expr == nil {
			return
		}
		kind = dbscheme.EllipsisExpr.Index()
		extractExpr(tw, expr.Elt, lbl, 0)
	case *ast.BasicLit:
		if expr == nil {
			return
		}
		// `value` is the normalized value emitted next to the raw source text
		value := ""
		switch expr.Kind {
		case token.INT:
			// base 0: the base is taken from the literal's prefix; a parse
			// error is ignored and the returned value is used as is
			ival, _ := strconv.ParseInt(expr.Value, 0, 64)
			value = strconv.FormatInt(ival, 10)
			kind = dbscheme.IntLitExpr.Index()
		case token.FLOAT:
			value = expr.Value
			kind = dbscheme.FloatLitExpr.Index()
		case token.IMAG:
			value = expr.Value
			kind = dbscheme.ImagLitExpr.Index()
		case token.CHAR:
			value, _ = strconv.Unquote(expr.Value)
			kind = dbscheme.CharLitExpr.Index()
		case token.STRING:
			value, _ = strconv.Unquote(expr.Value)
			kind = dbscheme.StringLitExpr.Index()
		default:
			log.Fatalf("unknown literal kind %v", expr.Kind)
		}
		dbscheme.LiteralsTable.Emit(tw, lbl, value, expr.Value)
	case *ast.FuncLit:
		if expr == nil {
			return
		}
		kind = dbscheme.FuncLitExpr.Index()
		extractExpr(tw, expr.Type, lbl, 0)
		extractStmt(tw, expr.Body, lbl, 1)
	case *ast.CompositeLit:
		if expr == nil {
			return
		}
		kind = dbscheme.CompositeLitExpr.Index()
		extractExpr(tw, expr.Type, lbl, 0)
		extractExprs(tw, expr.Elts, lbl, 1, 1)
	case *ast.ParenExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.ParenExpr.Index()
		extractExpr(tw, expr.X, lbl, 0)
	case *ast.SelectorExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.SelectorExpr.Index()
		extractExpr(tw, expr.X, lbl, 0)
		extractExpr(tw, expr.Sel, lbl, 1)
	case *ast.IndexExpr:
		if expr == nil {
			return
		}
		typeofx := typeOf(tw, expr.X)
		if typeofx == nil {
			// We are missing type information for `expr.X`, so we cannot
			// determine whether this is a generic function instantiation
			// or not.
			kind = dbscheme.IndexExpr.Index()
		} else {
			if _, ok := typeofx.Underlying().(*types.Signature); ok {
				kind = dbscheme.GenericFunctionInstantiationExpr.Index()
			} else {
				// Can't distinguish between actual index expressions (into a
				// map, array, slice, string or pointer to array) and generic
				// type specialization expression, so we do it later in QL.
				kind = dbscheme.IndexExpr.Index()
			}
		}
		extractExpr(tw, expr.X, lbl, 0)
		extractExpr(tw, expr.Index, lbl, 1)
	case *ast.IndexListExpr:
		if expr == nil {
			return
		}
		typeofx := typeOf(tw, expr.X)
		if typeofx == nil {
			// We are missing type information for `expr.X`, so we cannot
			// determine whether this is a generic function instantiation
			// or not.
			kind = dbscheme.GenericTypeInstantiationExpr.Index()
		} else {
			if _, ok := typeofx.Underlying().(*types.Signature); ok {
				kind = dbscheme.GenericFunctionInstantiationExpr.Index()
			} else {
				kind = dbscheme.GenericTypeInstantiationExpr.Index()
			}
		}
		extractExpr(tw, expr.X, lbl, 0)
		extractExprs(tw, expr.Indices, lbl, 1, 1)
	case *ast.SliceExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.SliceExpr.Index()
		extractExpr(tw, expr.X, lbl, 0)
		extractExpr(tw, expr.Low, lbl, 1)
		extractExpr(tw, expr.High, lbl, 2)
		extractExpr(tw, expr.Max, lbl, 3)
	case *ast.TypeAssertExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.TypeAssertExpr.Index()
		extractExpr(tw, expr.X, lbl, 0)
		extractExpr(tw, expr.Type, lbl, 1)
	case *ast.CallExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.CallOrConversionExpr.Index()
		extractExpr(tw, expr.Fun, lbl, 0)
		extractExprs(tw, expr.Args, lbl, 1, 1)
		if expr.Ellipsis.IsValid() {
			dbscheme.HasEllipsisTable.Emit(tw, lbl)
		}
	case *ast.StarExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.StarExpr.Index()
		extractExpr(tw, expr.X, lbl, 0)
	case *ast.KeyValueExpr:
		if expr == nil {
			return
		}
		kind = dbscheme.KeyValueExpr.Index()
		extractExpr(tw, expr.Key, lbl, 0)
		extractExpr(tw, expr.Value, lbl, 1)
	case *ast.UnaryExpr:
		if expr == nil {
			return
		}
		// `~T` is recorded as a type set literal rather than as a unary operation
		if expr.Op == token.TILDE {
			kind = dbscheme.TypeSetLiteralExpr.Index()
		} else {
			tp := dbscheme.UnaryExprs[expr.Op]
			if tp == nil {
				log.Fatalf("unsupported unary operator %s", expr.Op)
			}
			kind = tp.Index()
		}
		extractExpr(tw, expr.X, lbl, 0)
	case *ast.BinaryExpr:
		if expr == nil {
			return
		}
		// `A | B` with a union type is a type set literal; its operands are
		// extracted by `flattenBinaryExprTree` instead of as X and Y
		_, isUnionType := typeOf(tw, expr).(*types.Union)
		if expr.Op == token.OR && isUnionType {
			kind = dbscheme.TypeSetLiteralExpr.Index()
			flattenBinaryExprTree(tw, expr, lbl, 0)
		} else {
			tp := dbscheme.BinaryExprs[expr.Op]
			if tp == nil {
				log.Fatalf("unsupported binary operator %s", expr.Op)
			}
			kind = tp.Index()
			extractExpr(tw, expr.X, lbl, 0)
			extractExpr(tw, expr.Y, lbl, 1)
		}
	case *ast.ArrayType:
		if expr == nil {
			return
		}
		kind = dbscheme.ArrayTypeExpr.Index()
		extractExpr(tw, expr.Len, lbl, 0)
		extractExpr(tw, expr.Elt, lbl, 1)
	case *ast.StructType:
		if expr == nil {
			return
		}
		kind = dbscheme.StructTypeExpr.Index()
		extractFields(tw, expr.Fields, lbl, 0, 1)
	case *ast.FuncType:
		if expr == nil {
			return
		}
		kind = dbscheme.FuncTypeExpr.Index()
		// parameters get child indices 0, 1, 2, ...; results get -1, -2, ...
		extractFields(tw, expr.Params, lbl, 0, 1)
		extractFields(tw, expr.Results, lbl, -1, -1)
		emitScopeNodeInfo(tw, expr, lbl)
	case *ast.InterfaceType:
		if expr == nil {
			return
		}
		kind = dbscheme.InterfaceTypeExpr.Index()
		// expr.Methods contains methods, embedded interfaces and type set
		// literals.
		makeTypeSetLiteralsUnionTyped(tw, expr.Methods)
		extractFields(tw, expr.Methods, lbl, 0, 1)
	case *ast.MapType:
		if expr == nil {
			return
		}
		kind = dbscheme.MapTypeExpr.Index()
		extractExpr(tw, expr.Key, lbl, 0)
		extractExpr(tw, expr.Value, lbl, 1)
	case *ast.ChanType:
		if expr == nil {
			return
		}
		tp := dbscheme.ChanTypeExprs[expr.Dir]
		if tp == nil {
			log.Fatalf("unsupported channel direction %v", expr.Dir)
		}
		kind = tp.Index()
		extractExpr(tw, expr.Value, lbl, 0)
	default:
		log.Fatalf("unknown expression of type %T", expr)
	}
	dbscheme.ExprsTable.Emit(tw, lbl, kind, parent, idx)
	extractNodeLocation(tw, expr, lbl)
	extractValueOf(tw, expr, lbl)
}

// extractExprs extracts AST information for a list of expressions, which are children of
// the given parent
// `idx` is the index of the first child in the list, and `dir` is the index increment of
// each child over its preceding child (usually either 1 for assigning increasing indices, or
// -1 for decreasing indices)
func extractExprs(tw *trap.Writer, exprs []ast.Expr, parent trap.Label, idx int, dir int) {
	if exprs != nil {
		for _, expr := range exprs {
			extractExpr(tw, expr, parent, idx)
			idx += dir
		}
	}
}

// extractTypeOf looks up the type of `expr`, extracts it if it hasn't previously been
// extracted, and associates it with `expr` in the `type_of` table
//
// Nothing is emitted if `typeOf` yields no type for the expression.
func extractTypeOf(tw *trap.Writer, expr ast.Expr, lbl trap.Label) {
	tp := typeOf(tw, expr)
	if tp != nil {
		tplbl := extractType(tw, tp)
		dbscheme.TypeOfTable.Emit(tw, lbl, tplbl)
	}
}

// extractValueOf looks up the value of `expr`, and associates it with `expr` in
// the `consts` table
//
// Two renderings are emitted: `value`, and `exact`, which is the constant's
// `ExactString()` except for strings, where both are the unquoted string. The
// predeclared `nil` is emitted as "nil" for both.
func extractValueOf(tw *trap.Writer, expr ast.Expr, lbl trap.Label) {
	tpVal := tw.Package.TypesInfo.Types[expr]

	if tpVal.Value != nil {
		// if Value is non-nil, the expression has a constant value

		// note that string literals in import statements do not have an associated
		// Value and so do not get added to the table

		var value string
		exact := tpVal.Value.ExactString()
		switch tpVal.Value.Kind() {
		case constant.String:
			// we need to unquote strings
			value = constant.StringVal(tpVal.Value)
			exact = constant.StringVal(tpVal.Value)
		case constant.Float:
			// `value` is the float64 approximation; whether the conversion
			// was exact is not recorded
			flval, _ := constant.Float64Val(tpVal.Value)
			value = fmt.Sprintf("%.20g", flval)
		case constant.Complex:
			real, _ := constant.Float64Val(constant.Real(tpVal.Value))
			imag, _ := constant.Float64Val(constant.Imag(tpVal.Value))
			value = fmt.Sprintf("(%.20g + %.20gi)", real, imag)
		default:
			value = tpVal.Value.ExactString()
		}

		dbscheme.ConstValuesTable.Emit(tw, lbl, value, exact)
	} else if tpVal.IsNil() {
		dbscheme.ConstValuesTable.Emit(tw, lbl, "nil", "nil")
	}
}

// extractFields extracts AST information for a list of fields, which are children of
// the given parent
// `idx` is the index of the first child in the list, and `dir` is the index increment of
// each child over its preceding child (usually either 1 for assigning increasing indices, or
// -1 for decreasing indices)
//
// Within each field, the type is child 0, the names are children 1, 2, ... and the
// tag is child -1.
func extractFields(tw *trap.Writer, fields *ast.FieldList, parent trap.Label, idx int, dir int) {
	if fields == nil || fields.List == nil {
		return
	}
	for _, field := range fields.List {
		lbl := tw.Labeler.LocalID(field)
		dbscheme.FieldsTable.Emit(tw, lbl, parent, idx)
		extractNodeLocation(tw, field, lbl)
		if field.Names != nil {
			for i, name := range field.Names {
				extractExpr(tw, name, lbl, i+1)
			}
		}
		extractExpr(tw, field.Type, lbl, 0)
		extractExpr(tw, field.Tag, lbl, -1)
		extractDoc(tw, field.Doc, lbl)
		idx += dir
	}
}

// extractStmt extracts AST information for a given statement and all other statements or expressions
// nested inside it
//
// `parent` is the label of the enclosing node and `idx` the index of `stmt` among
// the children of that node. As in `extractExpr`, most cases check the concrete
// pointer against nil, since callers pass possibly-nil pointers such as `decl.Body`
// directly.
func extractStmt(tw *trap.Writer, stmt ast.Stmt, parent trap.Label, idx int) {
	if stmt == nil {
		return
	}

	lbl := tw.Labeler.LocalID(stmt)
	var kind int
	switch stmt := stmt.(type) {
	case *ast.BadStmt:
		kind = dbscheme.BadStmtType.Index()
	case *ast.DeclStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.DeclStmtType.Index()
		extractDecl(tw, stmt.Decl, lbl, 0)
	case *ast.EmptyStmt:
		kind = dbscheme.EmptyStmtType.Index()
	case *ast.LabeledStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.LabeledStmtType.Index()
		extractExpr(tw, stmt.Label, lbl, 0)
		extractStmt(tw, stmt.Stmt, lbl, 1)
	case *ast.ExprStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.ExprStmtType.Index()
		extractExpr(tw, stmt.X, lbl, 0)
	case *ast.SendStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.SendStmtType.Index()
		extractExpr(tw, stmt.Chan, lbl, 0)
		extractExpr(tw, stmt.Value, lbl, 1)
	case *ast.IncDecStmt:
		if stmt == nil {
			return
		}
		if stmt.Tok == token.INC {
			kind = dbscheme.IncStmtType.Index()
		} else if stmt.Tok == token.DEC {
			kind = dbscheme.DecStmtType.Index()
		} else {
			log.Fatalf("unsupported increment/decrement operator %v", stmt.Tok)
		}
		extractExpr(tw, stmt.X, lbl, 0)
	case *ast.AssignStmt:
		if stmt == nil {
			return
		}
		tp := dbscheme.AssignStmtTypes[stmt.Tok]
		if tp == nil {
			log.Fatalf("unsupported assignment statement with operator %v", stmt.Tok)
		}
		kind = tp.Index()
		// left-hand sides get child indices -1, -2, ...; right-hand sides 1, 2, ...
		extractExprs(tw, stmt.Lhs, lbl, -1, -1)
		extractExprs(tw, stmt.Rhs, lbl, 1, 1)
	case *ast.GoStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.GoStmtType.Index()
		extractExpr(tw, stmt.Call, lbl, 0)
	case *ast.DeferStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.DeferStmtType.Index()
		extractExpr(tw, stmt.Call, lbl, 0)
	case *ast.ReturnStmt:
		kind = dbscheme.ReturnStmtType.Index()
		extractExprs(tw, stmt.Results, lbl, 0, 1)
	case *ast.BranchStmt:
		if stmt == nil {
			return
		}
		switch stmt.Tok {
		case token.BREAK:
			kind = dbscheme.BreakStmtType.Index()
		case token.CONTINUE:
			kind = dbscheme.ContinueStmtType.Index()
		case token.GOTO:
			kind = dbscheme.GotoStmtType.Index()
		case token.FALLTHROUGH:
			kind = dbscheme.FallthroughStmtType.Index()
		default:
			log.Fatalf("unsupported branch statement type %v", stmt.Tok)
		}
		extractExpr(tw, stmt.Label, lbl, 0)
	case *ast.BlockStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.BlockStmtType.Index()
		extractStmts(tw, stmt.List, lbl, 0, 1)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.IfStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.IfStmtType.Index()
		extractStmt(tw, stmt.Init, lbl, 0)
		extractExpr(tw, stmt.Cond, lbl, 1)
		extractStmt(tw, stmt.Body, lbl, 2)
		extractStmt(tw, stmt.Else, lbl, 3)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.CaseClause:
		if stmt == nil {
			return
		}
		kind = dbscheme.CaseClauseType.Index()
		// case expressions get child indices -1, -2, ...; body statements 0, 1, ...
		extractExprs(tw, stmt.List, lbl, -1, -1)
		extractStmts(tw, stmt.Body, lbl, 0, 1)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.SwitchStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.ExprSwitchStmtType.Index()
		extractStmt(tw, stmt.Init, lbl, 0)
		extractExpr(tw, stmt.Tag, lbl, 1)
		extractStmt(tw, stmt.Body, lbl, 2)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.TypeSwitchStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.TypeSwitchStmtType.Index()
		extractStmt(tw, stmt.Init, lbl, 0)
		extractStmt(tw, stmt.Assign, lbl, 1)
		extractStmt(tw, stmt.Body, lbl, 2)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.CommClause:
		if stmt == nil {
			return
		}
		kind = dbscheme.CommClauseType.Index()
		extractStmt(tw, stmt.Comm, lbl, 0)
		extractStmts(tw, stmt.Body, lbl, 1, 1)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.SelectStmt:
		kind = dbscheme.SelectStmtType.Index()
		extractStmt(tw, stmt.Body, lbl, 0)
	case *ast.ForStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.ForStmtType.Index()
		extractStmt(tw, stmt.Init, lbl, 0)
		extractExpr(tw, stmt.Cond, lbl, 1)
		extractStmt(tw, stmt.Post, lbl, 2)
		extractStmt(tw, stmt.Body, lbl, 3)
		emitScopeNodeInfo(tw, stmt, lbl)
	case *ast.RangeStmt:
		if stmt == nil {
			return
		}
		kind = dbscheme.RangeStmtType.Index()
		extractExpr(tw, stmt.Key, lbl, 0)
		extractExpr(tw, stmt.Value, lbl, 1)
		extractExpr(tw, stmt.X, lbl, 2)
		extractStmt(tw, stmt.Body, lbl, 3)
		emitScopeNodeInfo(tw, stmt, lbl)
	default:
		log.Fatalf("unknown statement of type %T", stmt)
	}
	dbscheme.StmtsTable.Emit(tw, lbl, kind, parent, idx)
	extractNodeLocation(tw, stmt, lbl)
}

// extractStmts extracts AST information for a list of statements, which are children of
// the given parent
// `idx` is the index of the first child in the list, and `dir` is the index increment of
// each child over its preceding child (usually either 1 for assigning increasing indices, or
// -1 for decreasing indices)
func extractStmts(tw *trap.Writer, stmts []ast.Stmt, parent trap.Label, idx int, dir int) {
	if stmts != nil {
		for _, stmt := range stmts {
			extractStmt(tw, stmt, parent, idx)
			idx += dir
		}
	}

}

// extractDecl extracts AST information for the given declaration
//
// `parent` is the label of the enclosing node and `idx` the index of `decl` among
// the children of that node. For function declarations the receiver list starts at
// child index -1, the name is child 0, the type child 1 and the body child 2.
func extractDecl(tw *trap.Writer, decl ast.Decl, parent trap.Label, idx int) {
	lbl := tw.Labeler.LocalID(decl)
	var kind int
	switch decl := decl.(type) {
	case *ast.BadDecl:
		kind = dbscheme.BadDeclType.Index()
	case *ast.GenDecl:
		if decl == nil {
			return
		}
		switch decl.Tok {
		case token.IMPORT:
			kind = dbscheme.ImportDeclType.Index()
		case token.CONST:
			kind = dbscheme.ConstDeclType.Index()
		case token.TYPE:
			kind = dbscheme.TypeDeclType.Index()
		case token.VAR:
			kind = dbscheme.VarDeclType.Index()
		default:
			log.Fatalf("unknown declaration of kind %v", decl.Tok)
		}
		for i, spec := range decl.Specs {
			extractSpec(tw, spec, lbl, i)
		}
		extractDoc(tw, decl.Doc, lbl)
	case *ast.FuncDecl:
		if decl == nil {
			return
		}
		kind = dbscheme.FuncDeclType.Index()
		extractFields(tw, decl.Recv, lbl, -1, -1)
		extractExpr(tw, decl.Name, lbl, 0)
		extractExpr(tw, decl.Type, lbl, 1)
		extractStmt(tw, decl.Body, lbl, 2)
		extractDoc(tw, decl.Doc, lbl)
		extractTypeParamDecls(tw, decl.Type.TypeParams, lbl)

		// Note that we currently don't extract any kind of declaration for
		// receiver type parameters. There isn't an explicit declaration, but
		// we could consider the index/indices of an IndexExpr/IndexListExpr
		// receiver as declarations.
	default:
		log.Fatalf("unknown declaration of type %T", decl)
	}
	dbscheme.DeclsTable.Emit(tw, lbl, kind, parent, idx)
	extractNodeLocation(tw, decl, lbl)
}

// extractSpec extracts AST information for the given declaration specifier
//
// `parent` is the label of the enclosing declaration and `idx` the index of `spec`
// within it. Unlike the other extraction switches there is no `default` case here:
// a specifier of any other type is emitted with `kind` left at its zero value.
func extractSpec(tw *trap.Writer, spec ast.Spec, parent trap.Label, idx int) {
	lbl := tw.Labeler.LocalID(spec)
	var kind int
	switch spec := spec.(type) {
	case *ast.ImportSpec:
		if spec == nil {
			return
		}
		kind = dbscheme.ImportSpecType.Index()
		extractExpr(tw, spec.Name, lbl, 0)
		extractExpr(tw, spec.Path, lbl, 1)
		extractDoc(tw, spec.Doc, lbl)
	case *ast.ValueSpec:
		if spec == nil {
			return
		}
		kind = dbscheme.ValueSpecType.Index()
		// names get child indices -1, -2, ...; the type is child 0 and the
		// values are children 1, 2, ...
		for i, name := range spec.Names {
			extractExpr(tw, name, lbl, -(1 + i))
		}
		extractExpr(tw, spec.Type, lbl, 0)
		extractExprs(tw, spec.Values, lbl, 1, 1)
		extractDoc(tw, spec.Doc, lbl)
	case *ast.TypeSpec:
		if spec == nil {
			return
		}
		// a valid position of `=` distinguishes an alias declaration from a
		// type definition
		if spec.Assign.IsValid() {
			kind = dbscheme.AliasSpecType.Index()
		} else {
			kind = dbscheme.TypeDefSpecType.Index()
		}
		extractExpr(tw, spec.Name, lbl, 0)
		extractTypeParamDecls(tw, spec.TypeParams, lbl)
		extractExpr(tw, spec.Type, lbl, 1)
		extractDoc(tw, spec.Doc, lbl)
	}
	dbscheme.SpecsTable.Emit(tw, lbl, kind, parent, idx)
	extractNodeLocation(tw, spec, lbl)
}

// extractType extracts type information for `tp` and returns its associated label;
// types are only extracted once, so the second time `extractType` is invoked it simply returns the label
//
// Whether a type has been extracted before is decided by `getTypeLabel`: the body
// of the type switch only runs for labels that did not exist yet.
func extractType(tw *trap.Writer, tp types.Type) trap.Label {
	lbl, exists := getTypeLabel(tw, tp)
	if !exists {
		var kind int
		switch tp := tp.(type) {
		case *types.Basic:
			branch := dbscheme.BasicTypes[tp.Kind()]
			if branch == nil {
				log.Fatalf("unknown basic type %v", tp.Kind())
			}
			kind = branch.Index()
		case *types.Array:
			kind = dbscheme.ArrayType.Index()
			dbscheme.ArrayLengthTable.Emit(tw, lbl, fmt.Sprintf("%d", tp.Len()))
			extractElementType(tw, lbl, tp.Elem())
		case *types.Slice:
			kind = dbscheme.SliceType.Index()
			extractElementType(tw, lbl, tp.Elem())
		case *types.Struct:
			kind = dbscheme.StructType.Index()
			for i := 0; i < tp.NumFields(); i++ {
				field := tp.Field(i)

				// ensure the field is associated with a label - note that
				// struct fields do not have a parent scope, so they are not
				// dealt with by `extractScopes`
				fieldlbl, exists := tw.Labeler.FieldID(field, i, lbl)
				if !exists {
					extractObject(tw, field, fieldlbl)
				}

				dbscheme.FieldStructsTable.Emit(tw, fieldlbl, lbl)

				// embedded fields are recorded with an empty component name
				name := field.Name()
				if field.Embedded() {
					name = ""
				}
				extractComponentType(tw, lbl, i, name, field.Type())
			}
		case *types.Pointer:
			kind = dbscheme.PointerType.Index()
			extractBaseType(tw, lbl, tp.Elem())
		case *types.Interface:
			kind = dbscheme.InterfaceType.Index()
			for i := 0; i < tp.NumMethods(); i++ {
				meth := tp.Method(i)

				// Note that methods do not have a parent scope, so they are
				// not dealt with by `extractScopes`
				extractMethod(tw, meth)

				extractComponentType(tw, lbl, i, meth.Name(), meth.Type())
			}
			// embedded types are unnamed components with indices -1, -2, ...
			for i := 0; i < tp.NumEmbeddeds(); i++ {
				component := tp.EmbeddedType(i)
				if isNonUnionTypeSetLiteral(component) {
					component = createUnionFromType(component)
				}
				extractComponentType(tw, lbl, -(i + 1), "", component)
			}
		case *types.Tuple:
			kind = dbscheme.TupleType.Index()
			for i := 0; i < tp.Len(); i++ {
				extractComponentType(tw, lbl, i, "", tp.At(i).Type())
			}
		case *types.Signature:
			kind = dbscheme.SignatureType.Index()
			// parameters are components 1, 2, ...; results are components -1, -2, ...
			params, results := tp.Params(), tp.Results()
			if params != nil {
				for i := 0; i < params.Len(); i++ {
					param := params.At(i)
					extractComponentType(tw, lbl, i+1, "", param.Type())
				}
			}
			if results != nil {
				for i := 0; i < results.Len(); i++ {
					result := results.At(i)
					extractComponentType(tw, lbl, -(i + 1), "", result.Type())
				}
			}
			if tp.Variadic() {
				dbscheme.VariadicTable.Emit(tw, lbl)
			}
		case *types.Map:
			kind = dbscheme.MapType.Index()
			extractKeyType(tw, lbl, tp.Key())
			extractElementType(tw, lbl, tp.Elem())
		case *types.Chan:
			kind = dbscheme.ChanTypes[tp.Dir()].Index()
			extractElementType(tw, lbl, tp.Elem())
		case *types.Named:
			// name, underlying type, object and methods are all taken from the
			// origin type rather than from `tp` itself
			origintp := tp.Origin()
			kind = dbscheme.NamedType.Index()
			dbscheme.TypeNameTable.Emit(tw, lbl, origintp.Obj().Name())
			underlying := origintp.Underlying()
			extractUnderlyingType(tw, lbl, underlying)
			trackInstantiatedStructFields(tw, tp, origintp)

			entitylbl, exists := tw.Labeler.LookupObjectID(origintp.Obj(), lbl)
			if entitylbl == trap.InvalidLabel {
				log.Printf("Omitting type-object binding for unknown object %v.\n", origintp.Obj())
			} else {
				if !exists {
					extractObject(tw, origintp.Obj(), entitylbl)
				}
				dbscheme.TypeObjectTable.Emit(tw, lbl, entitylbl)
			}

			// ensure all methods have labels - note that methods do not have a
			// parent scope, so they are not dealt with by `extractScopes`
			for i := 0; i < origintp.NumMethods(); i++ {
				meth := origintp.Method(i)

				extractMethod(tw, meth)
			}

			// associate all methods of underlying interface with this type
			if underlyingInterface, ok := underlying.(*types.Interface); ok {
				for i := 0; i < underlyingInterface.NumMethods(); i++ {
					methlbl := extractMethod(tw, underlyingInterface.Method(i))
					dbscheme.MethodHostsTable.Emit(tw, methlbl, lbl)
				}
			}
		case *types.TypeParam:
			kind = dbscheme.TypeParamType.Index()
			parentlbl := getTypeParamParentLabel(tw, tp)
			constraintLabel := extractType(tw, tp.Constraint())
			dbscheme.TypeParamTable.Emit(tw, lbl, tp.Obj().Name(), constraintLabel, parentlbl, tp.Index())
		case *types.Union:
			kind = dbscheme.TypeSetLiteral.Index()
			// each term is a component whose name is "~" for tilde terms and
			// empty otherwise
			for i := 0; i < tp.Len(); i++ {
				term := tp.Term(i)
				tildeStr := ""
				if term.Tilde() {
					tildeStr = "~"
				}
				extractComponentType(tw, lbl, i, tildeStr, term.Type())
			}
		default:
			log.Fatalf("unexpected type %T", tp)
		}
		dbscheme.TypesTable.Emit(tw, lbl, kind)
	}
	return lbl
}

// getTypeLabel looks up the label associated with `tp`, creating a new label if
// it does not have one yet; the second result indicates whether the label
// already existed
//
// Type labels refer to global keys to ensure that if the same type is
// encountered during the extraction of different files it is still ultimately
// mapped to the same entity. In particular, this means that keys for compound
// types refer to the labels of their component types. For named types, the key
// is constructed from their globally unique ID. This prevents cyclic type keys
// since type recursion in Go always goes through named types.
func getTypeLabel(tw *trap.Writer, tp types.Type) (trap.Label, bool) {
	lbl, exists := tw.Labeler.TypeLabels[tp]
	if !exists {
		switch tp := tp.(type) {
		case *types.Basic:
			lbl = tw.Labeler.GlobalID(fmt.Sprintf("%d;basictype", tp.Kind()))
		case *types.Array:
			len := tp.Len()
			elem := extractType(tw, tp.Elem())
			lbl = tw.Labeler.GlobalID(fmt.Sprintf("%d,{%s};arraytype", len, elem))
		case *types.Slice:
			elem := extractType(tw, tp.Elem())
			lbl = tw.Labeler.GlobalID(fmt.Sprintf("{%s};slicetype", elem))
		case *types.Struct:
			var b strings.Builder
			for i := 0; i < tp.NumFields(); i++ {
				field := tp.Field(i)
				fieldTypeLbl := extractType(tw, field.Type())
				if i > 0 {
					b.WriteString(",")
				}
				name := field.Name()
				if field.Embedded() {
					name = ""
				}
				fmt.Fprintf(&b, "%s,{%s},%s", name, fieldTypeLbl, util.EscapeTrapSpecialChars(tp.Tag(i)))
			}
			lbl = tw.Labeler.GlobalID(fmt.Sprintf("%s;structtype", b.String()))
		case *types.Pointer:
			base := extractType(tw, tp.Elem())
			lbl = tw.Labeler.GlobalID(fmt.Sprintf("{%s};pointertype", base))
		case *types.Interface:
			var b strings.Builder
			for i := 0; i <
tp.NumMethods(); i++ { + meth := tp.Method(i) + methLbl := extractType(tw, meth.Type()) + if i > 0 { + b.WriteString(",") + } + fmt.Fprintf(&b, "%s,{%s}", meth.Id(), methLbl) + } + b.WriteString(";") + for i := 0; i < tp.NumEmbeddeds(); i++ { + if i > 0 { + b.WriteString(",") + } + fmt.Fprintf(&b, "{%s}", extractType(tw, tp.EmbeddedType(i))) + } + // We note whether the interface is comparable so that we can + // distinguish the underlying type of `comparable` from an + // empty interface. + if tp.IsComparable() { + b.WriteString(";comparable") + } + lbl = tw.Labeler.GlobalID(fmt.Sprintf("%s;interfacetype", b.String())) + case *types.Tuple: + var b strings.Builder + for i := 0; i < tp.Len(); i++ { + compLbl := extractType(tw, tp.At(i).Type()) + if i > 0 { + b.WriteString(",") + } + fmt.Fprintf(&b, "{%s}", compLbl) + } + lbl = tw.Labeler.GlobalID(fmt.Sprintf("%s;tupletype", b.String())) + case *types.Signature: + var b strings.Builder + params, results := tp.Params(), tp.Results() + if params != nil { + for i := 0; i < params.Len(); i++ { + paramLbl := extractType(tw, params.At(i).Type()) + if i > 0 { + b.WriteString(",") + } + fmt.Fprintf(&b, "{%s}", paramLbl) + } + } + b.WriteString(";") + if results != nil { + for i := 0; i < results.Len(); i++ { + resultLbl := extractType(tw, results.At(i).Type()) + if i > 0 { + b.WriteString(",") + } + fmt.Fprintf(&b, "{%s}", resultLbl) + } + } + if tp.Variadic() { + b.WriteString(";variadic") + } + lbl = tw.Labeler.GlobalID(fmt.Sprintf("%s;signaturetype", b.String())) + case *types.Map: + key := extractType(tw, tp.Key()) + value := extractType(tw, tp.Elem()) + lbl = tw.Labeler.GlobalID(fmt.Sprintf("{%s},{%s};maptype", key, value)) + case *types.Chan: + dir := tp.Dir() + elem := extractType(tw, tp.Elem()) + lbl = tw.Labeler.GlobalID(fmt.Sprintf("%v,{%s};chantype", dir, elem)) + case *types.Named: + origintp := tp.Origin() + entitylbl, exists := tw.Labeler.LookupObjectID(origintp.Obj(), lbl) + if entitylbl == trap.InvalidLabel { 
+ panic(fmt.Sprintf("Cannot construct label for named type %v (underlying object is %v).\n", origintp, origintp.Obj())) + } + if !exists { + extractObject(tw, origintp.Obj(), entitylbl) + } + lbl = tw.Labeler.GlobalID(fmt.Sprintf("{%s};namedtype", entitylbl)) + case *types.TypeParam: + parentlbl := getTypeParamParentLabel(tw, tp) + lbl = tw.Labeler.GlobalID(fmt.Sprintf("{%v},%s;typeparamtype", parentlbl, tp.Obj().Name())) + case *types.Union: + var b strings.Builder + for i := 0; i < tp.Len(); i++ { + compLbl := extractType(tw, tp.Term(i).Type()) + if i > 0 { + b.WriteString("|") + } + if tp.Term(i).Tilde() { + b.WriteString("~") + } + fmt.Fprintf(&b, "{%s}", compLbl) + } + lbl = tw.Labeler.GlobalID(fmt.Sprintf("%s;typesetliteraltype", b.String())) + default: + log.Fatalf("(getTypeLabel) unexpected type %T", tp) + } + tw.Labeler.TypeLabels[tp] = lbl + } + return lbl, exists +} + +// extractKeyType extracts `key` as the key type of the map type `mp` +func extractKeyType(tw *trap.Writer, mp trap.Label, key types.Type) { + dbscheme.KeyTypeTable.Emit(tw, mp, extractType(tw, key)) +} + +// extractElementType extracts `element` as the element type of the container type `container` +func extractElementType(tw *trap.Writer, container trap.Label, element types.Type) { + dbscheme.ElementTypeTable.Emit(tw, container, extractType(tw, element)) +} + +// extractBaseType extracts `base` as the base type of the pointer type `ptr` +func extractBaseType(tw *trap.Writer, ptr trap.Label, base types.Type) { + dbscheme.BaseTypeTable.Emit(tw, ptr, extractType(tw, base)) +} + +// extractUnderlyingType extracts `underlying` as the underlying type of the +// named type `named` +func extractUnderlyingType(tw *trap.Writer, named trap.Label, underlying types.Type) { + dbscheme.UnderlyingTypeTable.Emit(tw, named, extractType(tw, underlying)) +} + +// extractComponentType extracts `component` as the `idx`th component type of `parent` with name `name` +func extractComponentType(tw *trap.Writer, 
parent trap.Label, idx int, name string, component types.Type) { + dbscheme.ComponentTypesTable.Emit(tw, parent, idx, name, extractType(tw, component)) +} + +// extractNumLines extracts lines-of-code and lines-of-comments information for the +// given file +func extractNumLines(tw *trap.Writer, fileName string, ast *ast.File) { + f := tw.Package.Fset.File(ast.Pos()) + + lineCount := f.LineCount() + + // count lines of code by tokenizing + linesOfCode := 0 + src, err := ioutil.ReadFile(fileName) + if err != nil { + log.Fatalf("Unable to read file %s.", fileName) + } + var s scanner.Scanner + lastCodeLine := -1 + s.Init(f, src, nil, 0) + for { + pos, tok, lit := s.Scan() + if tok == token.EOF { + break + } else if tok != token.ILLEGAL && !(tok == token.SEMICOLON && lit == "\n") { + // specifically exclude newlines that are treated as semicolons + tkStartLine := f.Position(pos).Line + tkEndLine := tkStartLine + strings.Count(lit, "\n") + if tkEndLine > lastCodeLine { + if tkStartLine <= lastCodeLine { + // if the start line is the same as the last code line we've seen we don't want to double + // count it + // note tkStartLine < lastCodeLine should not be possible + linesOfCode += tkEndLine - lastCodeLine + } else { + linesOfCode += tkEndLine - tkStartLine + 1 + } + lastCodeLine = tkEndLine + } + } + } + + // count lines of comments by iterating over ast.Comments + linesOfComments := 0 + for _, cg := range ast.Comments { + for _, g := range cg.List { + fset := tw.Package.Fset + startPos, endPos := fset.Position(g.Pos()), fset.Position(g.End()) + linesOfComments += endPos.Line - startPos.Line + 1 + } + } + + dbscheme.NumlinesTable.Emit(tw, tw.Labeler.FileLabel(), lineCount, linesOfCode, linesOfComments) +} + +// For a type `t` which is the type of a field of an interface type, return +// whether `t` a type set literal which is not a union type. Note that a field +// of an interface must be a method signature, an embedded interface type or a +// type set literal. 
+func isNonUnionTypeSetLiteral(t types.Type) bool { + if t == nil { + return false + } + switch t.Underlying().(type) { + case *types.Interface, *types.Union, *types.Signature: + return false + default: + return true + } +} + +// Given a type `t`, return a union with a single term that is `t` without a +// tilde. +func createUnionFromType(t types.Type) *types.Union { + return types.NewUnion([]*types.Term{types.NewTerm(false, t)}) +} + +// Go through a `FieldList` and update the types of all type set literals which +// are not already union types to be union types. We do this by changing the +// types stored in `tw.Package.TypesInfo.Types`. Type set literals can only +// occur in two places: a type parameter declaration or a type in an interface. +func makeTypeSetLiteralsUnionTyped(tw *trap.Writer, fields *ast.FieldList) { + if fields == nil || fields.List == nil { + return + } + for i := 0; i < len(fields.List); i++ { + x := fields.List[i].Type + if _, alreadyOverridden := tw.TypesOverride[x]; !alreadyOverridden { + xtp := typeOf(tw, x) + if isNonUnionTypeSetLiteral(xtp) { + tw.TypesOverride[x] = createUnionFromType(xtp) + } + } + } +} + +func typeOf(tw *trap.Writer, e ast.Expr) types.Type { + if val, ok := tw.TypesOverride[e]; ok { + return val + } + return tw.Package.TypesInfo.TypeOf(e) +} + +func flattenBinaryExprTree(tw *trap.Writer, e ast.Expr, parent trap.Label, idx int) int { + binaryexpr, ok := e.(*ast.BinaryExpr) + if ok { + idx = flattenBinaryExprTree(tw, binaryexpr.X, parent, idx) + idx = flattenBinaryExprTree(tw, binaryexpr.Y, parent, idx) + } else { + extractExpr(tw, e, parent, idx) + idx = idx + 1 + } + return idx +} + +func extractTypeParamDecls(tw *trap.Writer, fields *ast.FieldList, parent trap.Label) { + if fields == nil || fields.List == nil { + return + } + + // Type set literals can occur as the type in a type parameter declaration, + // so we ensure that they are union typed. 
+ makeTypeSetLiteralsUnionTyped(tw, fields) + + idx := 0 + for _, field := range fields.List { + lbl := tw.Labeler.LocalID(field) + dbscheme.TypeParamDeclsTable.Emit(tw, lbl, parent, idx) + extractNodeLocation(tw, field, lbl) + if field.Names != nil { + for i, name := range field.Names { + extractExpr(tw, name, lbl, i+1) + } + } + extractExpr(tw, field.Type, lbl, 0) + extractDoc(tw, field.Doc, lbl) + idx += 1 + } +} + +// populateTypeParamParents sets `parent` as the parent of the elements of `typeparams` +func populateTypeParamParents(tw *trap.Writer, typeparams *types.TypeParamList, parent types.Object) { + if typeparams != nil { + for idx := 0; idx < typeparams.Len(); idx++ { + setTypeParamParent(typeparams.At(idx), parent) + } + } +} + +// getobjectBeingUsed looks up `ident` in `tw.Package.TypesInfo.Uses` and makes +// some changes to the object to avoid returning objects relating to instantiated +// types. +func getObjectBeingUsed(tw *trap.Writer, ident *ast.Ident) types.Object { + obj := tw.Package.TypesInfo.Uses[ident] + if obj == nil { + return nil + } + if override, ok := tw.ObjectsOverride[obj]; ok { + return override + } + if funcObj, ok := obj.(*types.Func); ok { + sig := funcObj.Type().(*types.Signature) + if recv := sig.Recv(); recv != nil { + recvType := recv.Type() + originType, isSame := tryGetGenericType(recvType) + + if originType == nil { + if pointerType, ok := recvType.(*types.Pointer); ok { + originType, isSame = tryGetGenericType(pointerType.Elem()) + } + } + + if originType == nil || isSame { + return obj + } + + for i := 0; i < originType.NumMethods(); i++ { + meth := originType.Method(i) + if meth.Name() == funcObj.Name() { + return meth + } + } + if interfaceType, ok := originType.Underlying().(*types.Interface); ok { + for i := 0; i < interfaceType.NumMethods(); i++ { + meth := interfaceType.Method(i) + if meth.Name() == funcObj.Name() { + return meth + } + } + } + log.Fatalf("Could not find method %s on type %s", funcObj.Name(), 
originType) + } + } + + return obj +} + +// tryGetGenericType returns the generic type of `tp`, and a boolean indicating +// whether it is the same as `tp`. +func tryGetGenericType(tp types.Type) (*types.Named, bool) { + if namedType, ok := tp.(*types.Named); ok { + originType := namedType.Origin() + return originType, namedType == originType + } + return nil, false +} + +// trackInstantiatedStructFields tries to give the fields of an instantiated +// struct type underlying `tp` the same labels as the corresponding fields of +// the generic struct type. This is so that when we come across the +// instantiated field in `tw.Package.TypesInfo.Uses` we will get the label for +// the generic field instead. +func trackInstantiatedStructFields(tw *trap.Writer, tp, origintp *types.Named) { + if tp == origintp { + return + } + + if instantiatedStruct, ok := tp.Underlying().(*types.Struct); ok { + genericStruct, ok2 := origintp.Underlying().(*types.Struct) + if !ok2 { + log.Fatalf( + "Error: underlying type of instantiated type is a struct but underlying type of generic type is %s", + origintp.Underlying()) + } + + if instantiatedStruct.NumFields() != genericStruct.NumFields() { + log.Fatalf( + "Error: instantiated struct %s has different number of fields than the generic version %s (%d != %d)", + instantiatedStruct, genericStruct, instantiatedStruct.NumFields(), genericStruct.NumFields()) + } + + for i := 0; i < instantiatedStruct.NumFields(); i++ { + tw.ObjectsOverride[instantiatedStruct.Field(i)] = genericStruct.Field(i) + } + } +} + +func getTypeParamParentLabel(tw *trap.Writer, tp *types.TypeParam) trap.Label { + parent, exists := typeParamParent[tp] + if !exists { + log.Fatalf("Parent of type parameter does not exist: %s %s", tp.String(), tp.Constraint().String()) + } + parentlbl, _ := tw.Labeler.ScopedObjectID(parent, func() trap.Label { + log.Fatalf("getTypeLabel() called for parent of type parameter %s", tp.String()) + return trap.InvalidLabel + }) + return 
parentlbl +} + +func setTypeParamParent(tp *types.TypeParam, newobj types.Object) { + obj, exists := typeParamParent[tp] + if !exists { + typeParamParent[tp] = newobj + } else if newobj != obj { + log.Fatalf("Parent of type parameter '%s %s' being set to a different value: '%s' vs '%s'", tp.String(), tp.Constraint().String(), obj, newobj) + } +} diff --git a/go/extractor/gomodextractor.go b/go/extractor/gomodextractor.go new file mode 100644 index 00000000000..5a6a26281bc --- /dev/null +++ b/go/extractor/gomodextractor.go @@ -0,0 +1,202 @@ +package extractor + +import ( + "fmt" + "golang.org/x/mod/modfile" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" + + "github.com/github/codeql-go/extractor/dbscheme" + "github.com/github/codeql-go/extractor/srcarchive" + "github.com/github/codeql-go/extractor/trap" +) + +func (extraction *Extraction) extractGoMod(path string) error { + if normPath, err := filepath.EvalSymlinks(path); err == nil { + path = normPath + } + + extraction.Lock.Lock() + if extraction.SeenGoMods[path] { + extraction.Lock.Unlock() + return nil + } + + extraction.SeenGoMods[path] = true + extraction.Lock.Unlock() + + tw, err := trap.NewWriter(path, nil) + if err != nil { + return err + } + defer tw.Close() + + err = srcarchive.Add(path) + if err != nil { + return err + } + + extraction.extractFileInfo(tw, path) + + file, err := os.Open(path) + if err != nil { + return fmt.Errorf("failed to open go.mod file %s: %s", path, err.Error()) + } + defer file.Close() // release the descriptor on every return path once go.mod has been opened + data, err := ioutil.ReadAll(file) + if err != nil { + return fmt.Errorf("failed to read go.mod file %s: %s", path, err.Error()) + } + + modfile, err := modfile.Parse(path, data, nil) + if err != nil { + return fmt.Errorf("failed to parse go.mod file %s: %s", path, err.Error()) + } + + extractGoModFile(tw, modfile.Syntax) + + return nil +} + +type commentGroupIdxAllocator struct { + nextIdx int +} + +func (cgIdxAlloc *commentGroupIdxAllocator) nextCgIdx() int { + ret := cgIdxAlloc.nextIdx +
cgIdxAlloc.nextIdx++ + return ret +} + +func extractGoModFile(tw *trap.Writer, file *modfile.FileSyntax) { + cgIdxAlloc := commentGroupIdxAllocator{0} + + for idx, stmt := range file.Stmt { + extractGoModExpr(tw, stmt, tw.Labeler.FileLabel(), idx, &cgIdxAlloc) + } + + extractGoModComments(tw, file, tw.Labeler.FileLabel(), &cgIdxAlloc) +} + +func extractGoModExpr(tw *trap.Writer, expr modfile.Expr, parent trap.Label, idx int, cgIdxAlloc *commentGroupIdxAllocator) { + lbl := tw.Labeler.LocalID(expr) + + var kind int + switch expr := expr.(type) { + case *modfile.CommentBlock: + kind = dbscheme.ModCommentBlockType.Index() + case *modfile.LParen: + kind = dbscheme.ModLParenType.Index() + case *modfile.RParen: + kind = dbscheme.ModRParenType.Index() + case *modfile.Line: + kind = dbscheme.ModLineType.Index() + for idx, tok := range expr.Token { + dbscheme.ModTokensTable.Emit(tw, tok, lbl, idx) + } + case *modfile.LineBlock: + kind = dbscheme.ModLineBlockType.Index() + for idx, tok := range expr.Token { + dbscheme.ModTokensTable.Emit(tw, tok, lbl, idx) + } + extractGoModExpr(tw, &expr.LParen, lbl, 0, cgIdxAlloc) + for idx, line := range expr.Line { + extractGoModExpr(tw, line, lbl, idx+1, cgIdxAlloc) + } + extractGoModExpr(tw, &expr.RParen, lbl, len(expr.Line)+1, cgIdxAlloc) + default: + log.Fatalf("unknown go.mod expression of type %T", expr) + } + + dbscheme.ModExprsTable.Emit(tw, lbl, kind, parent, idx) + + extractGoModComments(tw, expr, lbl, cgIdxAlloc) + + start, end := expr.Span() + extractLocation(tw, lbl, start.Line, start.LineRune, end.Line, end.LineRune) +} + +type GoModExprCommentWrapper struct { + expr modfile.Expr +} + +func minInt(a int, b int) int { + if a < b { + return a + } + return b +} + +func maxInt(a int, b int) int { + if a > b { + return a + } + return b +} + +func lexMin(a1 int, a2 int, b1 int, b2 int) (int, int) { + if a1 < b1 { + return a1, a2 + } else if a1 > b1 { + return b1, b2 + } else { + return a1, minInt(a2, b2) // first components tie: the smaller second component decides + } +} + +func lexMax(a1
int, a2 int, b1 int, b2 int) (int, int) { + if a1 < b1 { + return b1, b2 + } else if a1 > b1 { + return a1, a2 + } else { + return a1, maxInt(a2, b2) // first components tie: the larger second component decides + } +} + +func extractGoModComments(tw *trap.Writer, expr modfile.Expr, exprlbl trap.Label, cgIdxAlloc *commentGroupIdxAllocator) { + comments := expr.Comment() + + if len(comments.Before) == 0 && len(comments.Suffix) == 0 && len(comments.After) == 0 { + return + } + + // extract a pseudo `@commentgroup` for each expr that contains their associated comments + grouplbl := tw.Labeler.LocalID(GoModExprCommentWrapper{expr}) + dbscheme.CommentGroupsTable.Emit(tw, grouplbl, tw.Labeler.FileLabel(), cgIdxAlloc.nextCgIdx()) + dbscheme.DocCommentsTable.Emit(tw, exprlbl, grouplbl) + + var allComments []modfile.Comment + allComments = append(allComments, comments.Before...) + allComments = append(allComments, comments.Suffix...) + allComments = append(allComments, comments.After...) + + var startLine, startCol, endLine, endCol int = 0, 0, 0, 0 + var first bool = true + idx := 0 + for _, comment := range allComments { + commentToken := strings.TrimSuffix(strings.TrimSuffix(comment.Token, "\n"), "\r") + extractGoModComment(tw, comment, commentToken, grouplbl, idx) + idx++ + commentEndCol := comment.Start.LineRune + (len(commentToken) - 1) + if first { + startLine, startCol, endLine, endCol = comment.Start.Line, comment.Start.LineRune, comment.Start.Line, commentEndCol + first = false + } else { + startLine, startCol = lexMin(comment.Start.Line, comment.Start.LineRune, startLine, startCol) + endLine, endCol = lexMax(comment.Start.Line, commentEndCol, endLine, endCol) + } + } + + extractLocation(tw, grouplbl, startLine, startCol, endLine, endCol) +} + +func extractGoModComment(tw *trap.Writer, comment modfile.Comment, commentToken string, grouplbl trap.Label, idx int) { + lbl := tw.Labeler.LocalID(comment) + dbscheme.CommentsTable.Emit(tw, lbl, dbscheme.SlashSlashComment.Index(), grouplbl, idx, commentToken) + + extractLocation(tw,
lbl, comment.Start.Line, comment.Start.LineRune, comment.Start.Line, comment.Start.LineRune+(len(commentToken)-1)) +} diff --git a/go/extractor/net/sourceforge/pmd/cpd/AbstractLanguage.java b/go/extractor/net/sourceforge/pmd/cpd/AbstractLanguage.java new file mode 100644 index 00000000000..363be4e8fa1 --- /dev/null +++ b/go/extractor/net/sourceforge/pmd/cpd/AbstractLanguage.java @@ -0,0 +1,13 @@ +package net.sourceforge.pmd.cpd; + +/* + * This is a stub definition for pmd's AbstractLanguage class + * including only the API used by the GoLanguage class. + */ + +public abstract class AbstractLanguage { + + public AbstractLanguage(String... extensions) {} + + public abstract Tokenizer getTokenizer(boolean fuzzyMatch); +} diff --git a/go/extractor/net/sourceforge/pmd/cpd/GoLanguage.java b/go/extractor/net/sourceforge/pmd/cpd/GoLanguage.java new file mode 100644 index 00000000000..400a52c0e0d --- /dev/null +++ b/go/extractor/net/sourceforge/pmd/cpd/GoLanguage.java @@ -0,0 +1,68 @@ +package net.sourceforge.pmd.cpd; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.lang.ProcessBuilder.Redirect; +import java.nio.charset.Charset; +import java.nio.file.Paths; +import java.util.List; +import opencsv.CSVReader; + +public class GoLanguage extends AbstractLanguage { + public GoLanguage() { + super(".go"); + } + + @Override + public Tokenizer getTokenizer(final boolean fuzzyMatch) { + return new Tokenizer() { + @Override + public void tokenize(SourceCode tokens, List tokenEntries) { + String fileName = tokens.getFileName(); + String platform = "linux", exe = ""; + + String osName = System.getProperty("os.name", "unknown"); + if (osName.contains("Windows")) { + platform = "win"; + exe = ".exe"; + } else if (osName.contains("Mac OS X")) { + platform = "osx"; + } + + // get tools folder from SEMMLE_DIST + String toolsDir = null; + String dist = System.getenv("SEMMLE_DIST"); + if (dist != null && !dist.isEmpty()) { + toolsDir = dist +
"/language-packs/go/tools/platform/" + platform; + } + + String goTokenizer = toolsDir == null ? "go-tokenizer" : toolsDir + "/bin/go-tokenizer"; + goTokenizer += exe; + ProcessBuilder pb = new ProcessBuilder(Paths.get(goTokenizer).toString(), fileName); + pb.redirectError(Redirect.INHERIT); + try { + Process process = pb.start(); + try ( + CSVReader r = new CSVReader(new InputStreamReader(process.getInputStream(), Charset.forName("UTF-8"))) + ) { + String[] row; + while ((row = r.readNext()) != null) { + String text = row[0]; + String fuzzyText = row[1]; + int beginLine = Integer.parseInt(row[2]); + int beginColumn = Integer.parseInt(row[3]); + int endLine = Integer.parseInt(row[4]); + int endColumn = Integer.parseInt(row[5]); + tokenEntries.add(new TokenEntry(fuzzyMatch ? fuzzyText : text, fileName, beginLine, beginColumn, endLine, endColumn)); // fuzzy matching uses the fuzzy token text, exact matching the literal text + } + } + int exitCode = process.waitFor(); + if (exitCode != 0) + throw new RuntimeException("Tokenizing " + fileName + " returned " + exitCode + "."); + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + } + }; + } +} diff --git a/go/extractor/net/sourceforge/pmd/cpd/SourceCode.java b/go/extractor/net/sourceforge/pmd/cpd/SourceCode.java new file mode 100644 index 00000000000..f3ad49f8343 --- /dev/null +++ b/go/extractor/net/sourceforge/pmd/cpd/SourceCode.java @@ -0,0 +1,12 @@ +package net.sourceforge.pmd.cpd; + +/* + * This is a stub definition for pmd's SourceCode class + * including only the API used by the GoLanguage class.
+ */ + +public class SourceCode { + public String getFileName() { + return null; + } +} diff --git a/go/extractor/net/sourceforge/pmd/cpd/TokenEntry.java b/go/extractor/net/sourceforge/pmd/cpd/TokenEntry.java new file mode 100644 index 00000000000..d599ebea367 --- /dev/null +++ b/go/extractor/net/sourceforge/pmd/cpd/TokenEntry.java @@ -0,0 +1,11 @@ +package net.sourceforge.pmd.cpd; + +/* + * This is a stub definition for pmd's TokenEntry class + * including only the API used by the GoLanguage class. + */ + +public class TokenEntry { + public TokenEntry(String image, String tokenSrcID, int beginLine, int beginColumn, int endLine, int endColumn) { + } +} diff --git a/go/extractor/net/sourceforge/pmd/cpd/Tokenizer.java b/go/extractor/net/sourceforge/pmd/cpd/Tokenizer.java new file mode 100644 index 00000000000..cc94bbbb149 --- /dev/null +++ b/go/extractor/net/sourceforge/pmd/cpd/Tokenizer.java @@ -0,0 +1,12 @@ +package net.sourceforge.pmd.cpd; + +/* + * This is a stub definition for pmd's Tokenizer interface + * including only the API used by the GoLanguage class. + */ + +import java.util.List; + +public interface Tokenizer { + void tokenize(SourceCode tokens, List tokenEntries); +} diff --git a/go/extractor/opencsv/CSVParser.java b/go/extractor/opencsv/CSVParser.java new file mode 100644 index 00000000000..e3a864bef3f --- /dev/null +++ b/go/extractor/opencsv/CSVParser.java @@ -0,0 +1,207 @@ +/** + Copyright 2005 Bytecode Pty Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +package opencsv; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * A very simple CSV parser released under a commercial-friendly license. + * This just implements splitting a single line into fields. + * + * @author Glen Smith + * @author Rainer Pruy + * + */ +public class CSVParser { + + private final char separator; + + private final char quotechar; + + private final char escape; + + private final boolean strictQuotes; + + private StringBuilder buf = new StringBuilder(INITIAL_READ_SIZE); + + /** The default separator to use if none is supplied to the constructor. */ + public static final char DEFAULT_SEPARATOR = ','; + + private static final int INITIAL_READ_SIZE = 128; + + /** + * The default quote character to use if none is supplied to the + * constructor. + */ + public static final char DEFAULT_QUOTE_CHARACTER = '"'; + + + /** + * The default escape character to use if none is supplied to the + * constructor. + */ + public static final char DEFAULT_ESCAPE_CHARACTER = '"'; + + /** + * The default strict quote behavior to use if none is supplied to the + * constructor + */ + public static final boolean DEFAULT_STRICT_QUOTES = false; + + /** + * Constructs CSVReader with supplied separator and quote char. 
* Allows setting the "strict quotes" flag + * @param separator + * the delimiter to use for separating entries + * @param quotechar + * the character to use for quoted elements + * @param escape + * the character to use for escaping a separator or quote + * @param strictQuotes + * if true, characters outside the quotes are ignored + */ + CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) { + this.separator = separator; + this.quotechar = quotechar; + this.escape = escape; + this.strictQuotes = strictQuotes; + } + + /** + * + * @return true if something was left over from last call(s) + */ + public boolean isPending() { + return buf.length() != 0; + } + + public String[] parseLineMulti(String nextLine) throws IOException { + return parseLine(nextLine, true); + } + + public String[] parseLine(String nextLine) throws IOException { + return parseLine(nextLine, false); + } + /** + * Parses an incoming String and returns an array of elements. + * + * @param nextLine + * the string to parse + * @return the comma-tokenized list of elements, or null if nextLine is null + * @throws IOException if bad things happen during the read + */ + private String[] parseLine(String nextLine, boolean multi) throws IOException { + + if (!multi && isPending()) { + clear(); + } + + if (nextLine == null) { + if (isPending()) { + String s = buf.toString(); + clear(); + return new String[] {s}; + } else { + return null; + } + } + + List<String> tokensOnThisLine = new ArrayList<String>(); + boolean inQuotes = isPending(); + for (int i = 0; i < nextLine.length(); i++) { + + char c = nextLine.charAt(i); + if (c == this.escape && isNextCharacterEscapable(nextLine, inQuotes, i)) { + buf.append(nextLine.charAt(i+1)); + i++; + } else if (c == quotechar) { + if( isNextCharacterEscapedQuote(nextLine, inQuotes, i) ){ + buf.append(nextLine.charAt(i+1)); + i++; + }else{ + inQuotes = !inQuotes; + // the tricky case of an embedded quote in the middle: a,bc"d"ef,g + if (!strictQuotes) { + if(i>2
//not on the beginning of the line + && nextLine.charAt(i-1) != this.separator //not at the beginning of an escape sequence + && nextLine.length()>(i+1) && + nextLine.charAt(i+1) != this.separator //not at the end of an escape sequence + ){ + buf.append(c); + } + } + } + } else if (c == separator && !inQuotes) { + tokensOnThisLine.add(buf.toString()); + clear(); // start work on next token + } else { + if (!strictQuotes || inQuotes) + buf.append(c); + } + } + // line is done - check status + if (inQuotes) { + if (multi) { + // continuing a quoted section, re-append newline + buf.append('\n'); + // this partial content is not to be added to field list yet + } else { + throw new IOException("Un-terminated quoted field at end of CSV line"); + } + } else { + tokensOnThisLine.add(buf.toString()); + clear(); + } + return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]); + + } + + /** + * precondition: the current character is a quote or an escape + * @param nextLine the current line + * @param inQuotes true if the current context is quoted + * @param i current index in line + * @return true if the following character is a quote + */ + private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) { + return inQuotes // we are in quotes, therefore there can be escaped quotes in here. + && nextLine.length() > (i+1) // there is indeed another character to check. + && nextLine.charAt(i+1) == quotechar; + } + + /** + * precondition: the current character is an escape + * @param nextLine the current line + * @param inQuotes true if the current context is quoted + * @param i current index in line + * @return true if the following character is a quote + */ + protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) { + return inQuotes // we are in quotes, therefore there can be escaped quotes in here. + && nextLine.length() > (i+1) // there is indeed another character to check. 
+ && ( nextLine.charAt(i+1) == quotechar || nextLine.charAt(i+1) == this.escape); + } + + /** + * Reset the buffer used for storing the current field's value + */ + private void clear() { + buf.setLength(0); + } +} diff --git a/go/extractor/opencsv/CSVReader.java b/go/extractor/opencsv/CSVReader.java new file mode 100644 index 00000000000..11b537e4995 --- /dev/null +++ b/go/extractor/opencsv/CSVReader.java @@ -0,0 +1,192 @@ +/** + Copyright 2005 Bytecode Pty Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +package opencsv; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.List; + +/** + * A very simple CSV reader released under a commercial-friendly license. + * + * @author Glen Smith + * + */ +public class CSVReader implements Closeable { + + private final BufferedReader br; + + private boolean hasNext = true; + + private final CSVParser parser; + + private final int skipLines; + + private boolean linesSkipped; + + /** The line number of the last physical line read (one-based). */ + private int curline = 0; + + /** The physical line number at which the last logical line read started (one-based). */ + private int startLine = 0; + + /** + * The default line to start reading. + */ + private static final int DEFAULT_SKIP_LINES = 0; + + /** + * Constructs CSVReader using a comma for the separator. + * + * @param reader + * the reader to an underlying CSV source. 
+ */ + public CSVReader(Reader reader) { + this(reader, + CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, + CSVParser.DEFAULT_STRICT_QUOTES); + } + + /** + * Constructs CSVReader with supplied separator and quote char. + * + * @param reader + * the reader to an underlying CSV source. + * @param separator + * the delimiter to use for separating entries + * @param quotechar + * the character to use for quoted elements + * @param escape + * the character to use for escaping a separator or quote + * @param line + * the line number to skip for start reading + * @param strictQuotes + * sets if characters outside the quotes are ignored + */ + private CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) { + this.br = new BufferedReader(reader); + this.parser = new CSVParser(separator, quotechar, escape, strictQuotes); + this.skipLines = line; + } + + + /** + * Reads the entire file into a List with each element being a String[] of + * tokens. + * + * @return a List of String[], with each String[] representing a line of the + * file. + * + * @throws IOException + * if bad things happen during the read + */ + public List readAll() throws IOException { + + List
Finds accesses to an array using a Uint16 index when the array has a size less than 256.
Use an int with a smaller bit size instead. For instance, in this example use an 8-bit int.