diff --git a/.github/actions/cache-query-compilation/action.yml b/.github/actions/cache-query-compilation/action.yml
index 2a0358ae63c..0ad0b7da982 100644
--- a/.github/actions/cache-query-compilation/action.yml
+++ b/.github/actions/cache-query-compilation/action.yml
@@ -9,7 +9,7 @@ inputs:
outputs:
cache-dir:
description: "The directory where the cache was stored"
- value: ${{ steps.fill-compilation-dir.outputs.compdir }}
+ value: ${{ steps.output-compilation-dir.outputs.compdir }}
runs:
using: composite
@@ -27,7 +27,9 @@ runs:
if: ${{ github.event_name == 'pull_request' }}
uses: actions/cache/restore@v3
with:
- path: '**/.cache'
+ path: |
+ **/.cache
+ ~/.codeql/compile-cache
key: codeql-compile-${{ inputs.key }}-pr-${{ github.sha }}
restore-keys: |
codeql-compile-${{ inputs.key }}-${{ github.base_ref }}-${{ env.merge_base }}
@@ -37,18 +39,111 @@ runs:
if: ${{ github.event_name != 'pull_request' }}
uses: actions/cache@v3
with:
- path: '**/.cache'
+ path: |
+ **/.cache
+ ~/.codeql/compile-cache
key: codeql-compile-${{ inputs.key }}-${{ github.ref_name }}-${{ github.sha }} # just fill on main
restore-keys: | # restore the latest cache if the exact cache is unavailable, to speed up compilation.
codeql-compile-${{ inputs.key }}-${{ github.ref_name }}-
codeql-compile-${{ inputs.key }}-main-
- - name: Fill compilation cache directory
- id: fill-compilation-dir
+ - name: Output-compilationdir
+ id: output-compilation-dir
shell: bash
run: |
- # Move all the existing cache into another folder, so we only preserve the cache for the current queries.
- node $GITHUB_WORKSPACE/.github/actions/cache-query-compilation/move-caches.js ${COMBINED_CACHE_DIR}
-
echo "compdir=${COMBINED_CACHE_DIR}" >> $GITHUB_OUTPUT
env:
COMBINED_CACHE_DIR: ${{ runner.temp }}/compilation-dir
+ - name: Fill compilation cache directory
+ id: fill-compilation-dir
+ uses: actions/github-script@v6
+ env:
+ COMBINED_CACHE_DIR: ${{ runner.temp }}/compilation-dir
+ with:
+ script: |
+ // # Move all the existing cache into another folder, so we only preserve the cache for the current queries.
+ // mkdir -p ${COMBINED_CACHE_DIR}
+ // rm -f **/.cache/{lock,size} # -f to avoid errors if the cache is empty.
+ // # copy the contents of the .cache folders into the combined cache folder.
+ // cp -r **/.cache/* ${COMBINED_CACHE_DIR}/ || : # ignore missing files
+ // # clean up the .cache folders
+ // rm -rf **/.cache/*
+
+ const fs = require("fs");
+ const path = require("path");
+ const os = require("os");
+
+ // the first argv is the cache folder to create.
+ const COMBINED_CACHE_DIR = process.env.COMBINED_CACHE_DIR;
+
+ function* walkCaches(dir) {
+ const files = fs.readdirSync(dir, { withFileTypes: true });
+ for (const file of files) {
+ if (file.isDirectory()) {
+ const filePath = path.join(dir, file.name);
+ yield* walkCaches(filePath);
+ if (file.name === ".cache") {
+ yield filePath;
+ }
+ }
+ }
+ }
+
+ async function copyDir(src, dest) {
+ for await (const file of await fs.promises.readdir(src, { withFileTypes: true })) {
+ const srcPath = path.join(src, file.name);
+ const destPath = path.join(dest, file.name);
+ if (file.isDirectory()) {
+ if (!fs.existsSync(destPath)) {
+ fs.mkdirSync(destPath);
+ }
+ await copyDir(srcPath, destPath);
+ } else {
+ await fs.promises.copyFile(srcPath, destPath);
+ }
+ }
+ }
+
+ async function main() {
+ const cacheDirs = [...walkCaches(".")];
+
+ for (const dir of cacheDirs) {
+ console.log(`Found .cache dir at ${dir}`);
+ }
+
+ const globalCacheDir = path.join(os.homedir(), ".codeql", "compile-cache");
+ if (fs.existsSync(globalCacheDir)) {
+ console.log("Found global home dir: " + globalCacheDir);
+ cacheDirs.push(globalCacheDir);
+ }
+
+ if (cacheDirs.length === 0) {
+ console.log("No cache dirs found");
+ return;
+ }
+
+ // mkdir -p ${COMBINED_CACHE_DIR}
+ fs.mkdirSync(COMBINED_CACHE_DIR, { recursive: true });
+
+ // rm -f **/.cache/{lock,size} # -f to avoid errors if the cache is empty.
+ await Promise.all(
+ cacheDirs.map((cacheDir) =>
+ (async function () {
+ await fs.promises.rm(path.join(cacheDir, "lock"), { force: true });
+ await fs.promises.rm(path.join(cacheDir, "size"), { force: true });
+ })()
+ )
+ );
+
+ // # copy the contents of the .cache folders into the combined cache folder.
+ // cp -r **/.cache/* ${COMBINED_CACHE_DIR}/ || : # ignore missing files
+ await Promise.all(
+ cacheDirs.map((cacheDir) => copyDir(cacheDir, COMBINED_CACHE_DIR))
+ );
+
+ // # clean up the .cache folders
+ // rm -rf **/.cache/*
+ await Promise.all(
+ cacheDirs.map((cacheDir) => fs.promises.rm(cacheDir, { recursive: true }))
+ );
+ }
+ main();
diff --git a/.github/actions/cache-query-compilation/move-caches.js b/.github/actions/cache-query-compilation/move-caches.js
deleted file mode 100644
index 67fc503cdc0..00000000000
--- a/.github/actions/cache-query-compilation/move-caches.js
+++ /dev/null
@@ -1,75 +0,0 @@
-// # Move all the existing cache into another folder, so we only preserve the cache for the current queries.
-// mkdir -p ${COMBINED_CACHE_DIR}
-// rm -f **/.cache/{lock,size} # -f to avoid errors if the cache is empty.
-// # copy the contents of the .cache folders into the combined cache folder.
-// cp -r **/.cache/* ${COMBINED_CACHE_DIR}/ || : # ignore missing files
-// # clean up the .cache folders
-// rm -rf **/.cache/*
-
-const fs = require("fs");
-const path = require("path");
-
-// the first argv is the cache folder to create.
-const COMBINED_CACHE_DIR = process.argv[2];
-
-function* walkCaches(dir) {
- const files = fs.readdirSync(dir, { withFileTypes: true });
- for (const file of files) {
- if (file.isDirectory()) {
- const filePath = path.join(dir, file.name);
- yield* walkCaches(filePath);
- if (file.name === ".cache") {
- yield filePath;
- }
- }
- }
-}
-
-async function copyDir(src, dest) {
- for await (const file of await fs.promises.readdir(src, { withFileTypes: true })) {
- const srcPath = path.join(src, file.name);
- const destPath = path.join(dest, file.name);
- if (file.isDirectory()) {
- if (!fs.existsSync(destPath)) {
- fs.mkdirSync(destPath);
- }
- await copyDir(srcPath, destPath);
- } else {
- await fs.promises.copyFile(srcPath, destPath);
- }
- }
-}
-
-async function main() {
- const cacheDirs = [...walkCaches(".")];
-
- for (const dir of cacheDirs) {
- console.log(`Found .cache dir at ${dir}`);
- }
-
- // mkdir -p ${COMBINED_CACHE_DIR}
- fs.mkdirSync(COMBINED_CACHE_DIR, { recursive: true });
-
- // rm -f **/.cache/{lock,size} # -f to avoid errors if the cache is empty.
- await Promise.all(
- cacheDirs.map((cacheDir) =>
- (async function () {
- await fs.promises.rm(path.join(cacheDir, "lock"), { force: true });
- await fs.promises.rm(path.join(cacheDir, "size"), { force: true });
- })()
- )
- );
-
- // # copy the contents of the .cache folders into the combined cache folder.
- // cp -r **/.cache/* ${COMBINED_CACHE_DIR}/ || : # ignore missing files
- await Promise.all(
- cacheDirs.map((cacheDir) => copyDir(cacheDir, COMBINED_CACHE_DIR))
- );
-
- // # clean up the .cache folders
- // rm -rf **/.cache/*
- await Promise.all(
- cacheDirs.map((cacheDir) => fs.promises.rm(cacheDir, { recursive: true }))
- );
-}
-main();
diff --git a/.github/workflows/compile-queries.yml b/.github/workflows/compile-queries.yml
index 96d8e4cc30b..ac63af81ccd 100644
--- a/.github/workflows/compile-queries.yml
+++ b/.github/workflows/compile-queries.yml
@@ -24,14 +24,14 @@ jobs:
with:
key: all-queries
- name: check formatting
- run: find */ql -type f \( -name "*.qll" -o -name "*.ql" \) -print0 | xargs -0 codeql query format --check-only
+ run: find */ql -type f \( -name "*.qll" -o -name "*.ql" \) -print0 | xargs -0 -n 3000 -P 10 codeql query format -q --check-only
- name: compile queries - check-only
# run with --check-only if running in a PR (github.sha != main)
if : ${{ github.event_name == 'pull_request' }}
shell: bash
- run: codeql query compile -j0 */ql/{src,examples} --keep-going --warnings=error --check-only --compilation-cache "${{ steps.query-cache.outputs.cache-dir }}"
+ run: codeql query compile -q -j0 */ql/{src,examples} --keep-going --warnings=error --check-only --compilation-cache "${{ steps.query-cache.outputs.cache-dir }}"
- name: compile queries - full
# do full compile if running on main - this populates the cache
if : ${{ github.event_name != 'pull_request' }}
shell: bash
- run: codeql query compile -j0 */ql/{src,examples} --keep-going --warnings=error --compilation-cache "${{ steps.query-cache.outputs.cache-dir }}"
+ run: codeql query compile -q -j0 */ql/{src,examples} --keep-going --warnings=error --compilation-cache "${{ steps.query-cache.outputs.cache-dir }}"
diff --git a/.github/workflows/go-tests-other-os.yml b/.github/workflows/go-tests-other-os.yml
index edf6eb63d49..97207f573c0 100644
--- a/.github/workflows/go-tests-other-os.yml
+++ b/.github/workflows/go-tests-other-os.yml
@@ -12,10 +12,10 @@ jobs:
name: Test MacOS
runs-on: macos-latest
steps:
- - name: Set up Go 1.19
+ - name: Set up Go 1.20
uses: actions/setup-go@v3
with:
- go-version: 1.19
+ go-version: 1.20.0
id: go
- name: Check out code
@@ -47,10 +47,10 @@ jobs:
name: Test Windows
runs-on: windows-latest-xl
steps:
- - name: Set up Go 1.19
+ - name: Set up Go 1.20
uses: actions/setup-go@v3
with:
- go-version: 1.19
+ go-version: 1.20.0
id: go
- name: Check out code
diff --git a/.github/workflows/go-tests.yml b/.github/workflows/go-tests.yml
index eceabb0410a..4cf299e5e44 100644
--- a/.github/workflows/go-tests.yml
+++ b/.github/workflows/go-tests.yml
@@ -20,10 +20,10 @@ jobs:
name: Test Linux (Ubuntu)
runs-on: ubuntu-latest-xl
steps:
- - name: Set up Go 1.19
+ - name: Set up Go 1.20
uses: actions/setup-go@v3
with:
- go-version: 1.19
+ go-version: 1.20.0
id: go
- name: Check out code
diff --git a/.github/workflows/ql-for-ql-build.yml b/.github/workflows/ql-for-ql-build.yml
index 8b6e7a56946..49d73d60fc5 100644
--- a/.github/workflows/ql-for-ql-build.yml
+++ b/.github/workflows/ql-for-ql-build.yml
@@ -5,13 +5,6 @@ on:
branches: [main]
pull_request:
branches: [main]
- paths:
- - "ql/**"
- - "**.qll"
- - "**.ql"
- - "**.dbscheme"
- - "**/qlpack.yml"
- - ".github/workflows/ql-for-ql-build.yml"
env:
CARGO_TERM_COLOR: always
@@ -22,6 +15,8 @@ jobs:
steps:
### Build the queries ###
- uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
- name: Find codeql
id: find-codeql
uses: github/codeql-action/init@v2
@@ -34,7 +29,9 @@ jobs:
id: cache-extractor
uses: actions/cache@v3
with:
- path: ql/extractor-pack/
+ path: |
+ ql/extractor-pack/
+ ql/target/release/buramu
key: ${{ runner.os }}-${{ steps.os_version.outputs.version }}-extractor-${{ hashFiles('ql/**/Cargo.lock') }}-${{ hashFiles('ql/**/*.rs') }}
- name: Cache cargo
if: steps.cache-extractor.outputs.cache-hit != 'true'
@@ -57,6 +54,7 @@ jobs:
key: run-ql-for-ql
- name: Make database and analyze
run: |
+ ./ql/target/release/buramu | tee deprecated.blame # Add a blame file for the extractor to parse.
${CODEQL} database create -l=ql --search-path ql/extractor-pack ${DB}
${CODEQL} database analyze -j0 --format=sarif-latest --output=ql-for-ql.sarif ${DB} ql/ql/src/codeql-suites/ql-code-scanning.qls --compilation-cache "${{ steps.query-cache.outputs.cache-dir }}"
env:
@@ -65,6 +63,7 @@ jobs:
LGTM_INDEX_FILTERS: |
exclude:ql/ql/test
exclude:*/ql/lib/upgrades/
+ exclude:java/ql/integration-tests
- name: Upload sarif to code-scanning
uses: github/codeql-action/upload-sarif@v2
with:
diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml
index de0f11f0521..806e04e6c68 100644
--- a/.github/workflows/swift.yml
+++ b/.github/workflows/swift.yml
@@ -5,6 +5,7 @@ on:
paths:
- "swift/**"
- "misc/bazel/**"
+ - "misc/codegen/**"
- "*.bazel*"
- .github/workflows/swift.yml
- .github/actions/**
@@ -19,6 +20,7 @@ on:
paths:
- "swift/**"
- "misc/bazel/**"
+ - "misc/codegen/**"
- "*.bazel*"
- .github/workflows/swift.yml
- .github/actions/**
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5f35c2c183b..e612a423462 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -53,5 +53,5 @@ repos:
name: Run Swift code generation unit tests
files: ^swift/codegen/.*\.py$
language: system
- entry: bazel test //swift/codegen/test
+ entry: bazel test //misc/codegen/test
pass_filenames: false
diff --git a/CODEOWNERS b/CODEOWNERS
index 42fb364418f..8371dd768cd 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -2,10 +2,11 @@
/csharp/ @github/codeql-csharp
/go/ @github/codeql-go
/java/ @github/codeql-java
-/javascript/ @github/codeql-javascript
-/python/ @github/codeql-python
-/ruby/ @github/codeql-ruby
+/javascript/ @github/codeql-dynamic
+/python/ @github/codeql-dynamic
+/ruby/ @github/codeql-dynamic
/swift/ @github/codeql-swift
+/misc/codegen/ @github/codeql-swift
/java/kotlin-extractor/ @github/codeql-kotlin
/java/kotlin-explorer/ @github/codeql-kotlin
diff --git a/cpp/autobuilder/Semmle.Autobuild.Cpp.Tests/BuildScripts.cs b/cpp/autobuilder/Semmle.Autobuild.Cpp.Tests/BuildScripts.cs
index def45890c9f..846d8333030 100644
--- a/cpp/autobuilder/Semmle.Autobuild.Cpp.Tests/BuildScripts.cs
+++ b/cpp/autobuilder/Semmle.Autobuild.Cpp.Tests/BuildScripts.cs
@@ -131,6 +131,14 @@ namespace Semmle.Autobuild.Cpp.Tests
bool IBuildActions.IsWindows() => IsWindows;
+ public bool IsMacOs { get; set; }
+
+ bool IBuildActions.IsMacOs() => IsMacOs;
+
+ public bool IsArm { get; set; }
+
+ bool IBuildActions.IsArm() => IsArm;
+
string IBuildActions.PathCombine(params string[] parts)
{
return string.Join(IsWindows ? '\\' : '/', parts.Where(p => !string.IsNullOrWhiteSpace(p)));
diff --git a/cpp/ql/lib/CHANGELOG.md b/cpp/ql/lib/CHANGELOG.md
index 387f55a3e2e..319e78ac20b 100644
--- a/cpp/ql/lib/CHANGELOG.md
+++ b/cpp/ql/lib/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.5.3
+
+No user-facing changes.
+
## 0.5.2
No user-facing changes.
diff --git a/cpp/ql/lib/change-notes/released/0.5.3.md b/cpp/ql/lib/change-notes/released/0.5.3.md
new file mode 100644
index 00000000000..e97503053f0
--- /dev/null
+++ b/cpp/ql/lib/change-notes/released/0.5.3.md
@@ -0,0 +1,3 @@
+## 0.5.3
+
+No user-facing changes.
diff --git a/cpp/ql/lib/codeql-pack.release.yml b/cpp/ql/lib/codeql-pack.release.yml
index 2d9d3f587f8..2164e038a5d 100644
--- a/cpp/ql/lib/codeql-pack.release.yml
+++ b/cpp/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.5.2
+lastReleaseVersion: 0.5.3
diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml
index 239e5ad0055..ee8b5187961 100644
--- a/cpp/ql/lib/qlpack.yml
+++ b/cpp/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/cpp-all
-version: 0.5.3-dev
+version: 0.5.4-dev
groups: cpp
dbscheme: semmlecode.cpp.dbscheme
extractor: cpp
diff --git a/cpp/ql/src/CHANGELOG.md b/cpp/ql/src/CHANGELOG.md
index 8b2bc6c1be0..f0364b77bab 100644
--- a/cpp/ql/src/CHANGELOG.md
+++ b/cpp/ql/src/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.5.3
+
+No user-facing changes.
+
## 0.5.2
No user-facing changes.
diff --git a/cpp/ql/src/change-notes/released/0.5.3.md b/cpp/ql/src/change-notes/released/0.5.3.md
new file mode 100644
index 00000000000..e97503053f0
--- /dev/null
+++ b/cpp/ql/src/change-notes/released/0.5.3.md
@@ -0,0 +1,3 @@
+## 0.5.3
+
+No user-facing changes.
diff --git a/cpp/ql/src/codeql-pack.release.yml b/cpp/ql/src/codeql-pack.release.yml
index 2d9d3f587f8..2164e038a5d 100644
--- a/cpp/ql/src/codeql-pack.release.yml
+++ b/cpp/ql/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.5.2
+lastReleaseVersion: 0.5.3
diff --git a/cpp/ql/src/qlpack.yml b/cpp/ql/src/qlpack.yml
index fb977480f05..025587014e6 100644
--- a/cpp/ql/src/qlpack.yml
+++ b/cpp/ql/src/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/cpp-queries
-version: 0.5.3-dev
+version: 0.5.4-dev
groups:
- cpp
- queries
diff --git a/csharp/autobuilder/Semmle.Autobuild.CSharp.Tests/BuildScripts.cs b/csharp/autobuilder/Semmle.Autobuild.CSharp.Tests/BuildScripts.cs
index df362c2a129..ec7c63a022d 100644
--- a/csharp/autobuilder/Semmle.Autobuild.CSharp.Tests/BuildScripts.cs
+++ b/csharp/autobuilder/Semmle.Autobuild.CSharp.Tests/BuildScripts.cs
@@ -145,6 +145,14 @@ namespace Semmle.Autobuild.CSharp.Tests
bool IBuildActions.IsWindows() => IsWindows;
+ public bool IsMacOs { get; set; }
+
+ bool IBuildActions.IsMacOs() => IsMacOs;
+
+ public bool IsArm { get; set; }
+
+ bool IBuildActions.IsArm() => IsArm;
+
public string PathCombine(params string[] parts)
{
return string.Join(IsWindows ? '\\' : '/', parts.Where(p => !string.IsNullOrWhiteSpace(p)));
diff --git a/csharp/autobuilder/Semmle.Autobuild.Shared/BuildActions.cs b/csharp/autobuilder/Semmle.Autobuild.Shared/BuildActions.cs
index 1cf1d6253a5..f3f9ae15da6 100644
--- a/csharp/autobuilder/Semmle.Autobuild.Shared/BuildActions.cs
+++ b/csharp/autobuilder/Semmle.Autobuild.Shared/BuildActions.cs
@@ -7,6 +7,7 @@ using System.Xml;
using System.Net.Http;
using System.Diagnostics.CodeAnalysis;
using System.Threading.Tasks;
+using System.Runtime.InteropServices;
namespace Semmle.Autobuild.Shared
{
@@ -98,6 +99,18 @@ namespace Semmle.Autobuild.Shared
///
bool IsWindows();
+ ///
+ /// Gets a value indicating whether we are running on macOS.
+ ///
+ /// True if we are running on macOS.
+ bool IsMacOs();
+
+ ///
+ /// Gets a value indicating whether we are running on arm.
+ ///
+ /// True if we are running on arm.
+ bool IsArm();
+
///
/// Combine path segments, Path.Combine().
///
@@ -203,6 +216,12 @@ namespace Semmle.Autobuild.Shared
bool IBuildActions.IsWindows() => Win32.IsWindows();
+ bool IBuildActions.IsMacOs() => RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
+
+ bool IBuildActions.IsArm() =>
+ RuntimeInformation.ProcessArchitecture == Architecture.Arm64 ||
+ RuntimeInformation.ProcessArchitecture == Architecture.Arm;
+
string IBuildActions.PathCombine(params string[] parts) => Path.Combine(parts);
void IBuildActions.WriteAllText(string filename, string contents) => File.WriteAllText(filename, contents);
diff --git a/csharp/autobuilder/Semmle.Autobuild.Shared/MsBuildRule.cs b/csharp/autobuilder/Semmle.Autobuild.Shared/MsBuildRule.cs
index 77f2f70f718..56858cc87a2 100644
--- a/csharp/autobuilder/Semmle.Autobuild.Shared/MsBuildRule.cs
+++ b/csharp/autobuilder/Semmle.Autobuild.Shared/MsBuildRule.cs
@@ -1,18 +1,36 @@
using Semmle.Util.Logging;
+using System;
using System.Linq;
+using System.Runtime.InteropServices;
namespace Semmle.Autobuild.Shared
{
+ internal static class MsBuildCommandExtensions
+ {
+ ///
+ /// Appends a call to msbuild.
+ ///
+ ///
+ ///
+ ///
+ public static CommandBuilder MsBuildCommand(this CommandBuilder cmdBuilder, IAutobuilder builder)
+ {
+ var isArmMac = builder.Actions.IsMacOs() && builder.Actions.IsArm();
+
+ // mono doesn't ship with `msbuild` on Arm-based Macs, but we can fall back to
+ // msbuild that ships with `dotnet` which can be invoked with `dotnet msbuild`
+ // perhaps we should do this on all platforms?
+ return isArmMac ?
+ cmdBuilder.RunCommand("dotnet").Argument("msbuild") :
+ cmdBuilder.RunCommand("msbuild");
+ }
+ }
+
///
/// A build rule using msbuild.
///
public class MsBuildRule : IBuildRule
{
- ///
- /// The name of the msbuild command.
- ///
- private const string msBuild = "msbuild";
-
public BuildScript Analyse(IAutobuilder builder, bool auto)
{
if (!builder.ProjectsOrSolutionsToBuild.Any())
@@ -57,7 +75,7 @@ namespace Semmle.Autobuild.Shared
Script;
var nugetRestore = GetNugetRestoreScript();
var msbuildRestoreCommand = new CommandBuilder(builder.Actions).
- RunCommand(msBuild).
+ MsBuildCommand(builder).
Argument("/t:restore").
QuoteArgument(projectOrSolution.FullPath);
@@ -95,7 +113,7 @@ namespace Semmle.Autobuild.Shared
command.RunCommand("set Platform=&& type NUL", quoteExe: false);
}
- command.RunCommand(msBuild);
+ command.MsBuildCommand(builder);
command.QuoteArgument(projectOrSolution.FullPath);
var target = builder.Options.MsBuildTarget ?? "rebuild";
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifier.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifier.cs
index 7e4a3a95e39..15da81bf5e4 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifier.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifier.cs
@@ -65,6 +65,15 @@ namespace Semmle.Extraction.CSharp.Entities
trapFile.has_modifiers(target, Modifier.Create(cx, modifier));
}
+ private static void ExtractFieldModifiers(Context cx, TextWriter trapFile, IEntity key, IFieldSymbol symbol)
+ {
+ if (symbol.IsReadOnly)
+ HasModifier(cx, trapFile, key, Modifiers.Readonly);
+
+ if (symbol.IsRequired)
+ HasModifier(cx, trapFile, key, Modifiers.Required);
+ }
+
private static void ExtractNamedTypeModifiers(Context cx, TextWriter trapFile, IEntity key, ISymbol symbol)
{
if (symbol.Kind != SymbolKind.NamedType)
@@ -106,8 +115,11 @@ namespace Semmle.Extraction.CSharp.Entities
if (symbol.IsVirtual)
HasModifier(cx, trapFile, key, Modifiers.Virtual);
- if (symbol.Kind == SymbolKind.Field && ((IFieldSymbol)symbol).IsReadOnly)
- HasModifier(cx, trapFile, key, Modifiers.Readonly);
+ if (symbol is IFieldSymbol field)
+ ExtractFieldModifiers(cx, trapFile, key, field);
+
+ if (symbol.Kind == SymbolKind.Property && ((IPropertySymbol)symbol).IsRequired)
+ HasModifier(cx, trapFile, key, Modifiers.Required);
if (symbol.IsOverride)
HasModifier(cx, trapFile, key, Modifiers.Override);
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifiers.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifiers.cs
index f535a9c3058..ef38646bc81 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifiers.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Modifiers.cs
@@ -13,6 +13,7 @@ internal static class Modifiers
public const string Public = "public";
public const string Readonly = "readonly";
public const string Record = "record";
+ public const string Required = "required";
public const string Ref = "ref";
public const string Sealed = "sealed";
public const string Static = "static";
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/SymbolExtensions.cs b/csharp/extractor/Semmle.Extraction.CSharp/SymbolExtensions.cs
index 6018b9903c1..debb96ca3b6 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/SymbolExtensions.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/SymbolExtensions.cs
@@ -77,12 +77,8 @@ namespace Semmle.Extraction.CSharp
///
/// Gets the source-level modifiers belonging to this symbol, if any.
///
- public static IEnumerable GetSourceLevelModifiers(this ISymbol symbol)
- {
- var methodModifiers = symbol.GetModifiers(md => md.Modifiers);
- var typeModifiers = symbol.GetModifiers(cd => cd.Modifiers);
- return methodModifiers.Concat(typeModifiers).Select(m => m.Text);
- }
+ public static IEnumerable GetSourceLevelModifiers(this ISymbol symbol) =>
+ symbol.GetModifiers(md => md.Modifiers).Select(m => m.Text);
///
/// Holds if the ID generated for `dependant` will contain a reference to
diff --git a/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md b/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md
index 3137a84a435..3d63162ca4d 100644
--- a/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md
+++ b/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 1.4.3
+
+No user-facing changes.
+
## 1.4.2
No user-facing changes.
diff --git a/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.4.3.md b/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.4.3.md
new file mode 100644
index 00000000000..abf2a0d4dcc
--- /dev/null
+++ b/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.4.3.md
@@ -0,0 +1,3 @@
+## 1.4.3
+
+No user-facing changes.
diff --git a/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml b/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml
index a76cacdf799..08f88b689fb 100644
--- a/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml
+++ b/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 1.4.2
+lastReleaseVersion: 1.4.3
diff --git a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml
index 4655f3d5939..7aa032fa92d 100644
--- a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml
+++ b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/csharp-solorigate-all
-version: 1.4.3-dev
+version: 1.4.4-dev
groups:
- csharp
- solorigate
diff --git a/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md b/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md
index 3137a84a435..3d63162ca4d 100644
--- a/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md
+++ b/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 1.4.3
+
+No user-facing changes.
+
## 1.4.2
No user-facing changes.
diff --git a/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.4.3.md b/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.4.3.md
new file mode 100644
index 00000000000..abf2a0d4dcc
--- /dev/null
+++ b/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.4.3.md
@@ -0,0 +1,3 @@
+## 1.4.3
+
+No user-facing changes.
diff --git a/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml b/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml
index a76cacdf799..08f88b689fb 100644
--- a/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml
+++ b/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 1.4.2
+lastReleaseVersion: 1.4.3
diff --git a/csharp/ql/campaigns/Solorigate/src/qlpack.yml b/csharp/ql/campaigns/Solorigate/src/qlpack.yml
index 813cb0d50ce..72f0256ef39 100644
--- a/csharp/ql/campaigns/Solorigate/src/qlpack.yml
+++ b/csharp/ql/campaigns/Solorigate/src/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/csharp-solorigate-queries
-version: 1.4.3-dev
+version: 1.4.4-dev
groups:
- csharp
- solorigate
diff --git a/csharp/ql/integration-tests/all-platforms/msbuild/Program.cs b/csharp/ql/integration-tests/all-platforms/msbuild/Program.cs
new file mode 100644
index 00000000000..66b32336aa3
--- /dev/null
+++ b/csharp/ql/integration-tests/all-platforms/msbuild/Program.cs
@@ -0,0 +1,13 @@
+using System;
+
+namespace Test
+{
+ public class Program
+ {
+ public static int Main(string[] args)
+ {
+ Console.WriteLine("Hello world!");
+ return 0;
+ }
+ }
+}
diff --git a/csharp/ql/integration-tests/all-platforms/msbuild/test.csproj b/csharp/ql/integration-tests/all-platforms/msbuild/test.csproj
new file mode 100644
index 00000000000..88ed91975ce
--- /dev/null
+++ b/csharp/ql/integration-tests/all-platforms/msbuild/test.csproj
@@ -0,0 +1,7 @@
+
+
+
+ Exe
+ net4.0
+
+
diff --git a/csharp/ql/integration-tests/all-platforms/msbuild/test.py b/csharp/ql/integration-tests/all-platforms/msbuild/test.py
new file mode 100644
index 00000000000..97682d28205
--- /dev/null
+++ b/csharp/ql/integration-tests/all-platforms/msbuild/test.py
@@ -0,0 +1,4 @@
+from create_database_utils import *
+
+# force CodeQL to use MSBuild by setting `LGTM_INDEX_MSBUILD_TARGET`
+run_codeql_database_create([], test_db="default-db", db=None, lang="csharp", extra_env={ 'LGTM_INDEX_MSBUILD_TARGET': 'Build' })
diff --git a/csharp/ql/lib/CHANGELOG.md b/csharp/ql/lib/CHANGELOG.md
index d31bad9b040..7d14d20ddbc 100644
--- a/csharp/ql/lib/CHANGELOG.md
+++ b/csharp/ql/lib/CHANGELOG.md
@@ -1,3 +1,9 @@
+## 0.5.3
+
+### Minor Analysis Improvements
+
+* C# 11: Added extractor support for the `scoped` modifier annotation on parameters and local variables.
+
## 0.5.2
### Major Analysis Improvements
diff --git a/csharp/ql/lib/change-notes/2023-02-07-scoped-modifier.md b/csharp/ql/lib/change-notes/2023-02-07-scoped-modifier.md
deleted file mode 100644
index 93460b64184..00000000000
--- a/csharp/ql/lib/change-notes/2023-02-07-scoped-modifier.md
+++ /dev/null
@@ -1,4 +0,0 @@
----
-category: minorAnalysis
----
-* C# 11: Added extractor support for the `scoped` modifier annotation on parameters and local variables.
\ No newline at end of file
diff --git a/csharp/ql/lib/change-notes/2023-02-14-checked-operators.md b/csharp/ql/lib/change-notes/2023-02-14-checked-operators.md
new file mode 100644
index 00000000000..19d4a7f3f66
--- /dev/null
+++ b/csharp/ql/lib/change-notes/2023-02-14-checked-operators.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* C# 11: Added library support for `checked` operators.
\ No newline at end of file
diff --git a/csharp/ql/lib/change-notes/2023-02-16-requiredmembers.md b/csharp/ql/lib/change-notes/2023-02-16-requiredmembers.md
new file mode 100644
index 00000000000..8a318ca52ec
--- /dev/null
+++ b/csharp/ql/lib/change-notes/2023-02-16-requiredmembers.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* C# 11: Added extractor support for `required` fields and properties.
\ No newline at end of file
diff --git a/csharp/ql/lib/change-notes/2023-02-22-modifierextraction.md b/csharp/ql/lib/change-notes/2023-02-22-modifierextraction.md
new file mode 100644
index 00000000000..2078c2a9f1e
--- /dev/null
+++ b/csharp/ql/lib/change-notes/2023-02-22-modifierextraction.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* The extraction of member modifiers has been generalised, which could lead to the extraction of more modifiers.
\ No newline at end of file
diff --git a/csharp/ql/lib/change-notes/released/0.5.3.md b/csharp/ql/lib/change-notes/released/0.5.3.md
new file mode 100644
index 00000000000..a4f605335ac
--- /dev/null
+++ b/csharp/ql/lib/change-notes/released/0.5.3.md
@@ -0,0 +1,5 @@
+## 0.5.3
+
+### Minor Analysis Improvements
+
+* C# 11: Added extractor support for the `scoped` modifier annotation on parameters and local variables.
diff --git a/csharp/ql/lib/codeql-pack.release.yml b/csharp/ql/lib/codeql-pack.release.yml
index 2d9d3f587f8..2164e038a5d 100644
--- a/csharp/ql/lib/codeql-pack.release.yml
+++ b/csharp/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.5.2
+lastReleaseVersion: 0.5.3
diff --git a/csharp/ql/lib/qlpack.yml b/csharp/ql/lib/qlpack.yml
index 106d566fefe..3f118d8115f 100644
--- a/csharp/ql/lib/qlpack.yml
+++ b/csharp/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/csharp-all
-version: 0.5.3-dev
+version: 0.5.4-dev
groups: csharp
dbscheme: semmlecode.csharp.dbscheme
extractor: csharp
@@ -8,6 +8,7 @@ upgrades: upgrades
dependencies:
codeql/ssa: ${workspace}
codeql/tutorial: ${workspace}
+ codeql/util: ${workspace}
dataExtensions:
- ext/*.model.yml
- ext/generated/*.model.yml
diff --git a/csharp/ql/lib/semmle/code/csharp/Callable.qll b/csharp/ql/lib/semmle/code/csharp/Callable.qll
index ea88f814bce..82ffbfa6a06 100644
--- a/csharp/ql/lib/semmle/code/csharp/Callable.qll
+++ b/csharp/ql/lib/semmle/code/csharp/Callable.qll
@@ -435,8 +435,12 @@ class Destructor extends DotNet::Destructor, Callable, Member, Attributable, @de
* (`BinaryOperator`), or a conversion operator (`ConversionOperator`).
*/
class Operator extends Callable, Member, Attributable, @operator {
- /** Gets the assembly name of this operator. */
- string getAssemblyName() { operators(this, result, _, _, _, _) }
+ /**
+ * DEPRECATED: use `getFunctionName()` instead.
+ *
+ * Gets the assembly name of this operator.
+ */
+ deprecated string getAssemblyName() { result = this.getFunctionName() }
override string getName() { operators(this, _, result, _, _, _) }
@@ -445,7 +449,7 @@ class Operator extends Callable, Member, Attributable, @operator {
/**
* Gets the metadata name of the operator, such as `op_implicit` or `op_RightShift`.
*/
- string getFunctionName() { none() }
+ string getFunctionName() { operators(this, result, _, _, _, _) }
override ValueOrRefType getDeclaringType() { operators(this, _, _, result, _, _) }
@@ -481,10 +485,11 @@ class RecordCloneMethod extends Method, DotNet::RecordCloneCallable {
* A user-defined unary operator - an operator taking one operand.
*
* Either a plus operator (`PlusOperator`), minus operator (`MinusOperator`),
- * not operator (`NotOperator`), complement operator (`ComplementOperator`),
- * true operator (`TrueOperator`), false operator (`FalseOperator`),
- * increment operator (`IncrementOperator`), or decrement operator
- * (`DecrementOperator`).
+ * checked minus operator (`CheckedMinusOperator`), not operator (`NotOperator`),
+ * complement operator (`ComplementOperator`), true operator (`TrueOperator`),
+ * false operator (`FalseOperator`), increment operator (`IncrementOperator`),
+ * checked increment operator (`CheckedIncrementOperator`), decrement operator
+ * (`DecrementOperator`) or checked decrement operator (`CheckedDecrementOperator`).
*/
class UnaryOperator extends Operator {
UnaryOperator() {
@@ -505,8 +510,6 @@ class UnaryOperator extends Operator {
class PlusOperator extends UnaryOperator {
PlusOperator() { this.getName() = "+" }
- override string getFunctionName() { result = "op_UnaryPlus" }
-
override string getAPrimaryQlClass() { result = "PlusOperator" }
}
@@ -522,11 +525,24 @@ class PlusOperator extends UnaryOperator {
class MinusOperator extends UnaryOperator {
MinusOperator() { this.getName() = "-" }
- override string getFunctionName() { result = "op_UnaryNegation" }
-
override string getAPrimaryQlClass() { result = "MinusOperator" }
}
+/**
+ * A user-defined checked minus operator (`-`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked -(Widget w) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedMinusOperator extends UnaryOperator {
+ CheckedMinusOperator() { this.getName() = "checked -" }
+
+ override string getAPrimaryQlClass() { result = "CheckedMinusOperator" }
+}
+
/**
* A user-defined not operator (`!`), for example
*
@@ -539,8 +555,6 @@ class MinusOperator extends UnaryOperator {
class NotOperator extends UnaryOperator {
NotOperator() { this.getName() = "!" }
- override string getFunctionName() { result = "op_LogicalNot" }
-
override string getAPrimaryQlClass() { result = "NotOperator" }
}
@@ -556,8 +570,6 @@ class NotOperator extends UnaryOperator {
class ComplementOperator extends UnaryOperator {
ComplementOperator() { this.getName() = "~" }
- override string getFunctionName() { result = "op_OnesComplement" }
-
override string getAPrimaryQlClass() { result = "ComplementOperator" }
}
@@ -573,11 +585,24 @@ class ComplementOperator extends UnaryOperator {
class IncrementOperator extends UnaryOperator {
IncrementOperator() { this.getName() = "++" }
- override string getFunctionName() { result = "op_Increment" }
-
override string getAPrimaryQlClass() { result = "IncrementOperator" }
}
+/**
+ * A user-defined checked increment operator (`++`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked ++(Widget w) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedIncrementOperator extends UnaryOperator {
+ CheckedIncrementOperator() { this.getName() = "checked ++" }
+
+ override string getAPrimaryQlClass() { result = "CheckedIncrementOperator" }
+}
+
/**
* A user-defined decrement operator (`--`), for example
*
@@ -590,11 +615,24 @@ class IncrementOperator extends UnaryOperator {
class DecrementOperator extends UnaryOperator {
DecrementOperator() { this.getName() = "--" }
- override string getFunctionName() { result = "op_Decrement" }
-
override string getAPrimaryQlClass() { result = "DecrementOperator" }
}
+/**
+ * A user-defined checked decrement operator (`--`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked --(Widget w) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedDecrementOperator extends UnaryOperator {
+ CheckedDecrementOperator() { this.getName() = "checked --" }
+
+ override string getAPrimaryQlClass() { result = "CheckedDecrementOperator" }
+}
+
/**
* A user-defined false operator (`false`), for example
*
@@ -607,8 +645,6 @@ class DecrementOperator extends UnaryOperator {
class FalseOperator extends UnaryOperator {
FalseOperator() { this.getName() = "false" }
- override string getFunctionName() { result = "op_False" }
-
override string getAPrimaryQlClass() { result = "FalseOperator" }
}
@@ -624,17 +660,18 @@ class FalseOperator extends UnaryOperator {
class TrueOperator extends UnaryOperator {
TrueOperator() { this.getName() = "true" }
- override string getFunctionName() { result = "op_True" }
-
override string getAPrimaryQlClass() { result = "TrueOperator" }
}
/**
* A user-defined binary operator.
*
- * Either an addition operator (`AddOperator`), a subtraction operator
- * (`SubOperator`), a multiplication operator (`MulOperator`), a division
- * operator (`DivOperator`), a remainder operator (`RemOperator`), an and
+ * Either an addition operator (`AddOperator`), a checked addition operator
+ * (`CheckedAddOperator`), a subtraction operator (`SubOperator`), a checked
+ * subtraction operator (`CheckedSubOperator`), a multiplication operator
+ * (`MulOperator`), a checked multiplication operator (`CheckedMulOperator`),
+ * a division operator (`DivOperator`), a checked division operator
+ * (`CheckedDivOperator`), a remainder operator (`RemOperator`), an and
* operator (`AndOperator`), an or operator (`OrOperator`), an xor
* operator (`XorOperator`), a left shift operator (`LeftShiftOperator`),
* a right shift operator (`RightShiftOperator`), an unsigned right shift
@@ -659,11 +696,24 @@ class BinaryOperator extends Operator {
class AddOperator extends BinaryOperator {
AddOperator() { this.getName() = "+" }
- override string getFunctionName() { result = "op_Addition" }
-
override string getAPrimaryQlClass() { result = "AddOperator" }
}
+/**
+ * A user-defined checked addition operator (`+`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked +(Widget lhs, Widget rhs) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedAddOperator extends BinaryOperator {
+ CheckedAddOperator() { this.getName() = "checked +" }
+
+ override string getAPrimaryQlClass() { result = "CheckedAddOperator" }
+}
+
/**
* A user-defined subtraction operator (`-`), for example
*
@@ -676,11 +726,24 @@ class AddOperator extends BinaryOperator {
class SubOperator extends BinaryOperator {
SubOperator() { this.getName() = "-" }
- override string getFunctionName() { result = "op_Subtraction" }
-
override string getAPrimaryQlClass() { result = "SubOperator" }
}
+/**
+ * A user-defined checked subtraction operator (`-`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked -(Widget lhs, Widget rhs) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedSubOperator extends BinaryOperator {
+ CheckedSubOperator() { this.getName() = "checked -" }
+
+ override string getAPrimaryQlClass() { result = "CheckedSubOperator" }
+}
+
/**
* A user-defined multiplication operator (`*`), for example
*
@@ -693,11 +756,24 @@ class SubOperator extends BinaryOperator {
class MulOperator extends BinaryOperator {
MulOperator() { this.getName() = "*" }
- override string getFunctionName() { result = "op_Multiply" }
-
override string getAPrimaryQlClass() { result = "MulOperator" }
}
+/**
+ * A user-defined checked multiplication operator (`*`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked *(Widget lhs, Widget rhs) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedMulOperator extends BinaryOperator {
+ CheckedMulOperator() { this.getName() = "checked *" }
+
+ override string getAPrimaryQlClass() { result = "CheckedMulOperator" }
+}
+
/**
* A user-defined division operator (`/`), for example
*
@@ -710,11 +786,24 @@ class MulOperator extends BinaryOperator {
class DivOperator extends BinaryOperator {
DivOperator() { this.getName() = "/" }
- override string getFunctionName() { result = "op_Division" }
-
override string getAPrimaryQlClass() { result = "DivOperator" }
}
+/**
+ * A user-defined checked division operator (`/`), for example
+ *
+ * ```csharp
+ * public static Widget operator checked /(Widget lhs, Widget rhs) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedDivOperator extends BinaryOperator {
+ CheckedDivOperator() { this.getName() = "checked /" }
+
+ override string getAPrimaryQlClass() { result = "CheckedDivOperator" }
+}
+
/**
* A user-defined remainder operator (`%`), for example
*
@@ -727,8 +816,6 @@ class DivOperator extends BinaryOperator {
class RemOperator extends BinaryOperator {
RemOperator() { this.getName() = "%" }
- override string getFunctionName() { result = "op_Modulus" }
-
override string getAPrimaryQlClass() { result = "RemOperator" }
}
@@ -744,8 +831,6 @@ class RemOperator extends BinaryOperator {
class AndOperator extends BinaryOperator {
AndOperator() { this.getName() = "&" }
- override string getFunctionName() { result = "op_BitwiseAnd" }
-
override string getAPrimaryQlClass() { result = "AndOperator" }
}
@@ -761,8 +846,6 @@ class AndOperator extends BinaryOperator {
class OrOperator extends BinaryOperator {
OrOperator() { this.getName() = "|" }
- override string getFunctionName() { result = "op_BitwiseOr" }
-
override string getAPrimaryQlClass() { result = "OrOperator" }
}
@@ -778,8 +861,6 @@ class OrOperator extends BinaryOperator {
class XorOperator extends BinaryOperator {
XorOperator() { this.getName() = "^" }
- override string getFunctionName() { result = "op_ExclusiveOr" }
-
override string getAPrimaryQlClass() { result = "XorOperator" }
}
@@ -795,8 +876,6 @@ class XorOperator extends BinaryOperator {
class LeftShiftOperator extends BinaryOperator {
LeftShiftOperator() { this.getName() = "<<" }
- override string getFunctionName() { result = "op_LeftShift" }
-
override string getAPrimaryQlClass() { result = "LeftShiftOperator" }
}
@@ -815,8 +894,6 @@ deprecated class LShiftOperator = LeftShiftOperator;
class RightShiftOperator extends BinaryOperator {
RightShiftOperator() { this.getName() = ">>" }
- override string getFunctionName() { result = "op_RightShift" }
-
override string getAPrimaryQlClass() { result = "RightShiftOperator" }
}
@@ -835,8 +912,6 @@ deprecated class RShiftOperator = RightShiftOperator;
class UnsignedRightShiftOperator extends BinaryOperator {
UnsignedRightShiftOperator() { this.getName() = ">>>" }
- override string getFunctionName() { result = "op_UnsignedRightShift" }
-
override string getAPrimaryQlClass() { result = "UnsignedRightShiftOperator" }
}
@@ -852,8 +927,6 @@ class UnsignedRightShiftOperator extends BinaryOperator {
class EQOperator extends BinaryOperator {
EQOperator() { this.getName() = "==" }
- override string getFunctionName() { result = "op_Equality" }
-
override string getAPrimaryQlClass() { result = "EQOperator" }
}
@@ -869,8 +942,6 @@ class EQOperator extends BinaryOperator {
class NEOperator extends BinaryOperator {
NEOperator() { this.getName() = "!=" }
- override string getFunctionName() { result = "op_Inequality" }
-
override string getAPrimaryQlClass() { result = "NEOperator" }
}
@@ -886,8 +957,6 @@ class NEOperator extends BinaryOperator {
class LTOperator extends BinaryOperator {
LTOperator() { this.getName() = "<" }
- override string getFunctionName() { result = "op_LessThan" }
-
override string getAPrimaryQlClass() { result = "LTOperator" }
}
@@ -903,8 +972,6 @@ class LTOperator extends BinaryOperator {
class GTOperator extends BinaryOperator {
GTOperator() { this.getName() = ">" }
- override string getFunctionName() { result = "op_GreaterThan" }
-
override string getAPrimaryQlClass() { result = "GTOperator" }
}
@@ -920,8 +987,6 @@ class GTOperator extends BinaryOperator {
class LEOperator extends BinaryOperator {
LEOperator() { this.getName() = "<=" }
- override string getFunctionName() { result = "op_LessThanOrEqual" }
-
override string getAPrimaryQlClass() { result = "LEOperator" }
}
@@ -937,8 +1002,6 @@ class LEOperator extends BinaryOperator {
class GEOperator extends BinaryOperator {
GEOperator() { this.getName() = ">=" }
- override string getFunctionName() { result = "op_GreaterThanOrEqual" }
-
override string getAPrimaryQlClass() { result = "GEOperator" }
}
@@ -954,7 +1017,8 @@ class GEOperator extends BinaryOperator {
class ConversionOperator extends Operator {
ConversionOperator() {
this.getName() = "implicit conversion" or
- this.getName() = "explicit conversion"
+ this.getName() = "explicit conversion" or
+ this.getName() = "checked explicit conversion"
}
/** Gets the source type of the conversion. */
@@ -976,8 +1040,6 @@ class ConversionOperator extends Operator {
class ImplicitConversionOperator extends ConversionOperator {
ImplicitConversionOperator() { this.getName() = "implicit conversion" }
- override string getFunctionName() { result = "op_Implicit" }
-
override string getAPrimaryQlClass() { result = "ImplicitConversionOperator" }
}
@@ -993,11 +1055,24 @@ class ImplicitConversionOperator extends ConversionOperator {
class ExplicitConversionOperator extends ConversionOperator {
ExplicitConversionOperator() { this.getName() = "explicit conversion" }
- override string getFunctionName() { result = "op_Explicit" }
-
override string getAPrimaryQlClass() { result = "ExplicitConversionOperator" }
}
+/**
+ * A user-defined checked explicit conversion operator, for example
+ *
+ * ```csharp
+ * public static explicit operator checked int(BigInteger i) {
+ * ...
+ * }
+ * ```
+ */
+class CheckedExplicitConversionOperator extends ConversionOperator {
+ CheckedExplicitConversionOperator() { this.getName() = "checked explicit conversion" }
+
+ override string getAPrimaryQlClass() { result = "CheckedExplicitConversionOperator" }
+}
+
/**
* A local function, defined within the scope of another callable.
* For example, `Fac` on lines 2--4 in
diff --git a/csharp/ql/lib/semmle/code/csharp/File.qll b/csharp/ql/lib/semmle/code/csharp/File.qll
index e4e0d3c6c26..79406aec2f6 100644
--- a/csharp/ql/lib/semmle/code/csharp/File.qll
+++ b/csharp/ql/lib/semmle/code/csharp/File.qll
@@ -3,184 +3,34 @@
*/
private import Comments
+private import codeql.util.FileSystem
-/** A file or folder. */
-class Container extends @container {
- /**
- * Gets the absolute, canonical path of this container, using forward slashes
- * as path separator.
- *
- * The path starts with a _root prefix_ followed by zero or more _path
- * segments_ separated by forward slashes.
- *
- * The root prefix is of one of the following forms:
- *
- * 1. A single forward slash `/` (Unix-style)
- * 2. An upper-case drive letter followed by a colon and a forward slash,
- * such as `C:/` (Windows-style)
- * 3. Two forward slashes, a computer name, and then another forward slash,
- * such as `//FileServer/` (UNC-style)
- *
- * Path segments are never empty (that is, absolute paths never contain two
- * contiguous slashes, except as part of a UNC-style root prefix). Also, path
- * segments never contain forward slashes, and no path segment is of the
- * form `.` (one dot) or `..` (two dots).
- *
- * Note that an absolute path never ends with a forward slash, except if it is
- * a bare root prefix, that is, the path has no path segments. A container
- * whose absolute path has no segments is always a `Folder`, not a `File`.
- */
- string getAbsolutePath() { none() }
+private module Input implements InputSig {
+ abstract class ContainerBase extends @container {
+ abstract string getAbsolutePath();
- /**
- * Gets a URL representing the location of this container.
- *
- * For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
- */
- string getURL() { none() }
+ ContainerBase getParentContainer() { containerparent(result, this) }
- /**
- * Gets the relative path of this file or folder from the root folder of the
- * analyzed source location. The relative path of the root folder itself is
- * the empty string.
- *
- * This has no result if the container is outside the source root, that is,
- * if the root folder is not a reflexive, transitive parent of this container.
- */
- string getRelativePath() {
- exists(string absPath, string pref |
- absPath = this.getAbsolutePath() and sourceLocationPrefix(pref)
- |
- absPath = pref and result = ""
- or
- absPath = pref.regexpReplaceAll("/$", "") + "/" + result and
- not result.matches("/%")
- )
+ string toString() { result = this.getAbsolutePath() }
}
- /**
- * Gets the base name of this container including extension, that is, the last
- * segment of its absolute path, or the empty string if it has no segments.
- *
- * Here are some examples of absolute paths and the corresponding base names
- * (surrounded with quotes to avoid ambiguity):
- *
- *
- *
Absolute path
Base name
- *
"/tmp/tst.cs"
"tst.cs"
- *
"C:/Program Files (x86)"
"Program Files (x86)"
- *
"/"
""
- *
"C:/"
""
- *
"D:/"
""
- *
"//FileServer/"
""
- *
- */
- string getBaseName() {
- result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
+ class FolderBase extends ContainerBase, @folder {
+ override string getAbsolutePath() { folders(this, result) }
}
- /**
- * Gets the extension of this container, that is, the suffix of its base name
- * after the last dot character, if any.
- *
- * In particular,
- *
- * - if the name does not include a dot, there is no extension, so this
- * predicate has no result;
- * - if the name ends in a dot, the extension is the empty string;
- * - if the name contains multiple dots, the extension follows the last dot.
- *
- * Here are some examples of absolute paths and the corresponding extensions
- * (surrounded with quotes to avoid ambiguity):
- *
- *
- *
Absolute path
Extension
- *
"/tmp/tst.cs"
"cs"
- *
"/tmp/.classpath"
"classpath"
- *
"/bin/bash"
not defined
- *
"/tmp/tst2."
""
- *
"/tmp/x.tar.gz"
"gz"
- *
- */
- string getExtension() {
- result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
+ class FileBase extends ContainerBase, @file {
+ override string getAbsolutePath() { files(this, result) }
}
- /**
- * Gets the stem of this container, that is, the prefix of its base name up to
- * (but not including) the last dot character if there is one, or the entire
- * base name if there is not.
- *
- * Here are some examples of absolute paths and the corresponding stems
- * (surrounded with quotes to avoid ambiguity):
- *
- *
+
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/main/resources/struts.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/main/resources/struts.xml
new file mode 100644
index 00000000000..73fc0c6b9cb
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/main/resources/struts.xml
@@ -0,0 +1,4 @@
+
+
+This is a sample file
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/test/java/com/example/AppTest.java b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/test/java/com/example/AppTest.java
new file mode 100644
index 00000000000..22a94ca6f01
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/src/test/java/com/example/AppTest.java
@@ -0,0 +1,20 @@
+package com.example;
+
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+{
+ /**
+ * Rigorous Test :-)
+ */
+ @Test
+ public void shouldAnswerWithTrue()
+ {
+ assertTrue( true );
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/test.py b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/test.py
new file mode 100644
index 00000000000..ee07cd14bdf
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/dependency-error/test.py
@@ -0,0 +1,16 @@
+import os
+import pathlib
+import shutil
+
+from create_database_utils import *
+from diagnostics_test_utils import *
+
+# Ensure the intended dependency download failure is not cached:
+try:
+ shutil.rmtree(pathlib.Path.home().joinpath(".m2", "repository", "junit", "junit-nonesuch"))
+except FileNotFoundError:
+ pass
+
+run_codeql_database_create([], lang="java", runFunction = runUnsuccessfully, db = None)
+
+check_diagnostics()
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitattributes b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitattributes
new file mode 100644
index 00000000000..00a51aff5e5
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitattributes
@@ -0,0 +1,6 @@
+#
+# https://help.github.com/articles/dealing-with-line-endings/
+#
+# These are explicitly windows files and should use crlf
+*.bat text eol=crlf
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitignore b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitignore
new file mode 100644
index 00000000000..1b6985c0094
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/.gitignore
@@ -0,0 +1,5 @@
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/build.gradle b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/build.gradle
new file mode 100644
index 00000000000..c3b774e3d50
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/build.gradle
@@ -0,0 +1,34 @@
+/*
+ * This build file was auto generated by running the Gradle 'init' task
+ * by 'arthur' at '28/11/20 22:29' with Gradle 3.0
+ *
+ * This generated file contains a sample Java project to get you started.
+ * For more details take a look at the Java Quickstart chapter in the Gradle
+ * user guide available at https://docs.gradle.org/3.0/userguide/tutorial_java_projects.html
+ */
+
+// Apply the java plugin to add support for Java
+apply plugin: 'java'
+
+// In this section you declare where to find the dependencies of your project
+repositories {
+ // Use 'jcenter' for resolving your dependencies.
+ // You can declare any Maven/Ivy/file repository here.
+ jcenter()
+}
+
+// In this section you declare the dependencies for your production and test code
+dependencies {
+ // The production code uses the SLF4J logging API at compile time
+ compile 'org.slf4j:slf4j-api:1.7.21'
+
+ // Declare the dependency for your favourite test framework you want to use in your tests.
+ // TestNG is also supported by the Gradle Test task. Just change the
+ // testCompile dependency to testCompile 'org.testng:testng:6.8.1' and add
+ // 'test.useTestNG()' to your build script.
+ testCompile 'junit:junit:4.12'
+}
+
+java {
+ sourceCompatibility = JavaVersion.VERSION_11
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/diagnostics.expected b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/diagnostics.expected
new file mode 100644
index 00000000000..a48a1e1dd07
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/diagnostics.expected
@@ -0,0 +1,28 @@
+{
+ "markdownMessage": "Your project may need a different JDK version. Ensure your Code Scanning workflow file has [an appropriate `setup-java` step](https://github.com/actions/setup-java#eclipse-temurin). Suspicious output line: `> Could not target platform: 'Java SE 11' using tool chain: 'JDK 8 (1.8)'.`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/wrong-jdk-version",
+ "name": "Your project may need a different JDK version"
+ },
+ "visibility": {
+ "cliSummaryTable": true,
+ "statusPage": true,
+ "telemetry": true
+ }
+}
+{
+ "markdownMessage": "Your project may need a different JDK version. Ensure your Code Scanning workflow file has [an appropriate `setup-java` step](https://github.com/actions/setup-java#eclipse-temurin). Suspicious output line: `Caused by: java.lang.IllegalArgumentException: Could not target platform: 'Java SE 11' using tool chain: 'JDK 8 (1.8)'.`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/wrong-jdk-version",
+ "name": "Your project may need a different JDK version"
+ },
+ "visibility": {
+ "cliSummaryTable": false,
+ "statusPage": false,
+ "telemetry": true
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/force_sequential_test_execution b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/force_sequential_test_execution
new file mode 100644
index 00000000000..b0e2500b259
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/force_sequential_test_execution
@@ -0,0 +1,3 @@
+# We currently have a bug where gradle tests become flaky when executed in parallel
+# - sometimes, gradle fails to connect to the gradle daemon.
+# Therefore, force this test to run sequentially.
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/verification-metadata.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/verification-metadata.xml
new file mode 100644
index 00000000000..14a69b8178b
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/verification-metadata.xml
@@ -0,0 +1,7 @@
+
+
+
+ true
+ false
+
+
\ No newline at end of file
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.jar b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 00000000000..e708b1c023e
Binary files /dev/null and b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.properties b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 00000000000..12d38de6a48
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew
new file mode 100755
index 00000000000..4f906e0c811
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew.bat b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew.bat
new file mode 100644
index 00000000000..107acd32c4e
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/settings.gradle b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/settings.gradle
new file mode 100644
index 00000000000..233410459f6
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/settings.gradle
@@ -0,0 +1,19 @@
+/*
+ * This settings file was auto generated by the Gradle buildInit task
+ * by 'arthur' at '28/11/20 22:29' with Gradle 3.0
+ *
+ * The settings file is used to specify which projects to include in your build.
+ * In a single project build this file can be empty or even removed.
+ *
+ * Detailed information about configuring a multi-project build in Gradle can be found
+ * in the user guide at https://docs.gradle.org/3.0/userguide/multi_project_builds.html
+ */
+
+/*
+// To declare projects as part of a multi-project build use the 'include' method
+include 'shared'
+include 'api'
+include 'services:webservice'
+*/
+
+rootProject.name = 'gradle-sample'
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/main/java/com/example/App.java b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/main/java/com/example/App.java
new file mode 100644
index 00000000000..1c13f7d885e
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/main/java/com/example/App.java
@@ -0,0 +1,14 @@
+/*
+ * This Java source file was generated by the Gradle 'init' task.
+ */
+package com.example;
+
+public class App {
+ public String getGreeting() {
+ return "Hello world.";
+ }
+
+ public static void main(String[] args) {
+ System.out.println(new App().getGreeting());
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/test/java/com/example/AppTest.java b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/test/java/com/example/AppTest.java
new file mode 100644
index 00000000000..813bc5e1a2a
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/src/test/java/com/example/AppTest.java
@@ -0,0 +1,14 @@
+/*
+ * This Java source file was generated by the Gradle 'init' task.
+ */
+package com.example;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class AppTest {
+ @Test public void testAppHasAGreeting() {
+ App classUnderTest = new App();
+ assertNotNull("app should have a greeting", classUnderTest.getGreeting());
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/test.py b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/test.py
new file mode 100644
index 00000000000..9def03947b3
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/java-version-too-old/test.py
@@ -0,0 +1,13 @@
+import os
+from create_database_utils import *
+from diagnostics_test_utils import *
+
+# Ensure we're using an old Java version that won't work with Gradle
+if "JAVA_HOME_8_X64" in os.environ:
+ os.environ["JAVA_HOME"] = os.environ["JAVA_HOME_8_X64"]
+ sep = ";" if platform.system() == "Windows" else ":"
+ os.environ["PATH"] = "".join([os.path.join(os.environ["JAVA_HOME"], "bin"), sep, os.environ["PATH"]])
+
+run_codeql_database_create([], lang="java", runFunction = runUnsuccessfully, db = None)
+
+check_diagnostics()
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/diagnostics.expected b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/diagnostics.expected
new file mode 100644
index 00000000000..56c315303c1
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/diagnostics.expected
@@ -0,0 +1,56 @@
+{
+ "markdownMessage": "Building your code triggered an access to an insecure HTTP Maven repository. Allow access to insecure repositories, or [update your build to use HTTPS](https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked). Suspicious output line: `Caused by: org.eclipse.aether.resolution.ArtifactResolutionException: Could not transfer artifact junit-nonesuch:junit-nonesuch:pom:4.11 from/to maven-default-http-blocker (http://0.0.0.0/): Blocked mirror for repositories: [insecure (http://repo.maven.apache.org/maven2/, default, releases+snapshots)]`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/non-https-repository",
+ "name": "A non-https Maven repository access failed"
+ },
+ "visibility": {
+ "cliSummaryTable": false,
+ "statusPage": false,
+ "telemetry": true
+ }
+}
+{
+ "markdownMessage": "Building your code triggered an access to an insecure HTTP Maven repository. Allow access to insecure repositories, or [update your build to use HTTPS](https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked). Suspicious output line: `Caused by: org.eclipse.aether.transfer.ArtifactTransferException: Could not transfer artifact junit-nonesuch:junit-nonesuch:pom:4.11 from/to maven-default-http-blocker (http://0.0.0.0/): Blocked mirror for repositories: [insecure (http://repo.maven.apache.org/maven2/, default, releases+snapshots)]`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/non-https-repository",
+ "name": "A non-https Maven repository access failed"
+ },
+ "visibility": {
+ "cliSummaryTable": false,
+ "statusPage": false,
+ "telemetry": true
+ }
+}
+{
+ "markdownMessage": "Building your code triggered an access to an insecure HTTP Maven repository. Allow access to insecure repositories, or [update your build to use HTTPS](https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked). Suspicious output line: `Caused by: org.eclipse.aether.transfer.NoRepositoryConnectorException: Blocked mirror for repositories: [insecure (http://repo.maven.apache.org/maven2/, default, releases+snapshots)]`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/non-https-repository",
+ "name": "A non-https Maven repository access failed"
+ },
+ "visibility": {
+ "cliSummaryTable": false,
+ "statusPage": false,
+ "telemetry": true
+ }
+}
+{
+ "markdownMessage": "Building your code triggered an access to an insecure HTTP Maven repository. Allow access to insecure repositories, or [update your build to use HTTPS](https://maven.apache.org/docs/3.8.1/release-notes.html#how-to-fix-when-i-get-a-http-repository-blocked). Suspicious output line: `[ERROR] Failed to execute goal on project maven-sample: Could not resolve dependencies for project com.example:maven-sample:jar:1.0-SNAPSHOT: Failed to collect dependencies at junit-nonesuch:junit-nonesuch:jar:4.11: Failed to read artifact descriptor for junit-nonesuch:junit-nonesuch:jar:4.11: Could not transfer artifact junit-nonesuch:junit-nonesuch:pom:4.11 from/to maven-default-http-blocker (http://0.0.0.0/): Blocked mirror for repositories: [insecure (http://repo.maven.apache.org/maven2/, default, releases+snapshots)] -> [Help 1]`",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/non-https-repository",
+ "name": "A non-https Maven repository access failed"
+ },
+ "visibility": {
+ "cliSummaryTable": true,
+ "statusPage": true,
+ "telemetry": true
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/pom.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/pom.xml
new file mode 100644
index 00000000000..a1ae8eda7e6
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/pom.xml
@@ -0,0 +1,123 @@
+
+
+
+ 4.0.0
+
+ com.example
+ maven-sample
+ 1.0-SNAPSHOT
+
+ maven-sample
+
+ http://www.example.com
+
+
+ UTF-8
+ 1.7
+ 1.7
+
+
+
+
+ insecure
+ Insecure HTTP Repository
+ http://repo.maven.apache.org/maven2/
+ default
+
+
+
+
+
+ junit-nonesuch
+ junit-nonesuch
+ 4.11
+ test
+
+
+
+
+
+
+ exec-maven-plugin
+ org.codehaus.mojo
+ 1.1.1
+
+
+ check-maven-version
+ package
+
+ java
+
+
+
+
+ com.example.App
+
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+ 2.19.1
+
+
+
+ check
+
+ compile
+
+
+
+
+
+ /* FAIL ME */
+
+
+
+
+
+
+
+
+
+
+ maven-clean-plugin
+ 3.1.0
+
+
+
+ maven-resources-plugin
+ 3.0.2
+
+
+ maven-compiler-plugin
+ 3.8.0
+
+
+ maven-surefire-plugin
+ 2.22.1
+
+
+ maven-jar-plugin
+ 3.0.2
+
+
+ maven-install-plugin
+ 2.5.2
+
+
+ maven-deploy-plugin
+ 2.8.2
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+ maven-project-info-reports-plugin
+ 3.0.0
+
+
+
+
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/java/com/example/App.java b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/java/com/example/App.java
new file mode 100644
index 00000000000..c9eec918587
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/java/com/example/App.java
@@ -0,0 +1,30 @@
+package com.example;
+
+import java.util.regex.Pattern;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Hello world!
+ *
+ */
+public class App
+{
+ public static void main( String[] args )
+ {
+ System.out.println( "Hello World!" );
+ String expectedVersion = System.getenv("EXPECT_MAVEN");
+ Path mavenHome = Paths.get(System.getProperty("maven.home")).normalize();
+ String observedVersion = mavenHome.getFileName().toString();
+ if (expectedVersion != null && !expectedVersion.equals(observedVersion)) {
+ System.err.println("Wrong maven version, expected '" + expectedVersion + "' but got '" + observedVersion + "'" + mavenHome);
+ System.exit(1);
+ }
+ String commandMatcher = System.getenv("EXPECT_COMMAND_REGEX");
+ String command = System.getProperty("sun.java.command");
+ if (commandMatcher != null && !Pattern.matches(commandMatcher, command)) {
+ System.err.println("Wrong command line, '" + command + "' does not match '" + commandMatcher + "'");
+ System.exit(1);
+ }
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/my-app.properties b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/my-app.properties
new file mode 100644
index 00000000000..e566b49a29a
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/my-app.properties
@@ -0,0 +1 @@
+version=1.0
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/page.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/page.xml
new file mode 100644
index 00000000000..2bab459cb03
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/page.xml
@@ -0,0 +1,8 @@
+
+
+A sample
+
+
+
Hello world!
+
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/struts.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/struts.xml
new file mode 100644
index 00000000000..73fc0c6b9cb
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/main/resources/struts.xml
@@ -0,0 +1,4 @@
+
+
+This is a sample file
+
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/test/java/com/example/AppTest.java b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/test/java/com/example/AppTest.java
new file mode 100644
index 00000000000..22a94ca6f01
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/src/test/java/com/example/AppTest.java
@@ -0,0 +1,20 @@
+package com.example;
+
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+{
+ /**
+ * Rigorous Test :-)
+ */
+ @Test
+ public void shouldAnswerWithTrue()
+ {
+ assertTrue( true );
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/test.py b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/test.py
new file mode 100644
index 00000000000..23fe1d32fd8
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/maven-http-repository/test.py
@@ -0,0 +1,7 @@
+import os
+from create_database_utils import *
+from diagnostics_test_utils import *
+
+run_codeql_database_create([], lang="java", runFunction = runUnsuccessfully, db = None)
+
+check_diagnostics()
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/diagnostics.expected b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/diagnostics.expected
new file mode 100644
index 00000000000..07c5d942baa
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/diagnostics.expected
@@ -0,0 +1,28 @@
+{
+ "markdownMessage": "Building using Maven was skipped because there were multiple sibling build directories containing build files: [./maven-project-1,./maven-project-2]. If you want to use one of these, please [manually supply a build command](https://docs.github.com/en/github-ae@latest/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-the-codeql-workflow-for-compiled-languages#adding-build-steps-for-a-compiled-language)",
+ "severity": "warning",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/multiple-candidate-build-directories",
+ "name": "Multiple candidate Maven build directories"
+ },
+ "visibility": {
+ "cliSummaryTable": true,
+ "statusPage": true,
+ "telemetry": true
+ }
+}
+{
+ "markdownMessage": "If you want to use one of the candidate build systems and directories (see previous warnings), please [supply a manual a build command](https://docs.github.com/en/github-ae@latest/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-the-codeql-workflow-for-compiled-languages#adding-build-steps-for-a-compiled-language)",
+ "severity": "error",
+ "source": {
+ "extractorName": "java",
+ "id": "java/autobuilder/multiple-candidate-build-directories-fatal",
+ "name": "No build system could identify a unique top-level project"
+ },
+ "visibility": {
+ "cliSummaryTable": true,
+ "statusPage": true,
+ "telemetry": true
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/pom.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/pom.xml
new file mode 100644
index 00000000000..ec4aaf128c1
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/pom.xml
@@ -0,0 +1,114 @@
+
+
+
+ 4.0.0
+
+ com.example
+ maven-sample
+ 1.0-SNAPSHOT
+
+ maven-sample
+
+ http://www.example.com
+
+
+ UTF-8
+ 1.7
+ 1.7
+
+
+
+
+ junit
+ junit
+ 4.11
+ test
+
+
+
+
+
+
+ exec-maven-plugin
+ org.codehaus.mojo
+ 1.1.1
+
+
+ check-maven-version
+ package
+
+ java
+
+
+
+
+ com.example.App
+
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+ 2.19.1
+
+
+
+ check
+
+ compile
+
+
+
+
+
+ /* FAIL ME */
+
+
+
+
+
+
+
+
+
+
+ maven-clean-plugin
+ 3.1.0
+
+
+
+ maven-resources-plugin
+ 3.0.2
+
+
+ maven-compiler-plugin
+ 3.8.0
+
+
+ maven-surefire-plugin
+ 2.22.1
+
+
+ maven-jar-plugin
+ 3.0.2
+
+
+ maven-install-plugin
+ 2.5.2
+
+
+ maven-deploy-plugin
+ 2.8.2
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+ maven-project-info-reports-plugin
+ 3.0.0
+
+
+
+
+
\ No newline at end of file
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/java/com/example/App.java b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/java/com/example/App.java
new file mode 100644
index 00000000000..c9eec918587
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/java/com/example/App.java
@@ -0,0 +1,30 @@
+package com.example;
+
+import java.util.regex.Pattern;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Hello world!
+ *
+ */
+public class App
+{
+ public static void main( String[] args )
+ {
+ System.out.println( "Hello World!" );
+ String expectedVersion = System.getenv("EXPECT_MAVEN");
+ Path mavenHome = Paths.get(System.getProperty("maven.home")).normalize();
+ String observedVersion = mavenHome.getFileName().toString();
+ if (expectedVersion != null && !expectedVersion.equals(observedVersion)) {
+ System.err.println("Wrong maven version, expected '" + expectedVersion + "' but got '" + observedVersion + "'" + mavenHome);
+ System.exit(1);
+ }
+ String commandMatcher = System.getenv("EXPECT_COMMAND_REGEX");
+ String command = System.getProperty("sun.java.command");
+ if (commandMatcher != null && !Pattern.matches(commandMatcher, command)) {
+ System.err.println("Wrong command line, '" + command + "' does not match '" + commandMatcher + "'");
+ System.exit(1);
+ }
+ }
+}
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/my-app.properties b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/my-app.properties
new file mode 100644
index 00000000000..e566b49a29a
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/my-app.properties
@@ -0,0 +1 @@
+version=1.0
diff --git a/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/page.xml b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/page.xml
new file mode 100644
index 00000000000..2bab459cb03
--- /dev/null
+++ b/java/ql/integration-tests/all-platforms/java/diagnostics/multiple-candidate-builds/maven-project-1/src/main/resources/page.xml
@@ -0,0 +1,8 @@
+
+
+A sample
+
+
+
\w+)\("
- r"(?:\s*//dir=(?P\S*))?(?P[^\)]*)"
- r"\);?"
- "|"
- r"^(?P@\w+)\s*=\s*(?P@\w+(?:\s*\|\s*@\w+)*)\s*;?"
- )
- field = re.compile(r"(?m)[\w\s]*\s(?P\w+)\s*:\s*(?P@?\w+)(?P\s+ref)?")
- key = re.compile(r"@\w+")
- comment = re.compile(r"(?m)(?s)/\*.*?\*/|//(?!dir=)[^\n]*$") # lookahead avoid ignoring metadata like //dir=foo
-
-
-def get_column(match):
- return Column(
- schema_name=match["field"].rstrip("_"),
- type=match["type"],
- binding=not match["ref"],
- )
-
-
-def get_table(match):
- keyset = None
- if match["tablekeys"]:
- keyset = KeySet(k.strip() for k in match["tablekeys"].split(","))
- return Table(
- name=match["table"],
- columns=[get_column(f) for f in Re.field.finditer(match["tablebody"])],
- keyset=keyset,
- dir=pathlib.PosixPath(match["tabledir"]) if match["tabledir"] else None,
- )
-
-
-def get_union(match):
- return Union(
- lhs=match["union"],
- rhs=(d[0] for d in Re.key.finditer(match["unionbody"])),
- )
-
-
-def iterload(file):
- with open(file) as file:
- data = Re.comment.sub("", file.read())
- for e in Re.entity.finditer(data):
- if e["table"]:
- yield get_table(e)
- elif e["union"]:
- yield get_union(e)
diff --git a/swift/codegen/lib/paths.py b/misc/codegen/lib/paths.py
similarity index 50%
rename from swift/codegen/lib/paths.py
rename to misc/codegen/lib/paths.py
index 2ad1284b17b..b102987a226 100644
--- a/swift/codegen/lib/paths.py
+++ b/misc/codegen/lib/paths.py
@@ -4,15 +4,16 @@ import pathlib
import sys
import os
+_this_file = pathlib.Path(__file__).resolve()
+
try:
workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY']).resolve() # <- means we are using bazel run
- swift_dir = workspace_dir / 'swift'
+ root_dir = workspace_dir / 'swift'
except KeyError:
- _this_file = pathlib.Path(__file__).resolve()
- swift_dir = _this_file.parents[2]
- workspace_dir = swift_dir.parent
+ root_dir = _this_file.parents[2]
+ workspace_dir = root_dir.parent
-lib_dir = swift_dir / 'codegen' / 'lib'
-templates_dir = swift_dir / 'codegen' / 'templates'
+lib_dir = _this_file.parents[2] / 'codegen' / 'lib'
+templates_dir = _this_file.parents[2] / 'codegen' / 'templates'
exe_file = pathlib.Path(sys.argv[0]).resolve()
diff --git a/swift/codegen/lib/ql.py b/misc/codegen/lib/ql.py
similarity index 90%
rename from swift/codegen/lib/ql.py
rename to misc/codegen/lib/ql.py
index c40c42aa33f..ace39a61e0e 100644
--- a/swift/codegen/lib/ql.py
+++ b/misc/codegen/lib/ql.py
@@ -97,6 +97,7 @@ class Class:
properties: List[Property] = field(default_factory=list)
dir: pathlib.Path = pathlib.Path()
imports: List[str] = field(default_factory=list)
+ import_prefix: Optional[str] = None
qltest_skip: bool = False
qltest_collapse_hierarchy: bool = False
qltest_uncollapse_hierarchy: bool = False
@@ -134,12 +135,30 @@ class Class:
return bool(self.doc) or self.ql_internal
+@dataclass
+class IpaUnderlyingAccessor:
+ argument: str
+ type: str
+ constructorparams: List[Param]
+
+ def __post_init__(self):
+ if self.constructorparams:
+ self.constructorparams = [Param(x) for x in self.constructorparams]
+ self.constructorparams[0].first = True
+
+
@dataclass
class Stub:
template: ClassVar = 'ql_stub'
name: str
base_import: str
+ import_prefix: str
+ ipa_accessors: List[IpaUnderlyingAccessor] = field(default_factory=list)
+
+ @property
+ def has_ipa_accessors(self) -> bool:
+ return bool(self.ipa_accessors)
@dataclass
@@ -161,7 +180,7 @@ class GetParentImplementation:
template: ClassVar = 'ql_parent'
classes: List[Class] = field(default_factory=list)
- additional_imports: List[str] = field(default_factory=list)
+ imports: List[str] = field(default_factory=list)
@dataclass
@@ -173,19 +192,23 @@ class PropertyForTest:
@dataclass
-class ClassTester:
+class TesterBase:
+ class_name: str
+ elements_module: str
+
+
+@dataclass
+class ClassTester(TesterBase):
template: ClassVar = 'ql_test_class'
- class_name: str
properties: List[PropertyForTest] = field(default_factory=list)
show_ql_class: bool = False
@dataclass
-class PropertyTester:
+class PropertyTester(TesterBase):
template: ClassVar = 'ql_test_property'
- class_name: str
property: PropertyForTest
@@ -273,6 +296,7 @@ class Synth:
template: ClassVar = "ql_ipa_types"
root: str
+ import_prefix: str
final_classes: List["Synth.FinalClass"] = field(default_factory=list)
non_final_classes: List["Synth.NonFinalClass"] = field(default_factory=list)
@@ -285,3 +309,4 @@ class Synth:
template: ClassVar = "ql_ipa_constructor_stub"
cls: "Synth.FinalClass"
+ import_prefix: str
diff --git a/swift/codegen/lib/render.py b/misc/codegen/lib/render.py
similarity index 91%
rename from swift/codegen/lib/render.py
rename to misc/codegen/lib/render.py
index 65014a42fa2..697c2f8c2c9 100644
--- a/swift/codegen/lib/render.py
+++ b/misc/codegen/lib/render.py
@@ -25,13 +25,13 @@ class Error(Exception):
class Renderer:
""" Template renderer using mustache templates in the `templates` directory """
- def __init__(self, swift_dir: pathlib.Path):
+ def __init__(self, generator: pathlib.Path, root_dir: pathlib.Path):
self._r = pystache.Renderer(search_dirs=str(paths.templates_dir), escape=lambda u: u)
- self._swift_dir = swift_dir
- self._generator = self._get_path(paths.exe_file)
+ self._root_dir = root_dir
+ self._generator = generator
def _get_path(self, file: pathlib.Path):
- return file.relative_to(self._swift_dir)
+ return file.relative_to(self._root_dir)
def render(self, data: object, output: pathlib.Path):
""" Render `data` to `output`.
@@ -60,7 +60,7 @@ class Renderer:
def manage(self, generated: typing.Iterable[pathlib.Path], stubs: typing.Iterable[pathlib.Path],
registry: pathlib.Path, force: bool = False) -> "RenderManager":
- return RenderManager(self._swift_dir, generated, stubs, registry, force)
+ return RenderManager(self._generator, self._root_dir, generated, stubs, registry, force)
class RenderManager(Renderer):
@@ -85,10 +85,10 @@ class RenderManager(Renderer):
pre: str
post: typing.Optional[str] = None
- def __init__(self, swift_dir: pathlib.Path, generated: typing.Iterable[pathlib.Path],
+ def __init__(self, generator: pathlib.Path, root_dir: pathlib.Path, generated: typing.Iterable[pathlib.Path],
stubs: typing.Iterable[pathlib.Path],
registry: pathlib.Path, force: bool = False):
- super().__init__(swift_dir)
+ super().__init__(generator, root_dir)
self._registry_path = registry
self._force = force
self._hashes = {}
@@ -106,7 +106,6 @@ class RenderManager(Renderer):
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_val is None:
for f in self._existing - self._skipped - self.written:
- self._hashes.pop(self._get_path(f), None)
f.unlink(missing_ok=True)
log.info(f"removed {f.name}")
for f in self.written:
@@ -116,6 +115,10 @@ class RenderManager(Renderer):
# so that they get the chance to be regenerated again during the next run
for f in self.written:
self._hashes.pop(self._get_path(f), None)
+ # clean up the registry from files that do not exist any more
+ for f in list(self._hashes):
+ if not (self._root_dir / f).exists():
+ self._hashes.pop(f)
self._dump_registry()
def _do_write(self, mnemonic: str, contents: str, output: pathlib.Path):
diff --git a/swift/codegen/lib/schema/schema.py b/misc/codegen/lib/schema.py
similarity index 53%
rename from swift/codegen/lib/schema/schema.py
rename to misc/codegen/lib/schema.py
index a2a1de0c05a..5e2974ae87d 100644
--- a/swift/codegen/lib/schema/schema.py
+++ b/misc/codegen/lib/schema.py
@@ -1,15 +1,9 @@
-""" schema.yml format representation """
-import pathlib
-import re
-import types
+""" schema format representation """
import typing
from dataclasses import dataclass, field
from typing import List, Set, Union, Dict, Optional
from enum import Enum, auto
import functools
-import importlib.util
-from toposort import toposort_flatten
-import inflection
class Error(Exception):
@@ -198,125 +192,3 @@ def split_doc(doc):
while trimmed and not trimmed[0]:
trimmed.pop(0)
return trimmed
-
-
-@dataclass
-class _PropertyNamer(PropertyModifier):
- name: str
-
- def modify(self, prop: Property):
- prop.name = self.name.rstrip("_")
-
-
-def _get_class(cls: type) -> Class:
- if not isinstance(cls, type):
- raise Error(f"Only class definitions allowed in schema, found {cls}")
- # we must check that going to dbscheme names and back is preserved
- # In particular this will not happen if uppercase acronyms are included in the name
- to_underscore_and_back = inflection.camelize(inflection.underscore(cls.__name__), uppercase_first_letter=True)
- if cls.__name__ != to_underscore_and_back:
- raise Error(f"Class name must be upper camel-case, without capitalized acronyms, found {cls.__name__} "
- f"instead of {to_underscore_and_back}")
- if len({b._group for b in cls.__bases__ if hasattr(b, "_group")}) > 1:
- raise Error(f"Bases with mixed groups for {cls.__name__}")
- if any(getattr(b, "_null", False) for b in cls.__bases__):
- raise Error(f"Null class cannot be derived")
- return Class(name=cls.__name__,
- bases=[b.__name__ for b in cls.__bases__ if b is not object],
- derived={d.__name__ for d in cls.__subclasses__()},
- # getattr to inherit from bases
- group=getattr(cls, "_group", ""),
- # in the following we don't use `getattr` to avoid inheriting
- pragmas=cls.__dict__.get("_pragmas", []),
- ipa=cls.__dict__.get("_ipa", None),
- properties=[
- a | _PropertyNamer(n)
- for n, a in cls.__dict__.get("__annotations__", {}).items()
- ],
- doc=split_doc(cls.__doc__),
- default_doc_name=cls.__dict__.get("_doc_name"),
- )
-
-
-def _toposort_classes_by_group(classes: typing.Dict[str, Class]) -> typing.Dict[str, Class]:
- groups = {}
- ret = {}
-
- for name, cls in classes.items():
- groups.setdefault(cls.group, []).append(name)
-
- for group, grouped in sorted(groups.items()):
- inheritance = {name: classes[name].bases for name in grouped}
- for name in toposort_flatten(inheritance):
- ret[name] = classes[name]
-
- return ret
-
-
-def _fill_ipa_information(classes: typing.Dict[str, Class]):
- """ Take a dictionary where the `ipa` field is filled for all explicitly synthesized classes
- and update it so that all non-final classes that have only synthesized final descendants
- get `True` as` value for the `ipa` field
- """
- if not classes:
- return
-
- is_ipa: typing.Dict[str, bool] = {}
-
- def fill_is_ipa(name: str):
- if name not in is_ipa:
- cls = classes[name]
- for d in cls.derived:
- fill_is_ipa(d)
- if cls.ipa is not None:
- is_ipa[name] = True
- elif not cls.derived:
- is_ipa[name] = False
- else:
- is_ipa[name] = all(is_ipa[d] for d in cls.derived)
-
- root = next(iter(classes))
- fill_is_ipa(root)
-
- for name, cls in classes.items():
- if cls.ipa is None and is_ipa[name]:
- cls.ipa = True
-
-
-def load(m: types.ModuleType) -> Schema:
- includes = set()
- classes = {}
- known = {"int", "string", "boolean"}
- known.update(n for n in m.__dict__ if not n.startswith("__"))
- import swift.codegen.lib.schema.defs as defs
- null = None
- for name, data in m.__dict__.items():
- if hasattr(defs, name):
- continue
- if name == "__includes":
- includes = set(data)
- continue
- if name.startswith("__"):
- continue
- cls = _get_class(data)
- if classes and not cls.bases:
- raise Error(
- f"Only one root class allowed, found second root {name}")
- cls.check_types(known)
- classes[name] = cls
- if getattr(data, "_null", False):
- if null is not None:
- raise Error(f"Null class {null} already defined, second null class {name} not allowed")
- null = name
- cls.is_null_class = True
-
- _fill_ipa_information(classes)
-
- return Schema(includes=includes, classes=_toposort_classes_by_group(classes), null=null)
-
-
-def load_file(path: pathlib.Path) -> Schema:
- spec = importlib.util.spec_from_file_location("schema", path)
- module = importlib.util.module_from_spec(spec)
- spec.loader.exec_module(module)
- return load(module)
diff --git a/swift/codegen/lib/schema/defs.py b/misc/codegen/lib/schemadefs.py
similarity index 98%
rename from swift/codegen/lib/schema/defs.py
rename to misc/codegen/lib/schemadefs.py
index d972e5a63e5..6c9457490e7 100644
--- a/swift/codegen/lib/schema/defs.py
+++ b/misc/codegen/lib/schemadefs.py
@@ -1,5 +1,5 @@
from typing import Callable as _Callable
-from swift.codegen.lib import schema as _schema
+from misc.codegen.lib import schema as _schema
import inspect as _inspect
from dataclasses import dataclass as _dataclass
diff --git a/misc/codegen/loaders/BUILD.bazel b/misc/codegen/loaders/BUILD.bazel
new file mode 100644
index 00000000000..be07c6d884b
--- /dev/null
+++ b/misc/codegen/loaders/BUILD.bazel
@@ -0,0 +1,11 @@
+load("@codegen_deps//:requirements.bzl", "requirement")
+
+py_library(
+ name = "loaders",
+ srcs = glob(["*.py"]),
+ visibility = ["//misc/codegen:__subpackages__"],
+ deps = [
+ requirement("toposort"),
+ requirement("inflection"),
+ ],
+)
diff --git a/misc/codegen/loaders/dbschemeloader.py b/misc/codegen/loaders/dbschemeloader.py
new file mode 100644
index 00000000000..51e362362a7
--- /dev/null
+++ b/misc/codegen/loaders/dbschemeloader.py
@@ -0,0 +1,54 @@
+import pathlib
+import re
+from misc.codegen.lib import dbscheme
+
+
+class _Re:
+ entity = re.compile(
+ "(?m)"
+ r"(?:^#keyset\[(?P<tablekeys>[\w\s,]+)\][\s\n]*)?^(?P<table>\w+)\("
+ r"(?:\s*//dir=(?P<tabledir>\S*))?(?P<tablebody>[^\)]*)"
+ r"\);?"
+ "|"
+ r"^(?P<union>@\w+)\s*=\s*(?P<unionbody>@\w+(?:\s*\|\s*@\w+)*)\s*;?"
+ )
+ field = re.compile(r"(?m)[\w\s]*\s(?P<field>\w+)\s*:\s*(?P<type>@?\w+)(?P<ref>\s+ref)?")
+ key = re.compile(r"@\w+")
+ comment = re.compile(r"(?m)(?s)/\*.*?\*/|//(?!dir=)[^\n]*$") # lookahead avoid ignoring metadata like //dir=foo
+
+
+def _get_column(match):
+ return dbscheme.Column(
+ schema_name=match["field"].rstrip("_"),
+ type=match["type"],
+ binding=not match["ref"],
+ )
+
+
+def _get_table(match):
+ keyset = None
+ if match["tablekeys"]:
+ keyset = dbscheme.KeySet(k.strip() for k in match["tablekeys"].split(","))
+ return dbscheme.Table(
+ name=match["table"],
+ columns=[_get_column(f) for f in _Re.field.finditer(match["tablebody"])],
+ keyset=keyset,
+ dir=pathlib.PosixPath(match["tabledir"]) if match["tabledir"] else None,
+ )
+
+
+def _get_union(match):
+ return dbscheme.Union(
+ lhs=match["union"],
+ rhs=(d[0] for d in _Re.key.finditer(match["unionbody"])),
+ )
+
+
+def iterload(file):
+ with open(file) as file:
+ data = _Re.comment.sub("", file.read())
+ for e in _Re.entity.finditer(data):
+ if e["table"]:
+ yield _get_table(e)
+ elif e["union"]:
+ yield _get_union(e)
diff --git a/misc/codegen/loaders/schemaloader.py b/misc/codegen/loaders/schemaloader.py
new file mode 100644
index 00000000000..5fd392b112d
--- /dev/null
+++ b/misc/codegen/loaders/schemaloader.py
@@ -0,0 +1,133 @@
+""" schema loader """
+
+import inflection
+import typing
+import types
+import pathlib
+import importlib.util
+from dataclasses import dataclass
+from toposort import toposort_flatten
+
+from misc.codegen.lib import schema, schemadefs
+
+
+@dataclass
+class _PropertyNamer(schema.PropertyModifier):
+ name: str
+
+ def modify(self, prop: schema.Property):
+ prop.name = self.name.rstrip("_")
+
+
+def _get_class(cls: type) -> schema.Class:
+ if not isinstance(cls, type):
+ raise schema.Error(f"Only class definitions allowed in schema, found {cls}")
+ # we must check that going to dbscheme names and back is preserved
+ # In particular this will not happen if uppercase acronyms are included in the name
+ to_underscore_and_back = inflection.camelize(inflection.underscore(cls.__name__), uppercase_first_letter=True)
+ if cls.__name__ != to_underscore_and_back:
+ raise schema.Error(f"Class name must be upper camel-case, without capitalized acronyms, found {cls.__name__} "
+ f"instead of {to_underscore_and_back}")
+ if len({b._group for b in cls.__bases__ if hasattr(b, "_group")}) > 1:
+ raise schema.Error(f"Bases with mixed groups for {cls.__name__}")
+ if any(getattr(b, "_null", False) for b in cls.__bases__):
+ raise schema.Error(f"Null class cannot be derived")
+ return schema.Class(name=cls.__name__,
+ bases=[b.__name__ for b in cls.__bases__ if b is not object],
+ derived={d.__name__ for d in cls.__subclasses__()},
+ # getattr to inherit from bases
+ group=getattr(cls, "_group", ""),
+ # in the following we don't use `getattr` to avoid inheriting
+ pragmas=cls.__dict__.get("_pragmas", []),
+ ipa=cls.__dict__.get("_ipa", None),
+ properties=[
+ a | _PropertyNamer(n)
+ for n, a in cls.__dict__.get("__annotations__", {}).items()
+ ],
+ doc=schema.split_doc(cls.__doc__),
+ default_doc_name=cls.__dict__.get("_doc_name"),
+ )
+
+
+def _toposort_classes_by_group(classes: typing.Dict[str, schema.Class]) -> typing.Dict[str, schema.Class]:
+ groups = {}
+ ret = {}
+
+ for name, cls in classes.items():
+ groups.setdefault(cls.group, []).append(name)
+
+ for group, grouped in sorted(groups.items()):
+ inheritance = {name: classes[name].bases for name in grouped}
+ for name in toposort_flatten(inheritance):
+ ret[name] = classes[name]
+
+ return ret
+
+
+def _fill_ipa_information(classes: typing.Dict[str, schema.Class]):
+ """ Take a dictionary where the `ipa` field is filled for all explicitly synthesized classes
+ and update it so that all non-final classes that have only synthesized final descendants
+ get `True` as value for the `ipa` field
+ """
+ if not classes:
+ return
+
+ is_ipa: typing.Dict[str, bool] = {}
+
+ def fill_is_ipa(name: str):
+ if name not in is_ipa:
+ cls = classes[name]
+ for d in cls.derived:
+ fill_is_ipa(d)
+ if cls.ipa is not None:
+ is_ipa[name] = True
+ elif not cls.derived:
+ is_ipa[name] = False
+ else:
+ is_ipa[name] = all(is_ipa[d] for d in cls.derived)
+
+ root = next(iter(classes))
+ fill_is_ipa(root)
+
+ for name, cls in classes.items():
+ if cls.ipa is None and is_ipa[name]:
+ cls.ipa = True
+
+
+def load(m: types.ModuleType) -> schema.Schema:
+ includes = set()
+ classes = {}
+ known = {"int", "string", "boolean"}
+ known.update(n for n in m.__dict__ if not n.startswith("__"))
+ import misc.codegen.lib.schemadefs as defs
+ null = None
+ for name, data in m.__dict__.items():
+ if hasattr(defs, name):
+ continue
+ if name == "__includes":
+ includes = set(data)
+ continue
+ if name.startswith("__"):
+ continue
+ cls = _get_class(data)
+ if classes and not cls.bases:
+ raise schema.Error(
+ f"Only one root class allowed, found second root {name}")
+ cls.check_types(known)
+ classes[name] = cls
+ if getattr(data, "_null", False):
+ if null is not None:
+ raise schema.Error(f"Null class {null} already defined, second null class {name} not allowed")
+ null = name
+ cls.is_null_class = True
+
+ _fill_ipa_information(classes)
+
+ return schema.Schema(includes=includes, classes=_toposort_classes_by_group(classes), null=null)
+
+
+def load_file(path: pathlib.Path) -> schema.Schema:
+ spec = importlib.util.spec_from_file_location("schema", path)
+ module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(module)
+ return load(module)
diff --git a/swift/codegen/requirements.txt b/misc/codegen/requirements.txt
similarity index 100%
rename from swift/codegen/requirements.txt
rename to misc/codegen/requirements.txt
diff --git a/misc/codegen/schemadefs.py b/misc/codegen/schemadefs.py
new file mode 100644
index 00000000000..6c9457490e7
--- /dev/null
+++ b/misc/codegen/schemadefs.py
@@ -0,0 +1,149 @@
+from typing import Callable as _Callable
+from misc.codegen.lib import schema as _schema
+import inspect as _inspect
+from dataclasses import dataclass as _dataclass
+
+
+class _ChildModifier(_schema.PropertyModifier):
+ def modify(self, prop: _schema.Property):
+ if prop.type is None or prop.type[0].islower():
+ raise _schema.Error("Non-class properties cannot be children")
+ prop.is_child = True
+
+
+@_dataclass
+class _DocModifier(_schema.PropertyModifier):
+ doc: str
+
+ def modify(self, prop: _schema.Property):
+ if "\n" in self.doc or self.doc[-1] == ".":
+ raise _schema.Error("No newlines or trailing dots are allowed in doc, did you intend to use desc?")
+ prop.doc = self.doc
+
+
+@_dataclass
+class _DescModifier(_schema.PropertyModifier):
+ description: str
+
+ def modify(self, prop: _schema.Property):
+ prop.description = _schema.split_doc(self.description)
+
+
+def include(source: str):
+ # add to `includes` variable in calling context
+ _inspect.currentframe().f_back.f_locals.setdefault(
+ "__includes", []).append(source)
+
+
+class _Namespace:
+ """ simple namespacing mechanism """
+
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+
+qltest = _Namespace()
+ql = _Namespace()
+cpp = _Namespace()
+synth = _Namespace()
+
+
+@_dataclass
+class _Pragma(_schema.PropertyModifier):
+ """ A class or property pragma.
+ For properties, it functions similarly to a `_PropertyModifier` with `|`, adding the pragma.
+ For schema classes it acts as a python decorator with `@`.
+ """
+ pragma: str
+
+ def __post_init__(self):
+ namespace, _, name = self.pragma.partition('_')
+ setattr(globals()[namespace], name, self)
+
+ def modify(self, prop: _schema.Property):
+ prop.pragmas.append(self.pragma)
+
+ def __call__(self, cls: type) -> type:
+ """ use this pragma as a decorator on classes """
+ if "_pragmas" in cls.__dict__: # not using hasattr as we don't want to land on inherited pragmas
+ cls._pragmas.append(self.pragma)
+ else:
+ cls._pragmas = [self.pragma]
+ return cls
+
+
+class _Optionalizer(_schema.PropertyModifier):
+ def modify(self, prop: _schema.Property):
+ K = _schema.Property.Kind
+ if prop.kind != K.SINGLE:
+ raise _schema.Error(
+ "Optional should only be applied to simple property types")
+ prop.kind = K.OPTIONAL
+
+
+class _Listifier(_schema.PropertyModifier):
+ def modify(self, prop: _schema.Property):
+ K = _schema.Property.Kind
+ if prop.kind == K.SINGLE:
+ prop.kind = K.REPEATED
+ elif prop.kind == K.OPTIONAL:
+ prop.kind = K.REPEATED_OPTIONAL
+ else:
+ raise _schema.Error(
+ "Repeated should only be applied to simple or optional property types")
+
+
+class _TypeModifier:
+ """ Modifies types using get item notation """
+
+ def __init__(self, modifier: _schema.PropertyModifier):
+ self.modifier = modifier
+
+ def __getitem__(self, item):
+ return item | self.modifier
+
+
+_ClassDecorator = _Callable[[type], type]
+
+
+def _annotate(**kwargs) -> _ClassDecorator:
+ def f(cls: type) -> type:
+ for k, v in kwargs.items():
+ setattr(cls, f"_{k}", v)
+ return cls
+
+ return f
+
+
+boolean = "boolean"
+int = "int"
+string = "string"
+
+predicate = _schema.predicate_marker
+optional = _TypeModifier(_Optionalizer())
+list = _TypeModifier(_Listifier())
+
+child = _ChildModifier()
+doc = _DocModifier
+desc = _DescModifier
+
+use_for_null = _annotate(null=True)
+
+_Pragma("qltest_skip")
+_Pragma("qltest_collapse_hierarchy")
+_Pragma("qltest_uncollapse_hierarchy")
+
+ql.default_doc_name = lambda doc: _annotate(doc_name=doc)
+_Pragma("ql_internal")
+
+_Pragma("cpp_skip")
+
+
+def group(name: str = "") -> _ClassDecorator:
+ return _annotate(group=name)
+
+
+synth.from_class = lambda ref: _annotate(ipa=_schema.IpaInfo(
+ from_class=_schema.get_type_name(ref)))
+synth.on_arguments = lambda **kwargs: _annotate(
+ ipa=_schema.IpaInfo(on_arguments={k: _schema.get_type_name(t) for k, t in kwargs.items()}))
diff --git a/swift/codegen/templates/BUILD.bazel b/misc/codegen/templates/BUILD.bazel
similarity index 68%
rename from swift/codegen/templates/BUILD.bazel
rename to misc/codegen/templates/BUILD.bazel
index 7745e7ea2ac..a86346245af 100644
--- a/swift/codegen/templates/BUILD.bazel
+++ b/misc/codegen/templates/BUILD.bazel
@@ -1,4 +1,4 @@
-package(default_visibility = ["//swift:__subpackages__"])
+package(default_visibility = ["//misc/codegen:__subpackages__"])
filegroup(
name = "trap",
diff --git a/swift/codegen/templates/cpp_classes_cpp.mustache b/misc/codegen/templates/cpp_classes_cpp.mustache
similarity index 100%
rename from swift/codegen/templates/cpp_classes_cpp.mustache
rename to misc/codegen/templates/cpp_classes_cpp.mustache
diff --git a/swift/codegen/templates/cpp_classes_h.mustache b/misc/codegen/templates/cpp_classes_h.mustache
similarity index 95%
rename from swift/codegen/templates/cpp_classes_h.mustache
rename to misc/codegen/templates/cpp_classes_h.mustache
index a4a22170b2f..157bbc31217 100644
--- a/swift/codegen/templates/cpp_classes_h.mustache
+++ b/misc/codegen/templates/cpp_classes_h.mustache
@@ -6,8 +6,8 @@
#include
#include
-#include "swift/extractor/trap/TrapLabel.h"
-#include "swift/extractor/trap/TrapTagTraits.h"
+#include "{{trap_library}}/TrapLabel.h"
+#include "{{trap_library}}/TrapTagTraits.h"
#include "./TrapEntries.h"
{{#include_parent}}
#include "../TrapClasses.h"
diff --git a/swift/codegen/templates/dbscheme.mustache b/misc/codegen/templates/dbscheme.mustache
similarity index 100%
rename from swift/codegen/templates/dbscheme.mustache
rename to misc/codegen/templates/dbscheme.mustache
diff --git a/swift/codegen/templates/ql_class.mustache b/misc/codegen/templates/ql_class.mustache
similarity index 97%
rename from swift/codegen/templates/ql_class.mustache
rename to misc/codegen/templates/ql_class.mustache
index cd0b3ac2355..173df3a75b9 100644
--- a/swift/codegen/templates/ql_class.mustache
+++ b/misc/codegen/templates/ql_class.mustache
@@ -1,6 +1,6 @@
// generated by {{generator}}
-private import codeql.swift.generated.Synth
-private import codeql.swift.generated.Raw
+private import {{import_prefix}}.Synth
+private import {{import_prefix}}.Raw
{{#imports}}
import {{.}}
{{/imports}}
diff --git a/swift/codegen/templates/ql_db.mustache b/misc/codegen/templates/ql_db.mustache
similarity index 100%
rename from swift/codegen/templates/ql_db.mustache
rename to misc/codegen/templates/ql_db.mustache
diff --git a/swift/codegen/templates/ql_imports.mustache b/misc/codegen/templates/ql_imports.mustache
similarity index 100%
rename from swift/codegen/templates/ql_imports.mustache
rename to misc/codegen/templates/ql_imports.mustache
diff --git a/swift/codegen/templates/ql_ipa_constructor_stub.mustache b/misc/codegen/templates/ql_ipa_constructor_stub.mustache
similarity index 83%
rename from swift/codegen/templates/ql_ipa_constructor_stub.mustache
rename to misc/codegen/templates/ql_ipa_constructor_stub.mustache
index f207db66d21..e5e525417d3 100644
--- a/swift/codegen/templates/ql_ipa_constructor_stub.mustache
+++ b/misc/codegen/templates/ql_ipa_constructor_stub.mustache
@@ -1,9 +1,9 @@
// generated by {{generator}}, remove this comment if you wish to edit this file
-private import codeql.swift.generated.Raw
+private import {{import_prefix}}.Raw
{{#cls}}
{{#is_db}}
{{#has_subtracted_ipa_types}}
-private import codeql.swift.generated.PureSynthConstructors
+private import {{import_prefix}}.PureSynthConstructors
{{/has_subtracted_ipa_types}}
{{/is_db}}
diff --git a/swift/codegen/templates/ql_ipa_types.mustache b/misc/codegen/templates/ql_ipa_types.mustache
similarity index 93%
rename from swift/codegen/templates/ql_ipa_types.mustache
rename to misc/codegen/templates/ql_ipa_types.mustache
index 9a4eef5e6b5..430361f6776 100644
--- a/swift/codegen/templates/ql_ipa_types.mustache
+++ b/misc/codegen/templates/ql_ipa_types.mustache
@@ -1,5 +1,5 @@
-private import codeql.swift.generated.SynthConstructors
-private import codeql.swift.generated.Raw
+private import {{import_prefix}}.SynthConstructors
+private import {{import_prefix}}.Raw
cached module Synth {
cached newtype T{{root}} =
diff --git a/swift/codegen/templates/ql_parent.mustache b/misc/codegen/templates/ql_parent.mustache
similarity index 98%
rename from swift/codegen/templates/ql_parent.mustache
rename to misc/codegen/templates/ql_parent.mustache
index 53f80712aac..6777d366ddf 100644
--- a/swift/codegen/templates/ql_parent.mustache
+++ b/misc/codegen/templates/ql_parent.mustache
@@ -1,9 +1,8 @@
// generated by {{generator}}
-import codeql.swift.elements
-{{#additional_imports}}
+{{#imports}}
import {{.}}
-{{/additional_imports}}
+{{/imports}}
private module Impl {
{{#classes}}
diff --git a/swift/codegen/templates/ql_property_doc.mustache b/misc/codegen/templates/ql_property_doc.mustache
similarity index 100%
rename from swift/codegen/templates/ql_property_doc.mustache
rename to misc/codegen/templates/ql_property_doc.mustache
diff --git a/misc/codegen/templates/ql_stub.mustache b/misc/codegen/templates/ql_stub.mustache
new file mode 100644
index 00000000000..7e17efc6d46
--- /dev/null
+++ b/misc/codegen/templates/ql_stub.mustache
@@ -0,0 +1,18 @@
+// generated by {{generator}}, remove this comment if you wish to edit this file
+private import {{base_import}}
+{{#has_ipa_accessors}}
+private import {{import_prefix}}.Raw
+private import {{import_prefix}}.Synth
+{{/has_ipa_accessors}}
+
+{{#ql_internal}}
+/**
+ * INTERNAL: Do not use.
+ */
+{{/ql_internal}}
+class {{name}} extends Generated::{{name}} {
+ {{#ipa_accessors}}
+ private
+ cached {{type}} getUnderlying{{argument}}() { this = Synth::T{{name}}({{#constructorparams}}{{^first}},{{/first}}{{param}}{{/constructorparams}})}
+ {{/ipa_accessors}}
+}
diff --git a/swift/codegen/templates/ql_test_class.mustache b/misc/codegen/templates/ql_test_class.mustache
similarity index 94%
rename from swift/codegen/templates/ql_test_class.mustache
rename to misc/codegen/templates/ql_test_class.mustache
index adaf80de867..d689753cfd3 100644
--- a/swift/codegen/templates/ql_test_class.mustache
+++ b/misc/codegen/templates/ql_test_class.mustache
@@ -1,6 +1,6 @@
// generated by {{generator}}
-import codeql.swift.elements
+import {{elements_module}}
import TestUtils
from {{class_name}} x{{#properties}}, {{#type}}{{.}}{{/type}}{{^type}}string{{/type}} {{getter}}{{/properties}}
diff --git a/misc/codegen/templates/ql_test_missing.mustache b/misc/codegen/templates/ql_test_missing.mustache
new file mode 100644
index 00000000000..5a714744ef2
--- /dev/null
+++ b/misc/codegen/templates/ql_test_missing.mustache
@@ -0,0 +1,4 @@
+// generated by {{generator}}
+
+After a source file is added in this directory and {{generator}} is run again, test queries
+will appear and this file will be deleted
diff --git a/swift/codegen/templates/ql_test_property.mustache b/misc/codegen/templates/ql_test_property.mustache
similarity index 90%
rename from swift/codegen/templates/ql_test_property.mustache
rename to misc/codegen/templates/ql_test_property.mustache
index 46887384159..5e81298089d 100644
--- a/swift/codegen/templates/ql_test_property.mustache
+++ b/misc/codegen/templates/ql_test_property.mustache
@@ -1,6 +1,6 @@
// generated by {{generator}}
-import codeql.swift.elements
+import {{elements_module}}
import TestUtils
{{#property}}
diff --git a/swift/codegen/templates/trap_tags_h.mustache b/misc/codegen/templates/trap_tags_h.mustache
similarity index 100%
rename from swift/codegen/templates/trap_tags_h.mustache
rename to misc/codegen/templates/trap_tags_h.mustache
diff --git a/swift/codegen/templates/trap_traps_cpp.mustache b/misc/codegen/templates/trap_traps_cpp.mustache
similarity index 100%
rename from swift/codegen/templates/trap_traps_cpp.mustache
rename to misc/codegen/templates/trap_traps_cpp.mustache
diff --git a/swift/codegen/templates/trap_traps_h.mustache b/misc/codegen/templates/trap_traps_h.mustache
similarity index 82%
rename from swift/codegen/templates/trap_traps_h.mustache
rename to misc/codegen/templates/trap_traps_h.mustache
index d3bf7769bd7..987e980d24b 100644
--- a/swift/codegen/templates/trap_traps_h.mustache
+++ b/misc/codegen/templates/trap_traps_h.mustache
@@ -5,9 +5,9 @@
#include
#include
-#include "swift/extractor/trap/TrapLabel.h"
-#include "swift/extractor/trap/TrapTagTraits.h"
-#include "swift/extractor/trap/generated/TrapTags.h"
+#include "{{trap_library_dir}}/TrapLabel.h"
+#include "{{trap_library_dir}}/TrapTagTraits.h"
+#include "{{gen_dir}}/TrapTags.h"
namespace codeql {
{{#traps}}
diff --git a/swift/codegen/test/BUILD.bazel b/misc/codegen/test/BUILD.bazel
similarity index 72%
rename from swift/codegen/test/BUILD.bazel
rename to misc/codegen/test/BUILD.bazel
index 9ae41c14941..dde67283335 100644
--- a/swift/codegen/test/BUILD.bazel
+++ b/misc/codegen/test/BUILD.bazel
@@ -1,11 +1,11 @@
-load("@swift_codegen_deps//:requirements.bzl", "requirement")
+load("@codegen_deps//:requirements.bzl", "requirement")
py_library(
name = "utils",
testonly = True,
srcs = ["utils.py"],
deps = [
- "//swift/codegen/lib",
+ "//misc/codegen/lib",
requirement("pytest"),
],
)
@@ -17,7 +17,7 @@ py_library(
srcs = [src],
deps = [
":utils",
- "//swift/codegen/generators",
+ "//misc/codegen/generators",
],
)
for src in glob(["test_*.py"])
diff --git a/swift/codegen/test/test_cpp.py b/misc/codegen/test/test_cpp.py
similarity index 98%
rename from swift/codegen/test/test_cpp.py
rename to misc/codegen/test/test_cpp.py
index d04877e38c4..c4bee337a4f 100644
--- a/swift/codegen/test/test_cpp.py
+++ b/misc/codegen/test/test_cpp.py
@@ -3,7 +3,7 @@ from copy import deepcopy
import pytest
-from swift.codegen.lib import cpp
+from misc.codegen.lib import cpp
@pytest.mark.parametrize("keyword", cpp.cpp_keywords)
diff --git a/swift/codegen/test/test_cppgen.py b/misc/codegen/test/test_cppgen.py
similarity index 98%
rename from swift/codegen/test/test_cppgen.py
rename to misc/codegen/test/test_cppgen.py
index 3c62143e86a..278f184fcbc 100644
--- a/swift/codegen/test/test_cppgen.py
+++ b/misc/codegen/test/test_cppgen.py
@@ -1,8 +1,8 @@
import sys
-from swift.codegen.generators import cppgen
-from swift.codegen.lib import cpp
-from swift.codegen.test.utils import *
+from misc.codegen.generators import cppgen
+from misc.codegen.lib import cpp
+from misc.codegen.test.utils import *
output_dir = pathlib.Path("path", "to", "output")
diff --git a/misc/codegen/test/test_dbscheme.py b/misc/codegen/test/test_dbscheme.py
new file mode 100644
index 00000000000..e2635ecee5a
--- /dev/null
+++ b/misc/codegen/test/test_dbscheme.py
@@ -0,0 +1,52 @@
+import sys
+from copy import deepcopy
+
+from misc.codegen.lib import dbscheme
+from misc.codegen.test.utils import *
+
+
+def test_dbcolumn_name():
+ assert dbscheme.Column("foo", "some_type").name == "foo"
+
+
+@pytest.mark.parametrize("keyword", dbscheme.dbscheme_keywords)
+def test_dbcolumn_keyword_name(keyword):
+ assert dbscheme.Column(keyword, "some_type").name == keyword + "_"
+
+
+@pytest.mark.parametrize("type,binding,lhstype,rhstype", [
+ ("builtin_type", False, "builtin_type", "builtin_type ref"),
+ ("builtin_type", True, "builtin_type", "builtin_type ref"),
+ ("@at_type", False, "int", "@at_type ref"),
+ ("@at_type", True, "unique int", "@at_type"),
+])
+def test_dbcolumn_types(type, binding, lhstype, rhstype):
+ col = dbscheme.Column("foo", type, binding)
+ assert col.lhstype == lhstype
+ assert col.rhstype == rhstype
+
+
+def test_keyset_has_first_id_marked():
+ ids = ["a", "b", "c"]
+ ks = dbscheme.KeySet(ids)
+ assert ks.ids[0].first
+ assert [id.id for id in ks.ids] == ids
+
+
+def test_table_has_first_column_marked():
+ columns = [dbscheme.Column("a", "x"), dbscheme.Column("b", "y", binding=True), dbscheme.Column("c", "z")]
+ expected = deepcopy(columns)
+ table = dbscheme.Table("foo", columns)
+ expected[0].first = True
+ assert table.columns == expected
+
+
+def test_union_has_first_case_marked():
+ rhs = ["a", "b", "c"]
+ u = dbscheme.Union(lhs="x", rhs=rhs)
+ assert u.rhs[0].first
+ assert [c.type for c in u.rhs] == rhs
+
+
+if __name__ == '__main__':
+ sys.exit(pytest.main([__file__] + sys.argv[1:]))
diff --git a/swift/codegen/test/test_dbschemegen.py b/misc/codegen/test/test_dbschemegen.py
similarity index 96%
rename from swift/codegen/test/test_dbschemegen.py
rename to misc/codegen/test/test_dbschemegen.py
index 4e560e8208e..7a986538b06 100644
--- a/swift/codegen/test/test_dbschemegen.py
+++ b/misc/codegen/test/test_dbschemegen.py
@@ -1,9 +1,9 @@
import collections
import sys
-from swift.codegen.generators import dbschemegen
-from swift.codegen.lib import dbscheme
-from swift.codegen.test.utils import *
+from misc.codegen.generators import dbschemegen
+from misc.codegen.lib import dbscheme
+from misc.codegen.test.utils import *
InputExpectedPair = collections.namedtuple("InputExpectedPair", ("input", "expected"))
@@ -30,7 +30,7 @@ def generate(opts, input, renderer):
def test_empty(generate):
assert generate([]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[],
)
@@ -43,10 +43,10 @@ def test_includes(input, opts, generate):
write(opts.schema.parent / i, i + " data")
assert generate([]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[
dbscheme.SchemeInclude(
- src=schema_dir / i,
+ src=pathlib.Path(i),
data=i + " data",
) for i in includes
],
@@ -58,7 +58,7 @@ def test_empty_final_class(generate, dir_param):
assert generate([
schema.Class("Object", group=dir_param.input),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -78,7 +78,7 @@ def test_final_class_with_single_scalar_field(generate, dir_param):
schema.SingleProperty("foo", "bar"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -98,7 +98,7 @@ def test_final_class_with_single_class_field(generate, dir_param):
schema.SingleProperty("foo", "Bar"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -118,7 +118,7 @@ def test_final_class_with_optional_field(generate, dir_param):
schema.OptionalProperty("foo", "bar"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -146,7 +146,7 @@ def test_final_class_with_repeated_field(generate, property_cls, dir_param):
property_cls("foo", "bar"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -174,7 +174,7 @@ def test_final_class_with_predicate_field(generate, dir_param):
schema.PredicateProperty("foo"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -205,7 +205,7 @@ def test_final_class_with_more_fields(generate, dir_param):
schema.PredicateProperty("six"),
]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Table(
@@ -259,7 +259,7 @@ def test_empty_class_with_derived(generate):
schema.Class(name="Left", bases=["Base"]),
schema.Class(name="Right", bases=["Base"]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union(
@@ -290,7 +290,7 @@ def test_class_with_derived_and_single_property(generate, dir_param):
schema.Class(name="Left", bases=["Base"]),
schema.Class(name="Right", bases=["Base"]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union(
@@ -330,7 +330,7 @@ def test_class_with_derived_and_optional_property(generate, dir_param):
schema.Class(name="Left", bases=["Base"]),
schema.Class(name="Right", bases=["Base"]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union(
@@ -370,7 +370,7 @@ def test_class_with_derived_and_repeated_property(generate, dir_param):
schema.Class(name="Left", bases=["Base"]),
schema.Class(name="Right", bases=["Base"]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union(
@@ -432,7 +432,7 @@ def test_null_class(generate):
bases=["Base"],
),
], null="Null") == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union(
@@ -514,7 +514,7 @@ def test_ipa_classes_ignored(generate):
schema.Class(name="B", ipa=schema.IpaInfo(from_class="A")),
schema.Class(name="C", ipa=schema.IpaInfo(on_arguments={"x": "A"})),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[],
)
@@ -526,7 +526,7 @@ def test_ipa_derived_classes_ignored(generate):
schema.Class(name="B", bases=["A"], ipa=schema.IpaInfo()),
schema.Class(name="C", bases=["A"]),
]) == dbscheme.Scheme(
- src=schema_file,
+ src=schema_file.name,
includes=[],
declarations=[
dbscheme.Union("@a", ["@c"]),
diff --git a/swift/codegen/test/test_dbscheme.py b/misc/codegen/test/test_dbschemeloader.py
similarity index 67%
rename from swift/codegen/test/test_dbscheme.py
rename to misc/codegen/test/test_dbschemeloader.py
index 9d9184ea954..ab4efbff75a 100644
--- a/swift/codegen/test/test_dbscheme.py
+++ b/misc/codegen/test/test_dbschemeloader.py
@@ -1,61 +1,18 @@
import sys
from copy import deepcopy
-from swift.codegen.lib import dbscheme
-from swift.codegen.test.utils import *
+from misc.codegen.lib import dbscheme
+from misc.codegen.loaders.dbschemeloader import iterload
+from misc.codegen.test.utils import *
-def test_dbcolumn_name():
- assert dbscheme.Column("foo", "some_type").name == "foo"
-
-
-@pytest.mark.parametrize("keyword", dbscheme.dbscheme_keywords)
-def test_dbcolumn_keyword_name(keyword):
- assert dbscheme.Column(keyword, "some_type").name == keyword + "_"
-
-
-@pytest.mark.parametrize("type,binding,lhstype,rhstype", [
- ("builtin_type", False, "builtin_type", "builtin_type ref"),
- ("builtin_type", True, "builtin_type", "builtin_type ref"),
- ("@at_type", False, "int", "@at_type ref"),
- ("@at_type", True, "unique int", "@at_type"),
-])
-def test_dbcolumn_types(type, binding, lhstype, rhstype):
- col = dbscheme.Column("foo", type, binding)
- assert col.lhstype == lhstype
- assert col.rhstype == rhstype
-
-
-def test_keyset_has_first_id_marked():
- ids = ["a", "b", "c"]
- ks = dbscheme.KeySet(ids)
- assert ks.ids[0].first
- assert [id.id for id in ks.ids] == ids
-
-
-def test_table_has_first_column_marked():
- columns = [dbscheme.Column("a", "x"), dbscheme.Column("b", "y", binding=True), dbscheme.Column("c", "z")]
- expected = deepcopy(columns)
- table = dbscheme.Table("foo", columns)
- expected[0].first = True
- assert table.columns == expected
-
-
-def test_union_has_first_case_marked():
- rhs = ["a", "b", "c"]
- u = dbscheme.Union(lhs="x", rhs=rhs)
- assert u.rhs[0].first
- assert [c.type for c in u.rhs] == rhs
-
-
-# load tests
@pytest.fixture
def load(tmp_path):
file = tmp_path / "test.dbscheme"
def ret(yml):
write(file, yml)
- return list(dbscheme.iterload(file))
+ return list(iterload(file))
return ret
diff --git a/swift/codegen/test/test_ql.py b/misc/codegen/test/test_ql.py
similarity index 93%
rename from swift/codegen/test/test_ql.py
rename to misc/codegen/test/test_ql.py
index 5f5b1aebd73..0006bbb96e4 100644
--- a/swift/codegen/test/test_ql.py
+++ b/misc/codegen/test/test_ql.py
@@ -1,8 +1,8 @@
import sys
from copy import deepcopy
-from swift.codegen.lib import ql
-from swift.codegen.test.utils import *
+from misc.codegen.lib import ql
+from misc.codegen.test.utils import *
def test_property_has_first_table_param_marked():
@@ -149,5 +149,12 @@ def test_class_without_description():
assert prop.has_description is False
+def test_ipa_accessor_has_first_constructor_param_marked():
+ params = ["a", "b", "c"]
+ x = ql.IpaUnderlyingAccessor("foo", "bar", params)
+ assert x.constructorparams[0].first
+ assert [p.param for p in x.constructorparams] == params
+
+
if __name__ == '__main__':
sys.exit(pytest.main([__file__] + sys.argv[1:]))
diff --git a/swift/codegen/test/test_qlgen.py b/misc/codegen/test/test_qlgen.py
similarity index 50%
rename from swift/codegen/test/test_qlgen.py
rename to misc/codegen/test/test_qlgen.py
index 7a290ba76b8..f16b1404c2e 100644
--- a/swift/codegen/test/test_qlgen.py
+++ b/misc/codegen/test/test_qlgen.py
@@ -4,9 +4,9 @@ import sys
import pytest
-from swift.codegen.generators import qlgen
-from swift.codegen.lib import ql
-from swift.codegen.test.utils import *
+from misc.codegen.generators import qlgen
+from misc.codegen.lib import ql
+from misc.codegen.test.utils import *
@pytest.fixture(autouse=True)
@@ -17,16 +17,16 @@ def run_mock():
# these are lambdas so that they will use patched paths when called
-def stub_path(): return paths.swift_dir / "ql/lib/stub/path"
+def stub_path(): return paths.root_dir / "ql/lib/stub/path"
-def ql_output_path(): return paths.swift_dir / "ql/lib/other/path"
+def ql_output_path(): return paths.root_dir / "ql/lib/other/path"
-def ql_test_output_path(): return paths.swift_dir / "ql/test/path"
+def ql_test_output_path(): return paths.root_dir / "ql/test/path"
-def generated_registry_path(): return paths.swift_dir / "registry.list"
+def generated_registry_path(): return paths.root_dir / "registry.list"
def import_file(): return stub_path().with_suffix(".qll")
@@ -35,9 +35,11 @@ def import_file(): return stub_path().with_suffix(".qll")
def children_file(): return ql_output_path() / "ParentChild.qll"
-stub_import_prefix = "stub.path."
+stub_import = "stub.path"
+stub_import_prefix = stub_import + "."
root_import = stub_import_prefix + "Element"
-gen_import_prefix = "other.path."
+gen_import = "other.path"
+gen_import_prefix = gen_import + "."
@pytest.fixture
@@ -47,7 +49,7 @@ def qlgen_opts(opts):
opts.ql_test_output = ql_test_output_path()
opts.generated_registry = generated_registry_path()
opts.ql_format = True
- opts.swift_dir = paths.swift_dir
+ opts.root_dir = paths.root_dir
opts.force = False
return opts
@@ -133,12 +135,20 @@ def generate_tests(generate):
return func
+def a_ql_class(**kwargs):
+ return ql.Class(**kwargs, import_prefix=gen_import)
+
+
+def a_ql_stub(**kwargs):
+ return ql.Stub(**kwargs, import_prefix=gen_import)
+
+
def test_one_empty_class(generate_classes):
assert generate_classes([
schema.Class("A")
]) == {
- "A.qll": (ql.Stub(name="A", base_import=gen_import_prefix + "A"),
- ql.Class(name="A", final=True)),
+ "A.qll": (a_ql_stub(name="A", base_import=gen_import_prefix + "A"),
+ a_ql_class(name="A", final=True)),
}
@@ -149,15 +159,15 @@ def test_hierarchy(generate_classes):
schema.Class("B", bases=["A"], derived={"D"}),
schema.Class("A", derived={"B", "C"}),
]) == {
- "A.qll": (ql.Stub(name="A", base_import=gen_import_prefix + "A"),
- ql.Class(name="A")),
- "B.qll": (ql.Stub(name="B", base_import=gen_import_prefix + "B"),
- ql.Class(name="B", bases=["A"], imports=[stub_import_prefix + "A"])),
- "C.qll": (ql.Stub(name="C", base_import=gen_import_prefix + "C"),
- ql.Class(name="C", bases=["A"], imports=[stub_import_prefix + "A"])),
- "D.qll": (ql.Stub(name="D", base_import=gen_import_prefix + "D"),
- ql.Class(name="D", final=True, bases=["B", "C"],
- imports=[stub_import_prefix + cls for cls in "BC"])),
+ "A.qll": (a_ql_stub(name="A", base_import=gen_import_prefix + "A"),
+ a_ql_class(name="A")),
+ "B.qll": (a_ql_stub(name="B", base_import=gen_import_prefix + "B"),
+ a_ql_class(name="B", bases=["A"], imports=[stub_import_prefix + "A"])),
+ "C.qll": (a_ql_stub(name="C", base_import=gen_import_prefix + "C"),
+ a_ql_class(name="C", bases=["A"], imports=[stub_import_prefix + "A"])),
+ "D.qll": (a_ql_stub(name="D", base_import=gen_import_prefix + "D"),
+ a_ql_class(name="D", final=True, bases=["B", "C"],
+ imports=[stub_import_prefix + cls for cls in "BC"])),
}
@@ -186,15 +196,15 @@ def test_hierarchy_children(generate_children_implementations):
schema.Class("C", bases=["A"], derived={"D"}, pragmas=["ql_internal"]),
schema.Class("D", bases=["B", "C"]),
]) == ql.GetParentImplementation(
- classes=[ql.Class(name="A", ql_internal=True),
- ql.Class(name="B", bases=["A"], imports=[
+ classes=[a_ql_class(name="A", ql_internal=True),
+ a_ql_class(name="B", bases=["A"], imports=[
stub_import_prefix + "A"]),
- ql.Class(name="C", bases=["A"], imports=[
+ a_ql_class(name="C", bases=["A"], imports=[
stub_import_prefix + "A"], ql_internal=True),
- ql.Class(name="D", final=True, bases=["B", "C"],
- imports=[stub_import_prefix + cls for cls in "BC"]),
+ a_ql_class(name="D", final=True, bases=["B", "C"],
+ imports=[stub_import_prefix + cls for cls in "BC"]),
],
- additional_imports=[stub_import_prefix + cls for cls in "AC"],
+ imports=[stub_import] + [stub_import_prefix + cls for cls in "AC"],
)
@@ -203,12 +213,12 @@ def test_single_property(generate_classes):
schema.Class("MyObject", properties=[
schema.SingleProperty("foo", "bar")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_objects",
- tableparams=["this", "result"], doc="foo of this my object"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_objects",
+ tableparams=["this", "result"], doc="foo of this my object"),
+ ])),
}
@@ -226,42 +236,45 @@ def test_children(generate_classes):
schema.RepeatedOptionalProperty("child_4", "int", is_child=True),
]),
]) == {
- "FakeRoot.qll": (ql.Stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
- ql.Class(name="FakeRoot", final=True)),
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="A", type="int", tablename="my_objects",
- tableparams=["this", "result", "_"],
- doc="a of this my object"),
- ql.Property(singular="Child1", type="int", tablename="my_objects",
- tableparams=["this", "_", "result"], prev_child="",
- doc="child 1 of this my object"),
- ql.Property(singular="B", plural="Bs", type="int",
- tablename="my_object_bs",
- tableparams=["this", "index", "result"],
- doc="b of this my object", doc_plural="bs of this my object"),
- ql.Property(singular="Child", plural="Children", type="int",
- tablename="my_object_children",
- tableparams=["this", "index", "result"], prev_child="Child1",
- doc="child of this my object",
- doc_plural="children of this my object"),
- ql.Property(singular="C", type="int", tablename="my_object_cs",
- tableparams=["this", "result"], is_optional=True,
- doc="c of this my object"),
- ql.Property(singular="Child3", type="int", tablename="my_object_child_3s",
- tableparams=["this", "result"], is_optional=True,
- prev_child="Child", doc="child 3 of this my object"),
- ql.Property(singular="D", plural="Ds", type="int",
- tablename="my_object_ds",
- tableparams=["this", "index", "result"], is_optional=True,
- doc="d of this my object", doc_plural="ds of this my object"),
- ql.Property(singular="Child4", plural="Child4s", type="int",
- tablename="my_object_child_4s",
- tableparams=["this", "index", "result"], is_optional=True,
- prev_child="Child3", doc="child 4 of this my object",
- doc_plural="child 4s of this my object"),
- ])),
+ "FakeRoot.qll": (a_ql_stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
+ a_ql_class(name="FakeRoot", final=True)),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="A", type="int", tablename="my_objects",
+ tableparams=["this", "result", "_"],
+ doc="a of this my object"),
+ ql.Property(singular="Child1", type="int", tablename="my_objects",
+ tableparams=["this", "_", "result"], prev_child="",
+ doc="child 1 of this my object"),
+ ql.Property(singular="B", plural="Bs", type="int",
+ tablename="my_object_bs",
+ tableparams=["this", "index", "result"],
+ doc="b of this my object",
+ doc_plural="bs of this my object"),
+ ql.Property(singular="Child", plural="Children", type="int",
+ tablename="my_object_children",
+ tableparams=["this", "index", "result"], prev_child="Child1",
+ doc="child of this my object",
+ doc_plural="children of this my object"),
+ ql.Property(singular="C", type="int", tablename="my_object_cs",
+ tableparams=["this", "result"], is_optional=True,
+ doc="c of this my object"),
+ ql.Property(singular="Child3", type="int",
+ tablename="my_object_child_3s",
+ tableparams=["this", "result"], is_optional=True,
+ prev_child="Child", doc="child 3 of this my object"),
+ ql.Property(singular="D", plural="Ds", type="int",
+ tablename="my_object_ds",
+ tableparams=["this", "index", "result"], is_optional=True,
+ doc="d of this my object",
+ doc_plural="ds of this my object"),
+ ql.Property(singular="Child4", plural="Child4s", type="int",
+ tablename="my_object_child_4s",
+ tableparams=["this", "index", "result"], is_optional=True,
+ prev_child="Child3", doc="child 4 of this my object",
+ doc_plural="child 4s of this my object"),
+ ])),
}
@@ -273,19 +286,19 @@ def test_single_properties(generate_classes):
schema.SingleProperty("three", "z"),
]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="One", type="x", tablename="my_objects",
- tableparams=["this", "result", "_", "_"],
- doc="one of this my object"),
- ql.Property(singular="Two", type="y", tablename="my_objects",
- tableparams=["this", "_", "result", "_"],
- doc="two of this my object"),
- ql.Property(singular="Three", type="z", tablename="my_objects",
- tableparams=["this", "_", "_", "result"],
- doc="three of this my object"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="One", type="x", tablename="my_objects",
+ tableparams=["this", "result", "_", "_"],
+ doc="one of this my object"),
+ ql.Property(singular="Two", type="y", tablename="my_objects",
+ tableparams=["this", "_", "result", "_"],
+ doc="two of this my object"),
+ ql.Property(singular="Three", type="z", tablename="my_objects",
+ tableparams=["this", "_", "_", "result"],
+ doc="three of this my object"),
+ ])),
}
@@ -296,14 +309,14 @@ def test_optional_property(generate_classes, is_child, prev_child):
schema.Class("MyObject", properties=[
schema.OptionalProperty("foo", "bar", is_child=is_child)]),
]) == {
- "FakeRoot.qll": (ql.Stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
- ql.Class(name="FakeRoot", final=True)),
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True, properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_object_foos",
- tableparams=["this", "result"],
- is_optional=True, prev_child=prev_child, doc="foo of this my object"),
- ])),
+ "FakeRoot.qll": (a_ql_stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
+ a_ql_class(name="FakeRoot", final=True)),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True, properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_object_foos",
+ tableparams=["this", "result"],
+ is_optional=True, prev_child=prev_child, doc="foo of this my object"),
+ ])),
}
@@ -314,14 +327,14 @@ def test_repeated_property(generate_classes, is_child, prev_child):
schema.Class("MyObject", properties=[
schema.RepeatedProperty("foo", "bar", is_child=is_child)]),
]) == {
- "FakeRoot.qll": (ql.Stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
- ql.Class(name="FakeRoot", final=True)),
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True, properties=[
- ql.Property(singular="Foo", plural="Foos", type="bar", tablename="my_object_foos",
- tableparams=["this", "index", "result"], prev_child=prev_child,
- doc="foo of this my object", doc_plural="foos of this my object"),
- ])),
+ "FakeRoot.qll": (a_ql_stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
+ a_ql_class(name="FakeRoot", final=True)),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True, properties=[
+ ql.Property(singular="Foo", plural="Foos", type="bar", tablename="my_object_foos",
+ tableparams=["this", "index", "result"], prev_child=prev_child,
+ doc="foo of this my object", doc_plural="foos of this my object"),
+ ])),
}
@@ -333,15 +346,15 @@ def test_repeated_optional_property(generate_classes, is_child, prev_child):
schema.RepeatedOptionalProperty("foo", "bar", is_child=is_child)]),
]) == {
- "FakeRoot.qll": (ql.Stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
- ql.Class(name="FakeRoot", final=True)),
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True, properties=[
- ql.Property(singular="Foo", plural="Foos", type="bar", tablename="my_object_foos",
- tableparams=["this", "index", "result"], is_optional=True,
- prev_child=prev_child, doc="foo of this my object",
- doc_plural="foos of this my object"),
- ])),
+ "FakeRoot.qll": (a_ql_stub(name="FakeRoot", base_import=gen_import_prefix + "FakeRoot"),
+ a_ql_class(name="FakeRoot", final=True)),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True, properties=[
+ ql.Property(singular="Foo", plural="Foos", type="bar", tablename="my_object_foos",
+ tableparams=["this", "index", "result"], is_optional=True,
+ prev_child=prev_child, doc="foo of this my object",
+ doc_plural="foos of this my object"),
+ ])),
}
@@ -350,11 +363,11 @@ def test_predicate_property(generate_classes):
schema.Class("MyObject", properties=[
schema.PredicateProperty("is_foo")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True, properties=[
- ql.Property(singular="isFoo", type="predicate", tablename="my_object_is_foo",
- tableparams=["this"], is_predicate=True, doc="this my object is foo"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True, properties=[
+ ql.Property(singular="isFoo", type="predicate", tablename="my_object_is_foo",
+ tableparams=["this"], is_predicate=True, doc="this my object is foo"),
+ ])),
}
@@ -365,8 +378,8 @@ def test_single_class_property(generate_classes, is_child, prev_child):
schema.Class("MyObject", properties=[
schema.SingleProperty("foo", "Bar", is_child=is_child)]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(
name="MyObject", final=True, imports=[stub_import_prefix + "Bar"], properties=[
ql.Property(singular="Foo", type="Bar", tablename="my_objects",
tableparams=[
@@ -374,8 +387,8 @@ def test_single_class_property(generate_classes, is_child, prev_child):
prev_child=prev_child, doc="foo of this my object"),
],
)),
- "Bar.qll": (ql.Stub(name="Bar", base_import=gen_import_prefix + "Bar"),
- ql.Class(name="Bar", final=True)),
+ "Bar.qll": (a_ql_stub(name="Bar", base_import=gen_import_prefix + "Bar"),
+ a_ql_class(name="Bar", final=True)),
}
@@ -384,8 +397,8 @@ def test_class_with_doc(generate_classes):
assert generate_classes([
schema.Class("A", doc=doc),
]) == {
- "A.qll": (ql.Stub(name="A", base_import=gen_import_prefix + "A"),
- ql.Class(name="A", final=True, doc=doc)),
+ "A.qll": (a_ql_stub(name="A", base_import=gen_import_prefix + "A"),
+ a_ql_class(name="A", final=True, doc=doc)),
}
@@ -395,11 +408,11 @@ def test_class_dir(generate_classes):
schema.Class("A", derived={"B"}, group=dir),
schema.Class("B", bases=["A"]),
]) == {
- f"{dir}/A.qll": (ql.Stub(name="A", base_import=gen_import_prefix + "another.rel.path.A"),
- ql.Class(name="A", dir=pathlib.Path(dir))),
- "B.qll": (ql.Stub(name="B", base_import=gen_import_prefix + "B"),
- ql.Class(name="B", final=True, bases=["A"],
- imports=[stub_import_prefix + "another.rel.path.A"])),
+ f"{dir}/A.qll": (a_ql_stub(name="A", base_import=gen_import_prefix + "another.rel.path.A"),
+ a_ql_class(name="A", dir=pathlib.Path(dir))),
+ "B.qll": (a_ql_stub(name="B", base_import=gen_import_prefix + "B"),
+ a_ql_class(name="B", final=True, bases=["A"],
+ imports=[stub_import_prefix + "another.rel.path.A"])),
}
@@ -487,12 +500,20 @@ def test_test_missing_source(generate_tests):
}
+def a_ql_class_tester(**kwargs):
+ return ql.ClassTester(**kwargs, elements_module=stub_import)
+
+
+def a_ql_property_tester(**kwargs):
+ return ql.PropertyTester(**kwargs, elements_module=stub_import)
+
+
def test_test_source_present(opts, generate_tests):
write(opts.ql_test_output / "A" / "test.swift")
assert generate_tests([
schema.Class("A"),
]) == {
- "A/A.ql": ql.ClassTester(class_name="A"),
+ "A/A.ql": a_ql_class_tester(class_name="A"),
}
@@ -501,7 +522,7 @@ def test_test_source_present_with_dir(opts, generate_tests):
assert generate_tests([
schema.Class("A", group="foo"),
]) == {
- "foo/A/A.ql": ql.ClassTester(class_name="A"),
+ "foo/A/A.ql": a_ql_class_tester(class_name="A"),
}
@@ -515,7 +536,7 @@ def test_test_total_properties(opts, generate_tests):
schema.PredicateProperty("y", "int"),
]),
]) == {
- "B/B.ql": ql.ClassTester(class_name="B", properties=[
+ "B/B.ql": a_ql_class_tester(class_name="B", properties=[
ql.PropertyForTest(getter="getX", type="string"),
ql.PropertyForTest(getter="y"),
])
@@ -533,21 +554,21 @@ def test_test_partial_properties(opts, generate_tests):
schema.RepeatedOptionalProperty("z", "int"),
]),
]) == {
- "B/B.ql": ql.ClassTester(class_name="B", properties=[
+ "B/B.ql": a_ql_class_tester(class_name="B", properties=[
ql.PropertyForTest(getter="hasX"),
ql.PropertyForTest(getter="getNumberOfYs", type="int"),
]),
- "B/B_getX.ql": ql.PropertyTester(class_name="B",
- property=ql.PropertyForTest(getter="getX", is_total=False,
- type="string")),
- "B/B_getY.ql": ql.PropertyTester(class_name="B",
- property=ql.PropertyForTest(getter="getY", is_total=False,
- is_repeated=True,
- type="bool")),
- "B/B_getZ.ql": ql.PropertyTester(class_name="B",
- property=ql.PropertyForTest(getter="getZ", is_total=False,
- is_repeated=True,
- type="int")),
+ "B/B_getX.ql": a_ql_property_tester(class_name="B",
+ property=ql.PropertyForTest(getter="getX", is_total=False,
+ type="string")),
+ "B/B_getY.ql": a_ql_property_tester(class_name="B",
+ property=ql.PropertyForTest(getter="getY", is_total=False,
+ is_repeated=True,
+ type="bool")),
+ "B/B_getZ.ql": a_ql_property_tester(class_name="B",
+ property=ql.PropertyForTest(getter="getZ", is_total=False,
+ is_repeated=True,
+ type="int")),
}
@@ -562,14 +583,14 @@ def test_test_properties_deduplicated(opts, generate_tests):
schema.Class("B", bases=["Base"], derived={"Final"}),
schema.Class("Final", bases=["A", "B"]),
]) == {
- "Final/Final.ql": ql.ClassTester(class_name="Final", properties=[
+ "Final/Final.ql": a_ql_class_tester(class_name="Final", properties=[
ql.PropertyForTest(getter="getX", type="string"),
ql.PropertyForTest(getter="getNumberOfYs", type="int"),
]),
- "Final/Final_getY.ql": ql.PropertyTester(class_name="Final",
- property=ql.PropertyForTest(getter="getY", is_total=False,
- is_repeated=True,
- type="bool")),
+ "Final/Final_getY.ql": a_ql_property_tester(class_name="Final",
+ property=ql.PropertyForTest(getter="getY", is_total=False,
+ is_repeated=True,
+ type="bool")),
}
@@ -586,7 +607,7 @@ def test_test_properties_skipped(opts, generate_tests):
"b", "int", pragmas=["bar", "qltest_skip", "baz"]),
]),
]) == {
- "Derived/Derived.ql": ql.ClassTester(class_name="Derived"),
+ "Derived/Derived.ql": a_ql_class_tester(class_name="Derived"),
}
@@ -599,7 +620,7 @@ def test_test_base_class_skipped(opts, generate_tests):
]),
schema.Class("Derived", bases=["Base"]),
]) == {
- "Derived/Derived.ql": ql.ClassTester(class_name="Derived"),
+ "Derived/Derived.ql": a_ql_class_tester(class_name="Derived"),
}
@@ -622,7 +643,7 @@ def test_test_class_hierarchy_collapse(opts, generate_tests):
schema.Class("D2", bases=["Base"], derived={"D3"}, properties=[schema.SingleProperty("y", "string")]),
schema.Class("D3", bases=["D2"], properties=[schema.SingleProperty("z", "string")]),
]) == {
- "Base/Base.ql": ql.ClassTester(class_name="Base", show_ql_class=True),
+ "Base/Base.ql": a_ql_class_tester(class_name="Base", show_ql_class=True),
}
@@ -636,9 +657,9 @@ def test_test_class_hierarchy_uncollapse(opts, generate_tests):
schema.Class("D3", bases=["D2"]),
schema.Class("D4", bases=["D2"]),
]) == {
- "Base/Base.ql": ql.ClassTester(class_name="Base", show_ql_class=True),
- "D3/D3.ql": ql.ClassTester(class_name="D3"),
- "D4/D4.ql": ql.ClassTester(class_name="D4"),
+ "Base/Base.ql": a_ql_class_tester(class_name="Base", show_ql_class=True),
+ "D3/D3.ql": a_ql_class_tester(class_name="D3"),
+ "D4/D4.ql": a_ql_class_tester(class_name="D4"),
}
@@ -651,8 +672,8 @@ def test_test_class_hierarchy_uncollapse_at_final(opts, generate_tests):
schema.Class("D2", bases=["Base"], derived={"D3"}),
schema.Class("D3", bases=["D2"], pragmas=["qltest_uncollapse_hierarchy", "bar"]),
]) == {
- "Base/Base.ql": ql.ClassTester(class_name="Base", show_ql_class=True),
- "D3/D3.ql": ql.ClassTester(class_name="D3"),
+ "Base/Base.ql": a_ql_class_tester(class_name="Base", show_ql_class=True),
+ "D3/D3.ql": a_ql_class_tester(class_name="D3"),
}
@@ -663,14 +684,14 @@ def test_property_description(generate_classes):
schema.SingleProperty("foo", "bar", description=description),
]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_objects",
- tableparams=["this", "result"],
- doc="foo of this my object",
- description=description),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_objects",
+ tableparams=["this", "result"],
+ doc="foo of this my object",
+ description=description),
+ ])),
}
@@ -679,12 +700,12 @@ def test_property_doc_override(generate_classes):
schema.Class("MyObject", properties=[
schema.SingleProperty("foo", "bar", doc="baz")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_objects",
- tableparams=["this", "result"], doc="baz"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_objects",
+ tableparams=["this", "result"], doc="baz"),
+ ])),
}
@@ -694,18 +715,18 @@ def test_repeated_property_doc_override(generate_classes):
schema.RepeatedProperty("x", "int", doc="children of this"),
schema.RepeatedOptionalProperty("y", "int", doc="child of this")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="X", plural="Xes", type="int",
- tablename="my_object_xes",
- tableparams=["this", "index", "result"],
- doc="child of this", doc_plural="children of this"),
- ql.Property(singular="Y", plural="Ys", type="int",
- tablename="my_object_ies", is_optional=True,
- tableparams=["this", "index", "result"],
- doc="child of this", doc_plural="children of this"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="X", plural="Xes", type="int",
+ tablename="my_object_xes",
+ tableparams=["this", "index", "result"],
+ doc="child of this", doc_plural="children of this"),
+ ql.Property(singular="Y", plural="Ys", type="int",
+ tablename="my_object_ies", is_optional=True,
+ tableparams=["this", "index", "result"],
+ doc="child of this", doc_plural="children of this"),
+ ])),
}
@@ -716,13 +737,13 @@ def test_property_doc_abbreviations(generate_classes, abbr, expected):
schema.Class("Object", properties=[
schema.SingleProperty(f"foo_{abbr}_bar", "baz")]),
]) == {
- "Object.qll": (ql.Stub(name="Object", base_import=gen_import_prefix + "Object"),
- ql.Class(name="Object", final=True,
- properties=[
- ql.Property(singular=f"Foo{abbr.capitalize()}Bar", type="baz",
- tablename="objects",
- tableparams=["this", "result"], doc=expected_doc),
- ])),
+ "Object.qll": (a_ql_stub(name="Object", base_import=gen_import_prefix + "Object"),
+ a_ql_class(name="Object", final=True,
+ properties=[
+ ql.Property(singular=f"Foo{abbr.capitalize()}Bar", type="baz",
+ tablename="objects",
+ tableparams=["this", "result"], doc=expected_doc),
+ ])),
}
@@ -733,13 +754,13 @@ def test_property_doc_abbreviations_ignored_if_within_word(generate_classes, abb
schema.Class("Object", properties=[
schema.SingleProperty(f"foo_{abbr}acadabra_bar", "baz")]),
]) == {
- "Object.qll": (ql.Stub(name="Object", base_import=gen_import_prefix + "Object"),
- ql.Class(name="Object", final=True,
- properties=[
- ql.Property(singular=f"Foo{abbr.capitalize()}acadabraBar", type="baz",
- tablename="objects",
- tableparams=["this", "result"], doc=expected_doc),
- ])),
+ "Object.qll": (a_ql_stub(name="Object", base_import=gen_import_prefix + "Object"),
+ a_ql_class(name="Object", final=True,
+ properties=[
+ ql.Property(singular=f"Foo{abbr.capitalize()}acadabraBar", type="baz",
+ tablename="objects",
+ tableparams=["this", "result"], doc=expected_doc),
+ ])),
}
@@ -749,20 +770,20 @@ def test_repeated_property_doc_override_with_format(generate_classes):
schema.RepeatedProperty("x", "int", doc="special {children} of this"),
schema.RepeatedOptionalProperty("y", "int", doc="special {child} of this")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="X", plural="Xes", type="int",
- tablename="my_object_xes",
- tableparams=["this", "index", "result"],
- doc="special child of this",
- doc_plural="special children of this"),
- ql.Property(singular="Y", plural="Ys", type="int",
- tablename="my_object_ies", is_optional=True,
- tableparams=["this", "index", "result"],
- doc="special child of this",
- doc_plural="special children of this"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="X", plural="Xes", type="int",
+ tablename="my_object_xes",
+ tableparams=["this", "index", "result"],
+ doc="special child of this",
+ doc_plural="special children of this"),
+ ql.Property(singular="Y", plural="Ys", type="int",
+ tablename="my_object_ies", is_optional=True,
+ tableparams=["this", "index", "result"],
+ doc="special child of this",
+ doc_plural="special children of this"),
+ ])),
}
@@ -772,18 +793,18 @@ def test_repeated_property_doc_override_with_multiple_formats(generate_classes):
schema.RepeatedProperty("x", "int", doc="{cat} or {dog}"),
schema.RepeatedOptionalProperty("y", "int", doc="{cats} or {dogs}")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="X", plural="Xes", type="int",
- tablename="my_object_xes",
- tableparams=["this", "index", "result"],
- doc="cat or dog", doc_plural="cats or dogs"),
- ql.Property(singular="Y", plural="Ys", type="int",
- tablename="my_object_ies", is_optional=True,
- tableparams=["this", "index", "result"],
- doc="cat or dog", doc_plural="cats or dogs"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="X", plural="Xes", type="int",
+ tablename="my_object_xes",
+ tableparams=["this", "index", "result"],
+ doc="cat or dog", doc_plural="cats or dogs"),
+ ql.Property(singular="Y", plural="Ys", type="int",
+ tablename="my_object_ies", is_optional=True,
+ tableparams=["this", "index", "result"],
+ doc="cat or dog", doc_plural="cats or dogs"),
+ ])),
}
@@ -792,12 +813,12 @@ def test_property_doc_override_with_format(generate_classes):
schema.Class("MyObject", properties=[
schema.SingleProperty("foo", "bar", doc="special {baz} of this")]),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_objects",
- tableparams=["this", "result"], doc="special baz of this"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_objects",
+ tableparams=["this", "result"], doc="special baz of this"),
+ ])),
}
@@ -807,12 +828,36 @@ def test_property_on_class_with_default_doc_name(generate_classes):
schema.SingleProperty("foo", "bar")],
default_doc_name="baz"),
]) == {
- "MyObject.qll": (ql.Stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
- ql.Class(name="MyObject", final=True,
- properties=[
- ql.Property(singular="Foo", type="bar", tablename="my_objects",
- tableparams=["this", "result"], doc="foo of this baz"),
- ])),
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject"),
+ a_ql_class(name="MyObject", final=True,
+ properties=[
+ ql.Property(singular="Foo", type="bar", tablename="my_objects",
+ tableparams=["this", "result"], doc="foo of this baz"),
+ ])),
+ }
+
+
+def test_stub_on_class_with_ipa_from_class(generate_classes):
+ assert generate_classes([
+ schema.Class("MyObject", ipa=schema.IpaInfo(from_class="A")),
+ ]) == {
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject", ipa_accessors=[
+ ql.IpaUnderlyingAccessor(argument="Entity", type="Raw::A", constructorparams=["result"]),
+ ]),
+ a_ql_class(name="MyObject", final=True, ipa=True)),
+ }
+
+
+def test_stub_on_class_with_ipa_on_arguments(generate_classes):
+ assert generate_classes([
+ schema.Class("MyObject", ipa=schema.IpaInfo(on_arguments={"base": "A", "index": "int", "label": "string"})),
+ ]) == {
+ "MyObject.qll": (a_ql_stub(name="MyObject", base_import=gen_import_prefix + "MyObject", ipa_accessors=[
+ ql.IpaUnderlyingAccessor(argument="Base", type="Raw::A", constructorparams=["result", "_", "_"]),
+ ql.IpaUnderlyingAccessor(argument="Index", type="int", constructorparams=["_", "result", "_"]),
+ ql.IpaUnderlyingAccessor(argument="Label", type="string", constructorparams=["_", "_", "result"]),
+ ]),
+ a_ql_class(name="MyObject", final=True, ipa=True)),
}
diff --git a/swift/codegen/test/test_render.py b/misc/codegen/test/test_render.py
similarity index 75%
rename from swift/codegen/test/test_render.py
rename to misc/codegen/test/test_render.py
index 950ca385e2f..f9129178f04 100644
--- a/swift/codegen/test/test_render.py
+++ b/misc/codegen/test/test_render.py
@@ -2,10 +2,12 @@ import sys
import pytest
-from swift.codegen.test.utils import *
+from misc.codegen.test.utils import *
import hashlib
+generator = "foo"
+
@pytest.fixture
def pystache_renderer_cls():
@@ -22,7 +24,7 @@ def pystache_renderer(pystache_renderer_cls):
@pytest.fixture
def sut(pystache_renderer):
- return render.Renderer(paths.swift_dir)
+ return render.Renderer(generator, paths.root_dir)
def assert_file(file, text):
@@ -48,12 +50,12 @@ def test_render(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
text = "some text"
pystache_renderer.render_name.side_effect = (text,)
- output = paths.swift_dir / "some/output.txt"
+ output = paths.root_dir / "some/output.txt"
sut.render(data, output)
assert_file(output, text)
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
@@ -61,8 +63,8 @@ def test_managed_render(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
text = "some text"
pystache_renderer.render_name.side_effect = (text,)
- output = paths.swift_dir / "some/output.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ registry = paths.root_dir / "a/registry.list"
write(registry)
with sut.manage(generated=(), stubs=(), registry=registry) as renderer:
@@ -72,7 +74,7 @@ def test_managed_render(pystache_renderer, sut):
assert_file(registry, f"some/output.txt {hash(text)} {hash(text)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
@@ -80,8 +82,8 @@ def test_managed_render_with_no_registry(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
text = "some text"
pystache_renderer.render_name.side_effect = (text,)
- output = paths.swift_dir / "some/output.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ registry = paths.root_dir / "a/registry.list"
with sut.manage(generated=(), stubs=(), registry=registry) as renderer:
renderer.render(data, output)
@@ -90,7 +92,7 @@ def test_managed_render_with_no_registry(pystache_renderer, sut):
assert_file(registry, f"some/output.txt {hash(text)} {hash(text)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
@@ -99,8 +101,8 @@ def test_managed_render_with_post_processing(pystache_renderer, sut):
text = "some text"
postprocessed_text = "some other text"
pystache_renderer.render_name.side_effect = (text,)
- output = paths.swift_dir / "some/output.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ registry = paths.root_dir / "a/registry.list"
write(registry)
with sut.manage(generated=(), stubs=(), registry=registry) as renderer:
@@ -111,14 +113,14 @@ def test_managed_render_with_post_processing(pystache_renderer, sut):
assert_file(registry, f"some/output.txt {hash(text)} {hash(postprocessed_text)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
def test_managed_render_with_erasing(pystache_renderer, sut):
- output = paths.swift_dir / "some/output.txt"
- stub = paths.swift_dir / "some/stub.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ stub = paths.root_dir / "some/stub.txt"
+ registry = paths.root_dir / "a/registry.list"
write(output)
write(stub, "// generated bla bla")
write(registry)
@@ -134,9 +136,9 @@ def test_managed_render_with_erasing(pystache_renderer, sut):
def test_managed_render_with_skipping_of_generated_file(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
- output = paths.swift_dir / "some/output.txt"
+ output = paths.root_dir / "some/output.txt"
some_output = "some output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(output, some_output)
write(registry, f"some/output.txt {hash(some_output)} {hash(some_output)}\n")
@@ -149,16 +151,16 @@ def test_managed_render_with_skipping_of_generated_file(pystache_renderer, sut):
assert_file(registry, f"some/output.txt {hash(some_output)} {hash(some_output)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
def test_managed_render_with_skipping_of_stub_file(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
- stub = paths.swift_dir / "some/stub.txt"
+ stub = paths.root_dir / "some/stub.txt"
some_output = "// generated some output"
some_processed_output = "// generated some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(stub, some_processed_output)
write(registry, f"some/stub.txt {hash(some_output)} {hash(some_processed_output)}\n")
@@ -171,14 +173,14 @@ def test_managed_render_with_skipping_of_stub_file(pystache_renderer, sut):
assert_file(registry, f"some/stub.txt {hash(some_output)} {hash(some_processed_output)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
def test_managed_render_with_modified_generated_file(pystache_renderer, sut):
- output = paths.swift_dir / "some/output.txt"
+ output = paths.root_dir / "some/output.txt"
some_processed_output = "// some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(output, "// something else")
write(registry, f"some/output.txt whatever {hash(some_processed_output)}\n")
@@ -187,9 +189,9 @@ def test_managed_render_with_modified_generated_file(pystache_renderer, sut):
def test_managed_render_with_modified_stub_file_still_marked_as_generated(pystache_renderer, sut):
- stub = paths.swift_dir / "some/stub.txt"
+ stub = paths.root_dir / "some/stub.txt"
some_processed_output = "// generated some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(stub, "// generated something else")
write(registry, f"some/stub.txt whatever {hash(some_processed_output)}\n")
@@ -198,9 +200,9 @@ def test_managed_render_with_modified_stub_file_still_marked_as_generated(pystac
def test_managed_render_with_modified_stub_file_not_marked_as_generated(pystache_renderer, sut):
- stub = paths.swift_dir / "some/stub.txt"
+ stub = paths.root_dir / "some/stub.txt"
some_processed_output = "// generated some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(stub, "// no more generated")
write(registry, f"some/stub.txt whatever {hash(some_processed_output)}\n")
@@ -214,29 +216,46 @@ class MyError(Exception):
pass
-def test_managed_render_exception_drops_written_from_registry(pystache_renderer, sut):
+def test_managed_render_exception_drops_written_and_inexistent_from_registry(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
text = "some text"
pystache_renderer.render_name.side_effect = (text,)
- output = paths.swift_dir / "some/output.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ registry = paths.root_dir / "x/registry.list"
write(output, text)
+ write(paths.root_dir / "a")
+ write(paths.root_dir / "c")
write(registry, "a a a\n"
f"some/output.txt whatever {hash(text)}\n"
- "b b b")
+ "b b b\n"
+ "c c c")
with pytest.raises(MyError):
with sut.manage(generated=(), stubs=(), registry=registry) as renderer:
renderer.render(data, output)
raise MyError
- assert_file(registry, "a a a\nb b b\n")
+ assert_file(registry, "a a a\nc c c\n")
+
+
+def test_managed_render_drops_inexistent_from_registry(pystache_renderer, sut):
+ registry = paths.root_dir / "x/registry.list"
+ write(paths.root_dir / "a")
+ write(paths.root_dir / "c")
+ write(registry, f"a {hash('')} {hash('')}\n"
+ "b b b\n"
+ f"c {hash('')} {hash('')}")
+
+ with sut.manage(generated=(), stubs=(), registry=registry):
+ pass
+
+ assert_file(registry, f"a {hash('')} {hash('')}\nc {hash('')} {hash('')}\n")
def test_managed_render_exception_does_not_erase(pystache_renderer, sut):
- output = paths.swift_dir / "some/output.txt"
- stub = paths.swift_dir / "some/stub.txt"
- registry = paths.swift_dir / "a/registry.list"
+ output = paths.root_dir / "some/output.txt"
+ stub = paths.root_dir / "some/stub.txt"
+ registry = paths.root_dir / "a/registry.list"
write(output)
write(stub, "// generated bla bla")
write(registry)
@@ -260,7 +279,7 @@ def test_render_with_extensions(pystache_renderer, sut):
sut.render(data, output)
expected_templates = ["test_template_foo", "test_template_bar", "test_template_baz"]
assert pystache_renderer.mock_calls == [
- mock.call.render_name(t, data, generator=paths.exe_file.relative_to(paths.swift_dir))
+ mock.call.render_name(t, data, generator=generator)
for t in expected_templates
]
for expected_output, expected_contents in zip(expected_outputs, rendered):
@@ -269,9 +288,9 @@ def test_render_with_extensions(pystache_renderer, sut):
def test_managed_render_with_force_not_skipping_generated_file(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
- output = paths.swift_dir / "some/output.txt"
+ output = paths.root_dir / "some/output.txt"
some_output = "some output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(output, some_output)
write(registry, f"some/output.txt {hash(some_output)} {hash(some_output)}\n")
@@ -284,16 +303,16 @@ def test_managed_render_with_force_not_skipping_generated_file(pystache_renderer
assert_file(registry, f"some/output.txt {hash(some_output)} {hash(some_output)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
def test_managed_render_with_force_not_skipping_stub_file(pystache_renderer, sut):
data = mock.Mock(spec=("template",))
- stub = paths.swift_dir / "some/stub.txt"
+ stub = paths.root_dir / "some/stub.txt"
some_output = "// generated some output"
some_processed_output = "// generated some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(stub, some_processed_output)
write(registry, f"some/stub.txt {hash(some_output)} {hash(some_processed_output)}\n")
@@ -306,14 +325,14 @@ def test_managed_render_with_force_not_skipping_stub_file(pystache_renderer, sut
assert_file(registry, f"some/stub.txt {hash(some_output)} {hash(some_output)}\n")
assert pystache_renderer.mock_calls == [
- mock.call.render_name(data.template, data, generator=paths.exe_file.relative_to(paths.swift_dir)),
+ mock.call.render_name(data.template, data, generator=generator),
]
def test_managed_render_with_force_ignores_modified_generated_file(sut):
- output = paths.swift_dir / "some/output.txt"
+ output = paths.root_dir / "some/output.txt"
some_processed_output = "// some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(output, "// something else")
write(registry, f"some/output.txt whatever {hash(some_processed_output)}\n")
@@ -322,9 +341,9 @@ def test_managed_render_with_force_ignores_modified_generated_file(sut):
def test_managed_render_with_force_ignores_modified_stub_file_still_marked_as_generated(sut):
- stub = paths.swift_dir / "some/stub.txt"
+ stub = paths.root_dir / "some/stub.txt"
some_processed_output = "// generated some processed output"
- registry = paths.swift_dir / "a/registry.list"
+ registry = paths.root_dir / "a/registry.list"
write(stub, "// generated something else")
write(registry, f"some/stub.txt whatever {hash(some_processed_output)}\n")
diff --git a/swift/codegen/test/test_schema.py b/misc/codegen/test/test_schemaloader.py
similarity index 94%
rename from swift/codegen/test/test_schema.py
rename to misc/codegen/test/test_schemaloader.py
index b5f5f7d1c1a..9724a82da8f 100644
--- a/swift/codegen/test/test_schema.py
+++ b/misc/codegen/test/test_schemaloader.py
@@ -2,12 +2,13 @@ import sys
import pytest
-from swift.codegen.test.utils import *
-from swift.codegen.lib.schema import defs
+from misc.codegen.test.utils import *
+from misc.codegen.lib import schemadefs as defs
+from misc.codegen.loaders.schemaloader import load
def test_empty_schema():
- @schema.load
+ @load
class data:
pass
@@ -18,7 +19,7 @@ def test_empty_schema():
def test_one_empty_class():
- @schema.load
+ @load
class data:
class MyClass:
pass
@@ -30,7 +31,7 @@ def test_one_empty_class():
def test_two_empty_classes():
- @schema.load
+ @load
class data:
class MyClass1:
pass
@@ -50,7 +51,7 @@ def test_no_external_bases():
pass
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class MyClass(A):
pass
@@ -58,7 +59,7 @@ def test_no_external_bases():
def test_no_multiple_roots():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class MyClass1:
pass
@@ -68,7 +69,7 @@ def test_no_multiple_roots():
def test_empty_classes_diamond():
- @schema.load
+ @load
class data:
class A:
pass
@@ -92,7 +93,7 @@ def test_empty_classes_diamond():
#
def test_group():
- @schema.load
+ @load
class data:
@defs.group("xxx")
class A:
@@ -104,7 +105,7 @@ def test_group():
def test_group_is_inherited():
- @schema.load
+ @load
class data:
class A:
pass
@@ -129,7 +130,7 @@ def test_group_is_inherited():
def test_no_mixed_groups_in_bases():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class A:
pass
@@ -151,14 +152,14 @@ def test_no_mixed_groups_in_bases():
def test_lowercase_rejected():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class aLowerCase:
pass
def test_properties():
- @schema.load
+ @load
class data:
class A:
one: defs.string
@@ -182,7 +183,7 @@ def test_class_properties():
class A:
pass
- @schema.load
+ @load
class data:
class A:
pass
@@ -205,7 +206,7 @@ def test_class_properties():
def test_string_reference_class_properties():
- @schema.load
+ @load
class data:
class A:
one: "A"
@@ -227,14 +228,14 @@ def test_string_reference_class_properties():
lambda t: defs.list[defs.optional[t]]])
def test_string_reference_dangling(spec):
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class A:
x: spec("B")
def test_children():
- @schema.load
+ @load
class data:
class A:
one: "A" | defs.child
@@ -255,7 +256,7 @@ def test_children():
@pytest.mark.parametrize("spec", [defs.string, defs.int, defs.boolean, defs.predicate])
def test_builtin_and_predicate_children_not_allowed(spec):
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class A:
x: spec | defs.child
@@ -271,7 +272,7 @@ _pragmas = [(defs.qltest.skip, "qltest_skip"),
@pytest.mark.parametrize("pragma,expected", _pragmas)
def test_property_with_pragma(pragma, expected):
- @schema.load
+ @load
class data:
class A:
x: defs.string | pragma
@@ -288,7 +289,7 @@ def test_property_with_pragmas():
for pragma, _ in _pragmas:
spec |= pragma
- @schema.load
+ @load
class data:
class A:
x: spec
@@ -302,7 +303,7 @@ def test_property_with_pragmas():
@pytest.mark.parametrize("pragma,expected", _pragmas)
def test_class_with_pragma(pragma, expected):
- @schema.load
+ @load
class data:
@pragma
class A:
@@ -318,7 +319,7 @@ def test_class_with_pragmas():
for p, _ in _pragmas:
p(cls)
- @schema.load
+ @load
class data:
class A:
pass
@@ -331,7 +332,7 @@ def test_class_with_pragmas():
def test_ipa_from_class():
- @schema.load
+ @load
class data:
class A:
pass
@@ -347,7 +348,7 @@ def test_ipa_from_class():
def test_ipa_from_class_ref():
- @schema.load
+ @load
class data:
@defs.synth.from_class("B")
class A:
@@ -364,7 +365,7 @@ def test_ipa_from_class_ref():
def test_ipa_from_class_dangling():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
@defs.synth.from_class("X")
class A:
@@ -372,7 +373,7 @@ def test_ipa_from_class_dangling():
def test_ipa_class_on():
- @schema.load
+ @load
class data:
class A:
pass
@@ -391,7 +392,7 @@ def test_ipa_class_on_ref():
class A:
pass
- @schema.load
+ @load
class data:
@defs.synth.on_arguments(b="B", i=defs.int)
class A:
@@ -408,7 +409,7 @@ def test_ipa_class_on_ref():
def test_ipa_class_on_dangling():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
@defs.synth.on_arguments(s=defs.string, a="A", i=defs.int)
class B:
@@ -416,7 +417,7 @@ def test_ipa_class_on_dangling():
def test_ipa_class_hierarchy():
- @schema.load
+ @load
class data:
class Root:
pass
@@ -449,7 +450,7 @@ def test_ipa_class_hierarchy():
def test_class_docstring():
- @schema.load
+ @load
class data:
class A:
"""Very important class."""
@@ -460,7 +461,7 @@ def test_class_docstring():
def test_property_docstring():
- @schema.load
+ @load
class data:
class A:
x: int | defs.desc("very important property.")
@@ -471,7 +472,7 @@ def test_property_docstring():
def test_class_docstring_newline():
- @schema.load
+ @load
class data:
class A:
"""Very important
@@ -483,7 +484,7 @@ def test_class_docstring_newline():
def test_property_docstring_newline():
- @schema.load
+ @load
class data:
class A:
x: int | defs.desc("""very important
@@ -496,7 +497,7 @@ def test_property_docstring_newline():
def test_class_docstring_stripped():
- @schema.load
+ @load
class data:
class A:
"""
@@ -511,7 +512,7 @@ def test_class_docstring_stripped():
def test_property_docstring_stripped():
- @schema.load
+ @load
class data:
class A:
x: int | defs.desc("""
@@ -526,7 +527,7 @@ def test_property_docstring_stripped():
def test_class_docstring_split():
- @schema.load
+ @load
class data:
class A:
"""Very important class.
@@ -539,7 +540,7 @@ def test_class_docstring_split():
def test_property_docstring_split():
- @schema.load
+ @load
class data:
class A:
x: int | defs.desc("""very important property.
@@ -553,7 +554,7 @@ def test_property_docstring_split():
def test_class_docstring_indent():
- @schema.load
+ @load
class data:
class A:
"""
@@ -567,7 +568,7 @@ def test_class_docstring_indent():
def test_property_docstring_indent():
- @schema.load
+ @load
class data:
class A:
x: int | defs.desc("""
@@ -582,7 +583,7 @@ def test_property_docstring_indent():
def test_property_doc_override():
- @schema.load
+ @load
class data:
class A:
x: int | defs.doc("y")
@@ -595,7 +596,7 @@ def test_property_doc_override():
def test_property_doc_override_no_newlines():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class A:
x: int | defs.doc("no multiple\nlines")
@@ -603,14 +604,14 @@ def test_property_doc_override_no_newlines():
def test_property_doc_override_no_trailing_dot():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class A:
x: int | defs.doc("no dots please.")
def test_class_default_doc_name():
- @schema.load
+ @load
class data:
@defs.ql.default_doc_name("b")
class A:
@@ -622,7 +623,7 @@ def test_class_default_doc_name():
def test_null_class():
- @schema.load
+ @load
class data:
class Root:
pass
@@ -641,7 +642,7 @@ def test_null_class():
def test_null_class_cannot_be_derived():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class Root:
pass
@@ -656,7 +657,7 @@ def test_null_class_cannot_be_derived():
def test_null_class_cannot_be_defined_multiple_times():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class Root:
pass
@@ -672,7 +673,7 @@ def test_null_class_cannot_be_defined_multiple_times():
def test_uppercase_acronyms_are_rejected():
with pytest.raises(schema.Error):
- @schema.load
+ @load
class data:
class Root:
pass
diff --git a/swift/codegen/test/test_trapgen.py b/misc/codegen/test/test_trapgen.py
similarity index 97%
rename from swift/codegen/test/test_trapgen.py
rename to misc/codegen/test/test_trapgen.py
index c8121ce48ba..a81f40e0dd8 100644
--- a/swift/codegen/test/test_trapgen.py
+++ b/misc/codegen/test/test_trapgen.py
@@ -1,8 +1,8 @@
import sys
-from swift.codegen.generators import trapgen
-from swift.codegen.lib import cpp, dbscheme
-from swift.codegen.test.utils import *
+from misc.codegen.generators import trapgen
+from misc.codegen.lib import cpp, dbscheme
+from misc.codegen.test.utils import *
output_dir = pathlib.Path("path", "to", "output")
diff --git a/swift/codegen/test/utils.py b/misc/codegen/test/utils.py
similarity index 82%
rename from swift/codegen/test/utils.py
rename to misc/codegen/test/utils.py
index 3911206bfa5..e33500711f2 100644
--- a/swift/codegen/test/utils.py
+++ b/misc/codegen/test/utils.py
@@ -3,7 +3,7 @@ from unittest import mock
import pytest
-from swift.codegen.lib import render, schema, paths
+from misc.codegen.lib import render, schema, paths
schema_dir = pathlib.Path("a", "dir")
schema_file = schema_dir / "schema.py"
@@ -33,21 +33,21 @@ def render_manager(renderer):
@pytest.fixture
def opts():
ret = mock.MagicMock()
- ret.swift_dir = paths.swift_dir
+ ret.root_dir = paths.root_dir
return ret
@pytest.fixture(autouse=True)
def override_paths(tmp_path):
- with mock.patch("swift.codegen.lib.paths.swift_dir", tmp_path), \
- mock.patch("swift.codegen.lib.paths.exe_file", tmp_path / "exe"):
+ with mock.patch("misc.codegen.lib.paths.root_dir", tmp_path), \
+ mock.patch("misc.codegen.lib.paths.exe_file", tmp_path / "exe"):
yield
@pytest.fixture
def input(opts, tmp_path):
opts.schema = tmp_path / schema_file
- with mock.patch("swift.codegen.lib.schema.load_file") as load_mock:
+ with mock.patch("misc.codegen.loaders.schemaloader.load_file") as load_mock:
load_mock.return_value = schema.Schema([])
yield load_mock.return_value
assert load_mock.mock_calls == [
@@ -58,7 +58,7 @@ def input(opts, tmp_path):
@pytest.fixture
def dbscheme_input(opts, tmp_path):
opts.dbscheme = tmp_path / dbscheme_file
- with mock.patch("swift.codegen.lib.dbscheme.iterload") as load_mock:
+ with mock.patch("misc.codegen.loaders.dbschemeloader.iterload") as load_mock:
load_mock.entities = []
load_mock.side_effect = lambda _: load_mock.entities
yield load_mock
diff --git a/misc/scripts/shared-code-metrics.py b/misc/scripts/shared-code-metrics.py
new file mode 100644
index 00000000000..bfc613e5c87
--- /dev/null
+++ b/misc/scripts/shared-code-metrics.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python3
+# Generates a report on the amount of code sharing in this repo
+#
+# The purpose of this is
+# a) To be able to understand the structure and dependencies
+# b) To provide a metric that measures the amount of shared vs non-shared code
+
+import datetime
+from pathlib import Path
+import json
+import yaml
+
+# To add more languages, add them to this list:
+languages = ['cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ql', 'ruby', 'swift']
+
+repo_location = Path(__file__).parent.parent.parent
+
+# Gets the total number of lines in a file
+def linecount(file):
+ with open(file, 'r') as fp: return len(fp.readlines())
+
+# Gets the language name from the path
+def get_language(path):
+ return path.parts[len(repo_location.parts)]
+
+# Is this path a CodeQL query file
+def is_query(path):
+ return path.suffix == '.ql'
+
+# Is this path a CodeQL library file
+def is_library(path):
+ return path.suffix == '.qll'
+
+# Is this path a relevant CodeQL file
+def is_ql(path):
+ return is_query(path) or is_library(path)
+
+# Is this file a CodeQL package file
+def is_package(path):
+ return path.name == 'qlpack.yml'
+
+# A CodeQL source file
+class QlFile:
+ def __init__(self, path):
+ self.path = path
+ self.lines = linecount(path)
+        self.shared = False
+
+ def language(self):
+ return get_language(self.path)
+
+ def query(self):
+ return is_query(self.path)
+
+ def library(self):
+ return is_library(self.path)
+
+ # Returns if this qlfile is not shared, and is in a pack that is only in one language
+ def isOnlyInLanguage(self, language):
+ return not self.shared and (self.package is None or self.package.languages == {language}) and self.language() == language
+
+# Represents a language folder
+class Language:
+ def __init__(self, name):
+ self.name = name
+ self.packs = []
+ self.nonshared_files = 0
+ self.nonshared_lines = 0
+ self.imported_files = 0
+ self.imported_lines = 0
+
+ def addQlFile(self, qlfile):
+ if not qlfile.shared:
+ self.nonshared_files += 1
+ self.nonshared_lines += qlfile.lines
+
+ def addSharedAsset(self, package):
+ self.imported_files += package.files
+ self.imported_lines += package.lines
+
+# A shared package or file
+class SharedAsset:
+ def __init__(self, name):
+ self.name = name
+
+# A file shared using identical-files.json
+class IdenticalFileSet(SharedAsset):
+ def __init__(self, name, ql_files):
+ self.name = name
+ self.languages = set()
+ self.files = 0
+ self.lines = 0
+ for file in ql_files:
+ file.package = self
+ file.shared = True
+            self.files += 1
+            self.lines += file.lines
+ self.languages.add(file.language())
+
+ # Gets a pretty-printed markdown link
+ def link(self):
+ return self.name
+
+# Represents all files shared in `identical-files.json`
+# Reads the file and builds a list of assets
+class IdenticalFiles:
+ def __init__(self, repo_location, ql_file_index):
+ identical_files = repo_location/'config'/'identical-files.json'
+ with open(identical_files, "r") as fp:
+ identical_files_json = json.load(fp)
+ # Create a list of assets
+ self.assets = []
+ for group in identical_files_json:
+ paths = []
+ for file in identical_files_json[group]:
+ path = repo_location / file
+ if is_ql(path):
+ ql_file_index[path].shared = True
+ paths.append(ql_file_index[path])
+ self.assets.append(IdenticalFileSet(group, paths))
+
+# A package created from a `qlpack.yml`` file
+class Package(SharedAsset):
+ def __init__(self, path, ql_file_index):
+ self.path = path
+ self.language = get_language(path)
+ self.lines = 0
+ self.files = 0
+ self.languages = set()
+ self.languages.add(self.language)
+ self.identical_files_dependencies = set()
+ with open(path, 'r') as fp:
+ y = yaml.safe_load(fp)
+ if 'name' in y:
+ self.name = y['name']
+ else:
+ self.name = path.parent.name
+ if 'dependencies' in y:
+ self.deps = y['dependencies']
+ if self.deps is None:
+ self.deps = {}
+ else:
+ self.deps = {}
+ # Mark all relevant files with their package
+ for file in ql_file_index:
+ if self.containsDirectory(file):
+ file = ql_file_index[file]
+ if not file.shared:
+ file.package = self
+ self.lines += file.lines
+ self.files += 1
+ else:
+ self.identical_files_dependencies.add(file.package)
+ self.url = "https://github.com/github/codeql/blob/main/" + str(path.relative_to(repo_location))
+
+ # Gets a pretty-printed markdown link
+ def link(self):
+ return '[' + self.name + '](' + self.url + ')'
+
+ def containsDirectory(self, dir):
+ return self.path.parent.parts == dir.parts[:len(self.path.parent.parts)]
+ # dir.startsWith(self.path.parent)
+
+    # Constructs a list of transitive dependencies of this package.
+ def calculateDependencies(self, packageNameMap):
+ self.transitive_dependencies = set(self.deps)
+ queue = list(self.deps)
+ while len(queue):
+ item = queue.pop()
+ for dep2 in packageNameMap[item].deps:
+ if dep2 not in self.transitive_dependencies:
+ self.transitive_dependencies.add(dep2)
+ queue.append(dep2)
+ # Calculate the amount of imported code
+ self.total_imported_files = 0
+ self.total_imported_lines = 0
+ self.all_dependencies = set(self.identical_files_dependencies)
+ for dep in self.transitive_dependencies:
+ self.all_dependencies.add(packageNameMap[dep])
+ for dep in self.all_dependencies:
+ self.total_imported_files += dep.files
+ self.total_imported_lines += dep.lines
+ dep.languages.add(self.language)
+
+# Create a big index of all files and their line counts.
+
+# Map from path to line count
+ql_file_index = {}
+package_files = []
+
+# Queue of directories to read
+directories_to_scan = [repo_location]
+
+while len(directories_to_scan)!=0:
+ dir = directories_to_scan.pop()
+ for p in dir.iterdir():
+ if p.is_dir():
+ directories_to_scan.append(p)
+ elif is_ql(p):
+ ql_file_index[p] = QlFile(p)
+ elif is_package(p):
+ package_files.append(p)
+
+# Create identical_files_json
+identical_files = IdenticalFiles(repo_location, ql_file_index)
+
+# Create packages
+# Do this after identical_files so that we can figure out the package sizes
+# Do this after getting the ql_file_index fully built
+packages = []
+for file in package_files:
+ packages.append(Package(file, ql_file_index))
+
+# List all shared assets
+shared_assets = packages + identical_files.assets
+
+# Construct statistics for each language
+language_info = {}
+for l in languages:
+ language_info[l] = Language(l)
+
+for qlfile in ql_file_index.values():
+ lang = qlfile.language()
+ if lang in language_info:
+ info = language_info[lang]
+ if qlfile.isOnlyInLanguage(lang):
+ info.addQlFile(qlfile)
+
+# Determine all package dependencies
+
+packageNameMap = {}
+
+for package in packages:
+ packageNameMap[package.name] = package
+
+for package in packages:
+ package.calculateDependencies(packageNameMap)
+
+for asset in shared_assets:
+ if len(asset.languages)>1:
+ for lang in asset.languages:
+ if lang in language_info:
+ language_info[lang].addSharedAsset(asset)
+
+
+# Functions to output the results
+
+def list_assets(shared_assets, language_info):
+ print('| Asset | Files | Lines |', end='')
+ for lang in language_info:
+ print('', lang, '|', end='')
+ print()
+ print('| ----- | ----- | ----- |', end='')
+ for lang in language_info:
+ print(' ---- |', end='')
+ print()
+ for asset in shared_assets:
+ print('|', asset.link(), '|', asset.files ,'|', asset.lines, '|', end=' ')
+ for lang in language_info:
+ if lang in asset.languages:
+ print('yes |', end=' ')
+ else:
+ print(' |', end=' ');
+ print()
+ print()
+
+def list_package_dependencies(package):
+ print("Package", package.path, package.name, package.files, package.lines, package.total_imported_files, package.total_imported_lines)
+ for dep in package.all_dependencies:
+ print(" ", dep.name, dep.files, dep.lines)
+
+def print_package_dependencies(packages):
+ print('| Package name | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
+ print('| ------------ | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
+ for package in packages:
+ nlines = package.lines + package.total_imported_lines
+ shared_percentage = 100 * package.total_imported_lines / nlines if nlines>0 else 0
+ print('|', package.link(), '|', package.files, '|', package.lines, '|', package.total_imported_files, '|', package.total_imported_lines, '|',
+ # ','.join([p.name for p in package.all_dependencies]),
+ "%.2f" % shared_percentage, '|')
+ print()
+
+def print_language_dependencies(packages):
+ print_package_dependencies([p for p in packages if p.name.endswith('-all') and p.name.count('-')==1])
+
+def list_shared_code_by_language(language_info):
+ # For each language directory, list the files that are (1) inside the directory and not shared,
+ # (2) packages from outside the directory, plus identical files
+ print('| Language | Non-shared files | Non-shared lines of code | Imported files | Imported lines of code | Shared code % |')
+ print('| -------- | ---------------- | ------------------------ | -------------- | ---------------------- | ------------- |')
+ for lang in language_info:
+ info = language_info[lang]
+ total = info.imported_lines + info.nonshared_lines
+ shared_percentage = 100 * info.imported_lines / total if total>0 else 0
+ print('|', lang, '|', info.nonshared_files, '|', info.nonshared_lines, '|', info.imported_files, '|', info.imported_lines, '|', "%.2f" % shared_percentage, '|')
+ print()
+
+
+# Output reports
+
+print('# Report on CodeQL code sharing\n')
+print('Generated on', datetime.datetime.now())
+print()
+
+print('## Shared code by language\n')
+
+list_shared_code_by_language(language_info)
+
+print('''
+* *Non-shared files*: The number of CodeQL files (`.ql`/`.qll`) that are only used within this language folder. Excludes `identical-files.json` that are shared between multiple languages.
+* *Non-shared lines of code*: The number of lines of code in the non-shared files.
+* *Imported files*: All CodeQL files (`.ql`/`.qll`) that are transitively used in this language folder, either via packages or `identical-files.json`
+* *Imported lines of code*: The number of lines of code in the imported files
+* *Shared code %*: The proportion of imported lines / total lines (nonshared + imported).
+
+## Shared packages used by language
+
+A package is *used* if it is a direct or indirect dependency, or a file shared via `identical-files.json`.
+
+''')
+
+list_assets(shared_assets, language_info)
+
+print('## Shared code by language pack\n')
+
+print_language_dependencies(packages)
+
+print('## Shared code by package\n')
+
+print_package_dependencies(packages)
diff --git a/misc/suite-helpers/CHANGELOG.md b/misc/suite-helpers/CHANGELOG.md
index 108c522a649..e6532a3f5d8 100644
--- a/misc/suite-helpers/CHANGELOG.md
+++ b/misc/suite-helpers/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.4.3
+
+No user-facing changes.
+
## 0.4.2
No user-facing changes.
diff --git a/misc/suite-helpers/change-notes/released/0.4.3.md b/misc/suite-helpers/change-notes/released/0.4.3.md
new file mode 100644
index 00000000000..126fb622583
--- /dev/null
+++ b/misc/suite-helpers/change-notes/released/0.4.3.md
@@ -0,0 +1,3 @@
+## 0.4.3
+
+No user-facing changes.
diff --git a/misc/suite-helpers/codeql-pack.release.yml b/misc/suite-helpers/codeql-pack.release.yml
index 94c5b17423c..1ec9c4ea5d9 100644
--- a/misc/suite-helpers/codeql-pack.release.yml
+++ b/misc/suite-helpers/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.4.2
+lastReleaseVersion: 0.4.3
diff --git a/misc/suite-helpers/qlpack.yml b/misc/suite-helpers/qlpack.yml
index 6154a791ba2..8f380beb26b 100644
--- a/misc/suite-helpers/qlpack.yml
+++ b/misc/suite-helpers/qlpack.yml
@@ -1,3 +1,3 @@
name: codeql/suite-helpers
-version: 0.4.3-dev
+version: 0.4.4-dev
groups: shared
diff --git a/python/ql/lib/CHANGELOG.md b/python/ql/lib/CHANGELOG.md
index 473d1ebc67e..c7ade22bbcb 100644
--- a/python/ql/lib/CHANGELOG.md
+++ b/python/ql/lib/CHANGELOG.md
@@ -1,3 +1,16 @@
+## 0.8.0
+
+### Breaking Changes
+
+- Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
+ the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
+ extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
+ correct version of the Python standard library is extracted.
+
+### Minor Analysis Improvements
+
+* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
+
## 0.7.2
No user-facing changes.
diff --git a/python/ql/lib/change-notes/2023-01-16-new-call-graph.md b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
new file mode 100644
index 00000000000..3a9e6c3abc0
--- /dev/null
+++ b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
@@ -0,0 +1,4 @@
+---
+category: majorAnalysis
+---
+* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside their class.
diff --git a/python/ql/lib/change-notes/2023-02-13-hmac-modeling.md b/python/ql/lib/change-notes/2023-02-13-hmac-modeling.md
new file mode 100644
index 00000000000..2753c24a818
--- /dev/null
+++ b/python/ql/lib/change-notes/2023-02-13-hmac-modeling.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Added modeling of cryptographic operations in the `hmac` library.
diff --git a/python/ql/lib/change-notes/2023-02-16-import-if-then-else.md b/python/ql/lib/change-notes/2023-02-16-import-if-then-else.md
new file mode 100644
index 00000000000..c377014a32e
--- /dev/null
+++ b/python/ql/lib/change-notes/2023-02-16-import-if-then-else.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Fixed module resolution so we properly recognize definitions made within if-then-else statements.
diff --git a/python/ql/lib/change-notes/released/0.8.0.md b/python/ql/lib/change-notes/released/0.8.0.md
new file mode 100644
index 00000000000..16533f766b6
--- /dev/null
+++ b/python/ql/lib/change-notes/released/0.8.0.md
@@ -0,0 +1,12 @@
+## 0.8.0
+
+### Breaking Changes
+
+- Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
+ the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
+ extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
+ correct version of the Python standard library is extracted.
+
+### Minor Analysis Improvements
+
+* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
diff --git a/python/ql/lib/codeql-pack.release.yml b/python/ql/lib/codeql-pack.release.yml
index fee171e9685..37eab3197dc 100644
--- a/python/ql/lib/codeql-pack.release.yml
+++ b/python/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.7.2
+lastReleaseVersion: 0.8.0
diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml
index cc59833287a..03b59d4771c 100644
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
name: codeql/python-all
-version: 0.7.3-dev
+version: 0.8.1-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
diff --git a/python/ql/lib/semmle/python/Constants.qll b/python/ql/lib/semmle/python/Constants.qll
index 19e3e757989..03254a4bfd0 100644
--- a/python/ql/lib/semmle/python/Constants.qll
+++ b/python/ql/lib/semmle/python/Constants.qll
@@ -3,32 +3,34 @@
import python
/** the Python major version number */
-int major_version() {
- explicit_major_version(result)
- or
- not explicit_major_version(_) and
- /* If there is more than one version, prefer 2 for backwards compatibility */
- (if py_flags_versioned("version.major", "2", "2") then result = 2 else result = 3)
-}
+int major_version() { full_python_analysis_version(result, _, _) }
/** the Python minor version number */
-int minor_version() {
- exists(string v | py_flags_versioned("version.minor", v, major_version().toString()) |
- result = v.toInt()
- )
-}
+int minor_version() { full_python_analysis_version(_, result, _) }
/** the Python micro version number */
-int micro_version() {
- exists(string v | py_flags_versioned("version.micro", v, major_version().toString()) |
- result = v.toInt()
- )
+int micro_version() { full_python_analysis_version(_, _, result) }
+
+/** Gets the latest supported minor version for the given major version. */
+private int latest_supported_minor_version(int major) {
+ major = 2 and result = 7
+ or
+ major = 3 and result = 11
}
-private predicate explicit_major_version(int v) {
- exists(string version | py_flags_versioned("language.version", version, _) |
- version.charAt(0) = "2" and v = 2
- or
- version.charAt(0) = "3" and v = 3
+private predicate full_python_analysis_version(int major, int minor, int micro) {
+ exists(string version_string | py_flags_versioned("language.version", version_string, _) |
+ major = version_string.regexpFind("\\d+", 0, _).toInt() and
+ (
+ minor = version_string.regexpFind("\\d+", 1, _).toInt()
+ or
+ not exists(version_string.regexpFind("\\d+", 1, _)) and
+ minor = latest_supported_minor_version(major)
+ ) and
+ (
+ micro = version_string.regexpFind("\\d+", 2, _).toInt()
+ or
+ not exists(version_string.regexpFind("\\d+", 2, _)) and micro = 0
+ )
)
}
diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll
index 81a01025558..8dfdc4a6341 100644
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
- Stages::DataFlow::ref() and
+ Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
result.getNode() = this.getNode().getStarArg() and
result.getBasicBlock().dominates(this.getBasicBlock())
}
+
+ /** Gets a dictionary (**) argument of this call, if any. */
+ ControlFlowNode getKwargs() {
+ result.getNode() = this.getNode().getKwargs() and
+ result.getBasicBlock().dominates(this.getBasicBlock())
+ }
}
/** A control flow corresponding to an attribute expression, such as `value.attr` */
diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
index 22a2d1c1eb2..79dd19dd972 100644
--- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
+++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
@@ -26,6 +26,26 @@ private newtype TCryptographicAlgorithm =
isWeakPasswordHashingAlgorithm(name) and isWeak = true
}
+/**
+ * Gets the most specific `CryptographicAlgorithm` that matches the given `name`.
+ * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats.
+ * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match.
+ */
+bindingset[name]
+private CryptographicAlgorithm getBestAlgorithmForName(string name) {
+ result =
+ max(CryptographicAlgorithm algorithm |
+ algorithm.getName() =
+ [
+ name.toUpperCase(), // the full name
+        name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes
+        name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes or underscores
+ ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces
+ |
+ algorithm order by algorithm.getName().length()
+ )
+}
+
/**
* A cryptographic algorithm.
*/
@@ -39,15 +59,11 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
abstract string getName();
/**
- * Holds if the name of this algorithm matches `name` modulo case,
- * white space, dashes, underscores, and anything after a dash in the name
- * (to ignore modes of operation, such as CBC or ECB).
+ * Holds if the name of this algorithm is the most specific match for `name`.
+ * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc.
*/
bindingset[name]
- predicate matchesName(string name) {
- [name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
- .regexpReplaceAll("[-_ ]", "") = getName()
- }
+ predicate matchesName(string name) { this = getBestAlgorithmForName(name) }
/**
* Holds if this algorithm is weak.
diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll
index a234ba2cc1f..8bb63d97876 100644
--- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll
+++ b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll
@@ -14,8 +14,20 @@
predicate isStrongHashingAlgorithm(string name) {
name =
[
+ // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2
+ // and https://www.blake2.net/
+ "BLAKE2", "BLAKE2B", "BLAKE2S",
+ // see https://github.com/BLAKE3-team/BLAKE3
+ "BLAKE3",
+ //
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
- "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
+ "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512",
+ // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128
+ "SHAKE128", "SHAKE256",
+ // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3
+ "SM3",
+ // see https://security.stackexchange.com/a/216297
+ "WHIRLPOOL",
]
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
new file mode 100644
index 00000000000..0d391d7f98d
--- /dev/null
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -0,0 +1,1615 @@
+/**
+ * INTERNAL: Do not use.
+ *
+ * TypeTracker based call-graph.
+ *
+ * The overall scheme for resolving calls, is to notice that Python has different kinds
+ * of callables, and resolve those with different strategies. Currently we handle these
+ * completely separately:
+ * 1. plain functions (and lambdas)
+ * 2. methods on classes
+ * 3. class instantiation
+ *
+ * So we have type-trackers for each of the 3 categories above, with some considerable
+ * effort to handle different kinds of methods on classes (staticmethod, classmethod,
+ * normal), and resolving methods correctly in regards to MRO.
+ *
+ *
+ * A goal of this library is to support modeling calls that are made by third-party
+ * libraries. For example `call_later(func, arg0, arg1, foo=val)`, and the fact that the
+ * library might inject its own arguments, for example a context that will always be
+ * passed as the actual first argument to the function. Currently the aim is to provide
+ * enough predicates for such `call_later` function to be modeled by providing
+ * additional data-flow steps for the arguments/parameters. This means we cannot have
+ * any special logic that requires an AST call to be made before we care to figure out
+ * what callable this call might end up targeting.
+ *
+ * Specifically this means that we cannot use type-backtrackers from the function of a
+ * `CallNode`, since there is no `CallNode` to backtrack from for `func` in the example
+ * above.
+ *
+ * Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to
+ * ask what targets any data-flow node has. But it's still the plan to do this!
+ */
+
+private import python
+private import DataFlowPublic
+private import DataFlowPrivate
+private import FlowSummaryImpl as FlowSummaryImpl
+private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
+private import semmle.python.internal.CachedStages
+
+newtype TParameterPosition =
+ /** Used for `self` in methods, and `cls` in classmethods. */
+ TSelfParameterPosition() or
+ TPositionalParameterPosition(int index) {
+ index = any(Parameter p).getPosition()
+ or
+ // since synthetic parameters are made for a synthetic summary callable, based on
+ // what Argument positions they have flow for, we need to make sure we have such
+ // parameter positions available.
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index)
+ } or
+ TKeywordParameterPosition(string name) {
+ name = any(Parameter p).getName()
+ or
+ // see comment for TPositionalParameterPosition
+ FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name)
+ } or
+ TStarArgsParameterPosition(int index) {
+ // since `.getPosition` does not work for `*args`, we need *args parameter positions
+ // at index 1 larger than the largest positional parameter position (and 0 must be
+ // included as well). This is a bit of an over-approximation.
+ index = 0 or
+ index = any(Parameter p).getPosition() + 1
+ } or
+ TSynthStarArgsElementParameterPosition(int index) { exists(TStarArgsParameterPosition(index)) } or
+ TDictSplatParameterPosition()
+
+/** A parameter position. */
+class ParameterPosition extends TParameterPosition {
+ /** Holds if this position represents a `self`/`cls` parameter. */
+ predicate isSelf() { this = TSelfParameterPosition() }
+
+ /** Holds if this position represents a positional parameter at (0-based) `index`. */
+ predicate isPositional(int index) { this = TPositionalParameterPosition(index) }
+
+ /** Holds if this position represents a keyword parameter named `name`. */
+ predicate isKeyword(string name) { this = TKeywordParameterPosition(name) }
+
+ /** Holds if this position represents a `*args` parameter at (0-based) `index`. */
+ predicate isStarArgs(int index) { this = TStarArgsParameterPosition(index) }
+
+ /**
+ * Holds if this position represents a synthetic parameter at or after (0-based)
+ * position `index`, from which there will be made a store step to the real
+ * `*args` parameter.
+ */
+ predicate isSynthStarArgsElement(int index) {
+ this = TSynthStarArgsElementParameterPosition(index)
+ }
+
+ /** Holds if this position represents a `**kwargs` parameter. */
+ predicate isDictSplat() { this = TDictSplatParameterPosition() }
+
+ /** Gets a textual representation of this element. */
+ string toString() {
+ this.isSelf() and result = "self"
+ or
+ exists(int index | this.isPositional(index) and result = "position " + index)
+ or
+ exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ or
+ exists(int index | this.isStarArgs(index) and result = "*args at " + index)
+ or
+ exists(int index |
+ this.isSynthStarArgsElement(index) and
+ result = "synthetic *args element at (or after) " + index
+ )
+ or
+ this.isDictSplat() and result = "**"
+ }
+}
+
+newtype TArgumentPosition =
+ /** Used for `self` in methods, and `cls` in classmethods. */
+ TSelfArgumentPosition() or
+ TPositionalArgumentPosition(int index) {
+ exists(any(CallNode c).getArg(index))
+ or
+ // since synthetic calls within a summarized callable could use a unique argument
+ // position, we need to ensure we make these available (these are specified as
+ // parameters in the flow-summary spec)
+ FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index)
+ } or
+ TKeywordArgumentPosition(string name) {
+ exists(any(CallNode c).getArgByName(name))
+ or
+ // see comment for TPositionalArgumentPosition
+ FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name)
+ } or
+ TStarArgsArgumentPosition(int index) {
+ exists(Call c | c.getPositionalArg(index) instanceof Starred)
+ } or
+ TDictSplatArgumentPosition()
+
+/** An argument position. */
+class ArgumentPosition extends TArgumentPosition {
+ /** Holds if this position represents a `self`/`cls` argument. */
+ predicate isSelf() { this = TSelfArgumentPosition() }
+
+ /** Holds if this position represents a positional argument at (0-based) `index`. */
+ predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }
+
+ /** Holds if this position represents a keyword argument named `name`. */
+ predicate isKeyword(string name) { this = TKeywordArgumentPosition(name) }
+
+ /** Holds if this position represents a `*args` argument at (0-based) `index`. */
+ predicate isStarArgs(int index) { this = TStarArgsArgumentPosition(index) }
+
+ /** Holds if this position represents a `**kwargs` argument. */
+ predicate isDictSplat() { this = TDictSplatArgumentPosition() }
+
+ /** Gets a textual representation of this element. */
+ string toString() {
+ this.isSelf() and result = "self"
+ or
+ exists(int pos | this.isPositional(pos) and result = "position " + pos)
+ or
+ exists(string name | this.isKeyword(name) and result = "keyword " + name)
+ or
+ exists(int index | this.isStarArgs(index) and result = "*args at " + index)
+ or
+ this.isDictSplat() and result = "**"
+ }
+}
+
+/** Holds if arguments at position `apos` match parameters at position `ppos`. */
+predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
+ ppos.isSelf() and apos.isSelf()
+ or
+ exists(int index | ppos.isPositional(index) and apos.isPositional(index))
+ or
+ exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
+ or
+ exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index))
+ or
+ exists(int paramIndex, int argIndex | argIndex >= paramIndex |
+ ppos.isSynthStarArgsElement(paramIndex) and apos.isPositional(argIndex)
+ )
+ or
+ ppos.isDictSplat() and apos.isDictSplat()
+}
+
+// =============================================================================
+// Helper predicates
+// =============================================================================
+/**
+ * Holds if the function `func` is a staticmethod -- either by having a
+ * `@staticmethod` decorator or by convention
+ * (like a `__new__` method on a class is a classmethod even without the decorator).
+ */
+predicate isStaticmethod(Function func) {
+ exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+}
+
+/**
+ * Holds if the function `func` is a classmethod -- either by having a
+ * `@classmethod` decorator or by convention
+ * (like a `__new__` method on a class is a classmethod even without the decorator).
+ */
+predicate isClassmethod(Function func) {
+ exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+ or
+ exists(Class cls |
+ cls.getAMethod() = func and
+ func.getName() in [
+ "__new__", // https://docs.python.org/3.10/reference/datamodel.html#object.__new__
+ "__init_subclass__", // https://docs.python.org/3.10/reference/datamodel.html#object.__init_subclass__
+ "__class_getitem__", // https://docs.python.org/3.10/reference/datamodel.html#object.__class_getitem__
+ ]
+ )
+}
+
+/** Holds if the function `func` has a `property` decorator. */
+predicate hasPropertyDecorator(Function func) {
+ exists(NameNode id | id.getId() = "property" and id.isGlobal() |
+ func.getADecorator() = id.getNode()
+ )
+}
+
+// =============================================================================
+// Callables
+// =============================================================================
+/** A callable defined in library code, identified by a unique string. */
+abstract class LibraryCallable extends string {
+ bindingset[this]
+ LibraryCallable() { any() }
+
+ /** Gets a call to this library callable. */
+ abstract CallCfgNode getACall();
+
+ /** Gets a data-flow node, where this library callable is used as a call-back. */
+ abstract ArgumentNode getACallback();
+}
+
+newtype TDataFlowCallable =
+ /**
+ * Is used as the target for all calls: plain functions, lambdas, methods on classes,
+ * class instantiations, and (in the future) special methods.
+ */
+ TFunction(Function func) {
+ // For generators/list-comprehensions we create a synthetic function. In the
+ // points-to call-graph these were not considered callable, and instead we added
+ // data-flow steps (read/write) for these. As an easy solution for now, we do the
+ // same to keep things easy to reason about (and therefore exclude things that do
+ // not have a definition)
+ exists(func.getDefinition())
+ } or
+ /** see QLDoc for `DataFlowModuleScope` for why we need this. */
+ TModule(Module m) or
+ TLibraryCallable(LibraryCallable callable)
+
+/** A callable. */
+abstract class DataFlowCallable extends TDataFlowCallable {
+ /** Gets a textual representation of this element. */
+ abstract string toString();
+
+ /** Gets qualified name for this callable, if any. */
+ abstract string getQualifiedName();
+
+ /** Gets the scope of this callable */
+ abstract Scope getScope();
+
+ /** Gets the parameter at position `ppos`, if any. */
+ abstract ParameterNode getParameter(ParameterPosition ppos);
+
+ /** Gets the underlying library callable, if any. */
+ LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
+
+ /** Gets the location of this dataflow callable. */
+ abstract Location getLocation();
+}
+
+/** A callable function. */
+abstract class DataFlowFunction extends DataFlowCallable, TFunction {
+ Function func;
+
+ DataFlowFunction() {
+ this = TFunction(func) and
+ // TODO: Handle @property decorators
+ not hasPropertyDecorator(func)
+ }
+
+ override string toString() { result = func.toString() }
+
+ override string getQualifiedName() { result = func.getQualifiedName() }
+
+ override Function getScope() { result = func }
+
+ override Location getLocation() { result = func.getLocation() }
+
+ /** Gets the positional parameter offset, to take into account self/cls parameters. */
+ int positionalOffset() { result = 0 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ exists(int index | ppos.isPositional(index) |
+ result.getParameter() = func.getArg(index + this.positionalOffset())
+ )
+ or
+ exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name))
+ or
+ // `*args`
+ exists(int index |
+ (
+ ppos.isStarArgs(index) and
+ result.getParameter() = func.getVararg()
+ or
+ ppos.isSynthStarArgsElement(index) and
+ result = TSynthStarArgsElementParameterNode(this)
+ )
+ |
+ // a `*args` parameter comes after the last positional parameter. We need to take
+ // self parameter into account, so for
+ // `def func(foo, bar, *args)` it should be index 2 (pos-param-count == 2)
+ // `class A: def func(self, foo, bar, *args)` it should be index 2 (pos-param-count - 1 == 3 - 1)
+ index = func.getPositionalParameterCount() - this.positionalOffset()
+ or
+ // no positional argument
+ not exists(func.getArg(_)) and index = 0
+ )
+ or
+ // `**kwargs`
+ // since the dataflow library has the restriction that we can only have ONE result per
+ // parameter position, if there is both a synthetic **kwargs and a real **kwargs
+ // parameter, we only give the result for the synthetic, and add local flow from the
+ // synthetic to the real. It might seem more natural to do it in the other
+ // direction, but since we have a clearStep on the real **kwargs parameter, we would have that
+ // content-clearing would also affect the synthetic parameter, which we don't want.
+ ppos.isDictSplat() and
+ if exists(func.getArgByName(_))
+ then result = TSynthDictSplatParameterNode(this)
+ else result.getParameter() = func.getKwarg()
+ }
+}
+
+/** A plain (non-method) function. */
+class DataFlowPlainFunction extends DataFlowFunction {
+ DataFlowPlainFunction() { not this instanceof DataFlowMethod }
+}
+
+/** A method. */
+class DataFlowMethod extends DataFlowFunction {
+ Class cls;
+
+ DataFlowMethod() { cls.getAMethod() = func }
+
+ /** Gets the class this function is a method of. */
+ Class getClass() { result = cls }
+
+ override int positionalOffset() { result = 1 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ ppos.isSelf() and result.getParameter() = func.getArg(0)
+ or
+ result = super.getParameter(ppos)
+ }
+}
+
+/** A classmethod. */
+class DataFlowClassmethod extends DataFlowMethod {
+ DataFlowClassmethod() { isClassmethod(func) }
+}
+
+/** A staticmethod. */
+class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction {
+ DataFlowStaticmethod() { isStaticmethod(func) }
+
+ override int positionalOffset() { result = 0 }
+
+ override ParameterNode getParameter(ParameterPosition ppos) {
+ result = DataFlowFunction.super.getParameter(ppos)
+ }
+}
+
+/**
+ * A module. This is not actually a callable, but we need this so a
+ * `ModuleVariableNode` have an enclosing callable.
+ */
+class DataFlowModuleScope extends DataFlowCallable, TModule {
+ Module mod;
+
+ DataFlowModuleScope() { this = TModule(mod) }
+
+ override string toString() { result = mod.toString() }
+
+ override string getQualifiedName() { result = mod.getName() }
+
+ override Module getScope() { result = mod }
+
+ override Location getLocation() { result = mod.getLocation() }
+
+ override ParameterNode getParameter(ParameterPosition ppos) { none() }
+}
+
+class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
+ LibraryCallable callable;
+
+ LibraryCallableValue() { this = TLibraryCallable(callable) }
+
+ override string toString() { result = "LibraryCallableValue: " + callable.toString() }
+
+ override string getQualifiedName() { result = callable.toString() }
+
+ /** Gets a data-flow node, where this library callable is used as a call-back. */
+ ArgumentNode getACallback() { result = callable.getACallback() }
+
+ override Scope getScope() { none() }
+
+ override ParameterNode getParameter(ParameterPosition ppos) { none() }
+
+ override LibraryCallable asLibraryCallable() { result = callable }
+
+ override Location getLocation() { none() }
+}
+
+// =============================================================================
+// Type trackers used to resolve calls.
+// =============================================================================
+/** Gets a call to `type`. */
+private CallCfgNode getTypeCall() {
+ exists(NameNode id | id.getId() = "type" and id.isGlobal() |
+ result.getFunction().asCfgNode() = id
+ )
+}
+
+/** Gets a call to `super`. */
+private CallCfgNode getSuperCall() {
+ // While it is possible to reference super and call it later, it's almost never done in
+ // practice. From looking at top 1000 projects, there were a few uses around mocking (see
+ // link below), but otherwise only 2 edgecases. Overall it seems ok to ignore this complexity.
+ //
+ // https://github.com/python/cpython/blob/18b1782192f85bd26db89f5bc850f8bee4247c1a/Lib/unittest/mock.py#L48-L50
+ exists(NameNode id | id.getId() = "super" and id.isGlobal() |
+ result.getFunction().asCfgNode() = id
+ )
+}
+
+/**
+ * Holds if the file `f` should be ignored when computing the call-graph.
+ *
+ * We currently see a performance problem when analyzing the `sympy` PyPI package,
+ * which can be part of the database when dependencies are installed and extracted.
+ * From what we can understand, SymPy is using Python in a exotic way, so the fact that
+ * our analysis currently does not handle this project has nothing to say about our
+ * ability to handle normal Python code. Furthermore, SymPy does not look to be relevant
+ * in a security context, so we should not lose out on any security results by doing
+ * this.
+ */
+private predicate ignoreForCallGraph(File f) {
+ f.getAbsolutePath().matches("%/site-packages/sympy/%")
+}
+
+/**
+ * Gets a reference to the function `func`.
+ */
+private TypeTrackingNode functionTracker(TypeTracker t, Function func) {
+ not ignoreForCallGraph(result.getLocation().getFile()) and
+ t.start() and
+ (
+ result.asExpr() = func.getDefinition()
+ or
+ // when a function is decorated, it's the result of the (last) decorator call that
+ // is used
+ result.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall()
+ )
+ or
+ not ignoreForCallGraph(result.getLocation().getFile()) and
+ exists(TypeTracker t2 | result = functionTracker(t2, func).track(t2, t))
+}
+
+/**
+ * Gets a reference to the function `func`.
+ */
+Node functionTracker(Function func) { functionTracker(TypeTracker::end(), func).flowsTo(result) }
+
+/**
+ * Gets a reference to the class `cls`.
+ */
+private TypeTrackingNode classTracker(TypeTracker t, Class cls) {
+ not ignoreForCallGraph(result.getLocation().getFile()) and
+ t.start() and
+ (
+ result.asExpr() = cls.getParent()
+ or
+ // when a class is decorated, it's the result of the (last) decorator call that
+ // is used
+ result.asExpr() = cls.getParent().getADecoratorCall()
+ or
+ // `type(obj)`, where obj is an instance of this class
+ result = getTypeCall() and
+ result.(CallCfgNode).getArg(0) = classInstanceTracker(cls)
+ )
+ or
+ not ignoreForCallGraph(result.getLocation().getFile()) and
+ exists(TypeTracker t2 | result = classTracker(t2, cls).track(t2, t)) and
+ not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to the class `cls`.
+ */
+Node classTracker(Class cls) { classTracker(TypeTracker::end(), cls).flowsTo(result) }
+
+/**
+ * Gets a reference to an instance of the class `cls`, tracked by `t`.
+ */
+private TypeTrackingNode classInstanceTracker(TypeTracker t, Class cls) {
+  // direct instantiation: a call that resolves to the class `cls`
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  resolveClassCall(result.(CallCfgNode).asCfgNode(), cls)
+  or
+  // result of `super().__new__` as used in a `__new__` method implementation
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  exists(Class classUsedInSuper |
+    fromSuperNewCall(result.(CallCfgNode).asCfgNode(), classUsedInSuper, _, _) and
+    classUsedInSuper = getADirectSuperclass*(cls)
+  )
+  or
+  // step the type-tracker one flow step; do not propagate into implicit `self` parameters
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  exists(TypeTracker t2 | result = classInstanceTracker(t2, cls).track(t2, t)) and
+  not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to an instance of the class `cls`.
+ */
+Node classInstanceTracker(Class cls) {
+  classInstanceTracker(TypeTracker::end(), cls).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the `self` argument of a method on class `classWithMethod`,
+ * tracked by `t`. The method cannot be a `staticmethod` or `classmethod`.
+ */
+private TypeTrackingNode selfTracker(TypeTracker t, Class classWithMethod) {
+  // the first parameter of a (plain) method of `classWithMethod`
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  exists(Function func |
+    func = classWithMethod.getAMethod() and
+    not isStaticmethod(func) and
+    not isClassmethod(func)
+  |
+    result.asExpr() = func.getArg(0)
+  )
+  or
+  // step the type-tracker one flow step; do not propagate into other `self` parameters
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  exists(TypeTracker t2 | result = selfTracker(t2, classWithMethod).track(t2, t)) and
+  not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to the `self` argument of a method on class `classWithMethod`.
+ * The method cannot be a `staticmethod` or `classmethod`.
+ */
+Node selfTracker(Class classWithMethod) {
+  selfTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the enclosing class `classWithMethod` from within one of its
+ * methods, tracked by `t` -- either through the `cls` argument from a `classmethod`
+ * or from `type(self)` from a normal method.
+ */
+private TypeTrackingNode clsArgumentTracker(TypeTracker t, Class classWithMethod) {
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  (
+    // the first parameter of a classmethod of `classWithMethod`
+    exists(Function func |
+      func = classWithMethod.getAMethod() and
+      isClassmethod(func)
+    |
+      result.asExpr() = func.getArg(0)
+    )
+    or
+    // type(self)
+    result = getTypeCall() and
+    result.(CallCfgNode).getArg(0) = selfTracker(classWithMethod)
+  )
+  or
+  // step the type-tracker one flow step; do not propagate into implicit `self` parameters
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  exists(TypeTracker t2 | result = clsArgumentTracker(t2, classWithMethod).track(t2, t)) and
+  not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to the enclosing class `classWithMethod` from within one of its
+ * methods, either through the `cls` argument from a `classmethod` or from `type(self)`
+ * from a normal method.
+ */
+Node clsArgumentTracker(Class classWithMethod) {
+  clsArgumentTracker(TypeTracker::end(), classWithMethod).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the result of calling `super` without any argument, where the
+ * call happened in the method `func` (either a method or a classmethod), tracked by `t`.
+ */
+private TypeTrackingNode superCallNoArgumentTracker(TypeTracker t, Function func) {
+  // a `super()` call (with no arguments) whose enclosing scope is `func`
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  not isStaticmethod(func) and
+  exists(CallCfgNode call | result = call |
+    call = getSuperCall() and
+    not exists(call.getArg(_)) and
+    call.getScope() = func
+  )
+  or
+  // step the type-tracker one flow step; do not propagate into implicit `self` parameters
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  exists(TypeTracker t2 | result = superCallNoArgumentTracker(t2, func).track(t2, t)) and
+  not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to the result of calling `super` without any argument, where the
+ * call happened in the method `func` (either a method or a classmethod).
+ */
+Node superCallNoArgumentTracker(Function func) {
+  superCallNoArgumentTracker(TypeTracker::end(), func).flowsTo(result)
+}
+
+/**
+ * Gets a reference to the result of calling `super` with 2 arguments, where the
+ * first is a reference to the class `cls`, and the second argument is `obj`,
+ * tracked by `t`.
+ */
+private TypeTrackingNode superCallTwoArgumentTracker(TypeTracker t, Class cls, Node obj) {
+  // a `super(cls, obj)` call
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  t.start() and
+  exists(CallCfgNode call | result = call |
+    call = getSuperCall() and
+    call.getArg(0) = classTracker(cls) and
+    call.getArg(1) = obj
+  )
+  or
+  // step the type-tracker one flow step; do not propagate into implicit `self` parameters
+  not ignoreForCallGraph(result.getLocation().getFile()) and
+  exists(TypeTracker t2 | result = superCallTwoArgumentTracker(t2, cls, obj).track(t2, t)) and
+  not result.(ParameterNodeImpl).isParameterOf(_, any(ParameterPosition pp | pp.isSelf()))
+}
+
+/**
+ * Gets a reference to the result of calling `super` with 2 arguments, where the
+ * first is a reference to the class `cls`, and the second argument is `obj`.
+ */
+Node superCallTwoArgumentTracker(Class cls, Node obj) {
+  superCallTwoArgumentTracker(TypeTracker::end(), cls, obj).flowsTo(result)
+}
+
+// =============================================================================
+// MRO
+// =============================================================================
+/**
+ * Gets a direct superclass of the argument `cls`, if any.
+ *
+ * For `A` with the class definition `class A(B, C)` it will have results `B` and `C`.
+ *
+ * Base-class expressions are resolved with `classTracker`, so only classes we can
+ * track a reference for are found.
+ */
+Class getADirectSuperclass(Class cls) { cls.getABase() = classTracker(result).asExpr() }
+
+/**
+ * Gets a direct subclass of the argument `cls`, if any.
+ *
+ * For `B` with the class definition `class A(B)` it will have result `A`.
+ */
+Class getADirectSubclass(Class cls) { cls = getADirectSuperclass(result) }
+
+/**
+ * Gets a class that, from an approximated MRO calculation, might be the next class used
+ * for member-lookup when `super().attr` is used inside the class `cls`.
+ *
+ * In the example below, with `cls=B`, this predicate will have `A` and `C` as results.
+ * ```py
+ * class A: pass
+ * class B(A): pass
+ * class C(A): pass
+ * class D(B, C): pass
+ * ```
+ *
+ * NOTE: This approximation does not handle all cases correctly, and in the example
+ * below, with `cls=A` will not have any results, although it should include `Y`.
+ *
+ * ```py
+ * class A: pass
+ * class B(A): pass
+ * class X: pass
+ * class Y(X): pass
+ * class Ex(B, Y): pass
+ * ```
+ *
+ * NOTE for debugging the results of this predicate: Since a class can be part of
+ * multiple MROs, results from this predicate might only be valid in some, but not all,
+ * inheritance chains: This is the case with the result `C` for `cls=B` in the first
+ * example -- if `B` and `C` are defined in the same file, but `D` in a different file,
+ * this might make the results from this predicate difficult to comprehend at first.
+ *
+ * For more info on the C3 MRO used in Python see:
+ * - https://docs.python.org/3/glossary.html#term-method-resolution-order
+ * - https://www.python.org/download/releases/2.3/mro/
+ * - https://opendylan.org/_static/c3-linearization.pdf
+ */
+private Class getNextClassInMro(Class cls) {
+  // class A(B, ...):
+  // `B` must be the next class after `A` in the MRO for A.
+  cls.getBase(0) = classTracker(result).asExpr()
+  or
+  // class A(B, C, D):
+  // - `C` could be the next class after `B` in MRO.
+  // - `D` could be the next class after `C` in MRO.
+  exists(Class sub, int i |
+    sub.getBase(i) = classTracker(cls).asExpr() and
+    sub.getBase(i + 1) = classTracker(result).asExpr() and
+    not result = cls
+  )
+  // There are three important properties for MRO computed with C3 in Python:
+  //
+  // 1) monotonicity: if C1 precedes C2 in the MRO of C, then C1 precedes C2 in the MRO
+  //    of any subclass of C.
+  // 2) local precedence ordering: if C1 precedes C2 in the list of superclasses for C,
+  //    they will keep the same order in the MRO for C (and due to monotonicity, any
+  //    subclass).
+  // 3) consistency with the extended precedence graph: if A and B (that are part of the
+  //    class hierarchy of C) do not have a subclass/superclass relationship on their
+  //    own, the ordering of A and B in the MRO of C will be determined by the local
+  //    precedence ordering in the classes that use both A and B, either directly or
+  //    through a subclass. (see paper for more details)
+  //
+  // Note that not all class hierarchies are allowed with C3, see the Python 2.3 article
+  // for examples.
+}
+
+/**
+ * Gets a potential definition of the function `name` according to our approximation of
+ * MRO for the class `cls` (see `getNextClassInMro` for more information).
+ */
+Function findFunctionAccordingToMro(Class cls, string name) {
+  // defined directly on `cls`
+  result = cls.getAMethod() and
+  result.getName() = name
+  or
+  // not defined on `cls`: continue the lookup at the next class in the (approximated) MRO
+  not cls.getAMethod().getName() = name and
+  result = findFunctionAccordingToMro(getNextClassInMro(cls), name)
+}
+
+/**
+ * Gets a class that, from an approximated MRO calculation, might be the next class
+ * after `cls` in the MRO for `startingClass`.
+ *
+ * Note: this is almost the same as `getNextClassInMro`, except we know the
+ * `startingClass`, which can give slightly more precise results.
+ *
+ * See QLDoc for `getNextClassInMro`.
+ */
+Class getNextClassInMroKnownStartingClass(Class cls, Class startingClass) {
+  // same disjuncts as `getNextClassInMro`, but only considering classes in the
+  // superclass hierarchy of `startingClass`
+  cls.getBase(0) = classTracker(result).asExpr() and
+  cls = getADirectSuperclass*(startingClass)
+  or
+  exists(Class sub, int i | sub = getADirectSuperclass*(startingClass) |
+    sub.getBase(i) = classTracker(cls).asExpr() and
+    sub.getBase(i + 1) = classTracker(result).asExpr() and
+    not result = cls
+  )
+}
+
+/**
+ * Gets a potential definition of the function `name`, found by walking our
+ * approximated MRO from `cls`, where the lookup originally started at
+ * `startingClass` (so `cls` is restricted to superclasses of `startingClass`).
+ *
+ * Helper for the public two-argument `findFunctionAccordingToMroKnownStartingClass`.
+ */
+private Function findFunctionAccordingToMroKnownStartingClass(
+  Class cls, Class startingClass, string name
+) {
+  // defined directly on `cls`
+  result = cls.getAMethod() and
+  result.getName() = name and
+  cls = getADirectSuperclass*(startingClass)
+  or
+  // not defined on `cls`: continue the lookup at the next class in the (approximated) MRO
+  not cls.getAMethod().getName() = name and
+  result =
+    findFunctionAccordingToMroKnownStartingClass(getNextClassInMroKnownStartingClass(cls,
+        startingClass), startingClass, name)
+}
+
+/**
+ * Gets a potential definition of the function `name` according to our approximation of
+ * MRO for the class `cls` (see `getNextClassInMroKnownStartingClass` for more information).
+ *
+ * Note: this is almost the same as `findFunctionAccordingToMro`, except we know the
+ * `startingClass`, which can give slightly more precise results.
+ */
+pragma[inline]
+Function findFunctionAccordingToMroKnownStartingClass(Class startingClass, string name) {
+  result = findFunctionAccordingToMroKnownStartingClass(startingClass, startingClass, name)
+}
+
+// =============================================================================
+// attribute trackers
+// =============================================================================
+/** Gets a reference to the attribute read `attr`, tracked by `t`. */
+private TypeTrackingNode attrReadTracker(TypeTracker t, AttrRead attr) {
+  t.start() and
+  result = attr and
+  // only track attribute reads made on references we can resolve: classes, class
+  // instances, implicit self/cls arguments, or `super(...)` results
+  attr.getObject() in [
+      classTracker(_), classInstanceTracker(_), selfTracker(_), clsArgumentTracker(_),
+      superCallNoArgumentTracker(_), superCallTwoArgumentTracker(_, _)
+    ]
+  or
+  // step the type-tracker one flow step from an already-tracked reference
+  exists(TypeTracker t2 | result = attrReadTracker(t2, attr).track(t2, t))
+}
+
+/** Gets a reference to the attribute read `attr` */
+Node attrReadTracker(AttrRead attr) { attrReadTracker(TypeTracker::end(), attr).flowsTo(result) }
+
+// =============================================================================
+// call and argument resolution
+// =============================================================================
+newtype TCallType =
+  /** A call to a function that is not part of a class. */
+  CallTypePlainFunction() or
+  /**
+   * A call to a "normal" method on a class instance.
+   * Does not include staticmethods or classmethods.
+   */
+  CallTypeNormalMethod() or
+  /** A call to a staticmethod. */
+  CallTypeStaticMethod() or
+  /** A call to a classmethod. */
+  CallTypeClassMethod() or
+  /**
+   * A call to a method on a class, not going through an instance method, such as
+   *
+   * ```py
+   * class Foo:
+   *     def method(self, arg):
+   *         pass
+   *
+   * foo = Foo()
+   * Foo.method(foo, 42)
+   * ```
+   */
+  CallTypeMethodAsPlainFunction() or
+  /** A call to a class. */
+  CallTypeClass() or
+  /** A call on a class instance, that goes to the `__call__` method of the class */
+  CallTypeClassInstanceCall()
+
+/** A type of call. */
+class CallType extends TCallType {
+  /** Gets a textual representation of this call type. */
+  string toString() {
+    this instanceof CallTypePlainFunction and
+    result = "CallTypePlainFunction"
+    or
+    this instanceof CallTypeNormalMethod and
+    result = "CallTypeNormalMethod"
+    or
+    this instanceof CallTypeStaticMethod and
+    result = "CallTypeStaticMethod"
+    or
+    this instanceof CallTypeClassMethod and
+    result = "CallTypeClassMethod"
+    or
+    this instanceof CallTypeMethodAsPlainFunction and
+    result = "CallTypeMethodAsPlainFunction"
+    or
+    this instanceof CallTypeClass and
+    result = "CallTypeClass"
+    or
+    this instanceof CallTypeClassInstanceCall and
+    result = "CallTypeClassInstanceCall"
+  }
+}
+
+// -------------------------------------
+// method call resolution
+// -------------------------------------
+private module MethodCalls {
+  /**
+   * Holds if `call` is a call to a method `target` on an instance or class, where the
+   * instance or class is not derived from an implicit `self`/`cls` argument to a method
+   * -- for that, see `callWithinMethodImplicitSelfOrCls`.
+   *
+   * It is found by making an attribute read `attr` with the name `functionName` on a
+   * reference to the class `cls`, or to an instance of the class `cls`. The reference the
+   * attribute-read is made on is `self`.
+   */
+  pragma[nomagic]
+  private predicate directCall(
+    CallNode call, Function target, string functionName, Class cls, AttrRead attr, Node self
+  ) {
+    target = findFunctionAccordingToMroKnownStartingClass(cls, functionName) and
+    directCall_join(call, functionName, cls, attr, self)
+  }
+
+  /** Extracted to give good join order */
+  pragma[nomagic]
+  private predicate directCall_join(
+    CallNode call, string functionName, Class cls, AttrRead attr, Node self
+  ) {
+    call.getFunction() = attrReadTracker(attr).asCfgNode() and
+    attr.accesses(self, functionName) and
+    self in [classTracker(cls), classInstanceTracker(cls)]
+  }
+
+  /**
+   * Holds if `call` is a call to a method `target` derived from an implicit `self`/`cls`
+   * argument to a method within the class `classWithMethod`.
+   *
+   * It is found by making an attribute read `attr` with the name `functionName` on a
+   * reference to an implicit `self`/`cls` argument. The reference the attribute-read is
+   * made on is `self`.
+   */
+  pragma[nomagic]
+  private predicate callWithinMethodImplicitSelfOrCls(
+    CallNode call, Function target, string functionName, Class classWithMethod, AttrRead attr,
+    Node self
+  ) {
+    target = findFunctionAccordingToMro(getADirectSubclass*(classWithMethod), functionName) and
+    callWithinMethodImplicitSelfOrCls_join(call, functionName, classWithMethod, attr, self)
+  }
+
+  /** Extracted to give good join order */
+  pragma[nomagic]
+  private predicate callWithinMethodImplicitSelfOrCls_join(
+    CallNode call, string functionName, Class classWithMethod, AttrRead attr, Node self
+  ) {
+    call.getFunction() = attrReadTracker(attr).asCfgNode() and
+    attr.accesses(self, functionName) and
+    self in [clsArgumentTracker(classWithMethod), selfTracker(classWithMethod)]
+  }
+
+  /**
+   * Like `fromSuper`, but only for `__new__`, and without requirement for being able to
+   * resolve the call to a known target (since the only super class might be the
+   * builtin `object`, so we never have the implementation of `__new__` in the DB).
+   */
+  predicate fromSuperNewCall(CallNode call, Class classUsedInSuper, AttrRead attr, Node self) {
+    fromSuper_join(call, "__new__", classUsedInSuper, attr, self) and
+    self in [classTracker(_), clsArgumentTracker(_)]
+  }
+
+  /**
+   * Holds if `call` is a call to a method `target`, derived from a use of `super`, either
+   * as:
+   *
+   * (1) `super(SomeClass, obj)`, where the first argument is a reference to the class
+   * `classUsedInSuper`, and the second argument is `self`.
+   *
+   * (2) `super()`. This implicit version can only happen within a method in a class.
+   * The implicit first argument is the class the call happens within `classUsedInSuper`.
+   * The implicit second argument is the `self`/`cls` parameter of the method this happens
+   * within.
+   *
+   * The method call is found by making an attribute read `attr` with the name
+   * `functionName` on the return value from the `super` call.
+   */
+  pragma[nomagic]
+  predicate fromSuper(
+    CallNode call, Function target, string functionName, Class classUsedInSuper, AttrRead attr,
+    Node self
+  ) {
+    target = findFunctionAccordingToMro(getNextClassInMro(classUsedInSuper), functionName) and
+    fromSuper_join(call, functionName, classUsedInSuper, attr, self)
+  }
+
+  /** Extracted to give good join order */
+  pragma[nomagic]
+  private predicate fromSuper_join(
+    CallNode call, string functionName, Class classUsedInSuper, AttrRead attr, Node self
+  ) {
+    call.getFunction() = attrReadTracker(attr).asCfgNode() and
+    (
+      exists(Function func |
+        attr.accesses(superCallNoArgumentTracker(func), functionName) and
+        // Requiring enclosing scope of function to be a class is a little too
+        // restrictive, since it is possible to use `super()` in a function defined inside
+        // the method, where the first argument to the nested-function will be used as
+        // implicit self argument. In practice I don't expect this to be a problem, and we
+        // did not support this with points-to either.
+        func.getEnclosingScope() = classUsedInSuper and
+        self.(ParameterNode).getParameter() = func.getArg(0)
+      )
+      or
+      attr.accesses(superCallTwoArgumentTracker(classUsedInSuper, self), functionName)
+    )
+  }
+
+  /**
+   * Holds if `call` is a call to the method `target` with call-type `type`, where
+   * `self` is the node the method lookup was made on (and which will be used for the
+   * `self`/`cls` argument, when the call-type has one).
+   */
+  predicate resolveMethodCall(CallNode call, Function target, CallType type, Node self) {
+    (
+      directCall(call, target, _, _, _, self)
+      or
+      callWithinMethodImplicitSelfOrCls(call, target, _, _, _, self)
+      or
+      fromSuper(call, target, _, _, _, self)
+    ) and
+    (
+      // normal method call
+      type instanceof CallTypeNormalMethod and
+      (
+        self = classInstanceTracker(_)
+        or
+        self = selfTracker(_)
+      ) and
+      not isStaticmethod(target) and
+      not isClassmethod(target)
+      or
+      // method as plain function call
+      type instanceof CallTypeMethodAsPlainFunction and
+      self = classTracker(_) and
+      not isStaticmethod(target) and
+      not isClassmethod(target)
+      or
+      // staticmethod call
+      type instanceof CallTypeStaticMethod and
+      isStaticmethod(target)
+      or
+      // classmethod call
+      type instanceof CallTypeClassMethod and
+      isClassmethod(target)
+    )
+  }
+}
+
+import MethodCalls
+
+// -------------------------------------
+// class call resolution
+// -------------------------------------
+/**
+ * Holds if `call` is a call to the class `cls`.
+ *
+ * NOTE: We have this predicate mostly to be able to compare with old point-to
+ * call-graph resolution. So it could be removed in the future.
+ */
+predicate resolveClassCall(CallNode call, Class cls) {
+  // direct reference to the class
+  call.getFunction() = classTracker(cls).asCfgNode()
+  or
+  // `cls()` inside a classmethod (which also contains `type(self)()` inside a method)
+  exists(Class classWithMethod |
+    call.getFunction() = clsArgumentTracker(classWithMethod).asCfgNode() and
+    getADirectSuperclass*(cls) = classWithMethod
+  )
+}
+
+/**
+ * Gets a function, either `__init__` or `__new__` as specified by `funcName`, that will
+ * be invoked when `cls` is constructed -- where the function lookup is based on our MRO
+ * calculation.
+ */
+Function invokedFunctionFromClassConstruction(Class cls, string funcName) {
+  // as described in https://docs.python.org/3/reference/datamodel.html#object.__new__
+  // __init__ will only be called when __new__ returns an instance of the class (which
+  // is not a requirement). However, for simplicity, we assume that __init__ will always
+  // be called.
+  funcName in ["__init__", "__new__"] and
+  result = findFunctionAccordingToMroKnownStartingClass(cls, funcName)
+}
+
+/**
+ * Holds if `call` is a call on a class instance, that goes to the `__call__` method
+ * of the class. `self` is the node the call is made on.
+ *
+ * See https://docs.python.org/3/reference/datamodel.html#object.__call__
+ */
+predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
+  exists(Class cls |
+    // call on a tracked instance of `cls`
+    call.getFunction() = classInstanceTracker(cls).asCfgNode() and
+    target = findFunctionAccordingToMroKnownStartingClass(cls, "__call__")
+    or
+    // call on an implicit `self` reference within a method of `cls` (or a subclass)
+    call.getFunction() = selfTracker(cls).asCfgNode() and
+    target = findFunctionAccordingToMro(getADirectSubclass*(cls), "__call__")
+  |
+    self.asCfgNode() = call.getFunction()
+  )
+}
+
+// -------------------------------------
+// overall call resolution
+// -------------------------------------
+/**
+ * Holds if `call` is a call to the `target`, with call-type `type`.
+ */
+cached
+predicate resolveCall(CallNode call, Function target, CallType type) {
+  // staging: tie this cached predicate to the DataFlow stage
+  Stages::DataFlow::ref() and
+  (
+    // plain function call -- excluding functions that are methods on some class
+    type instanceof CallTypePlainFunction and
+    call.getFunction() = functionTracker(target).asCfgNode() and
+    not exists(Class cls | cls.getAMethod() = target)
+    or
+    // method calls (normal/static/class/method-as-plain-function)
+    resolveMethodCall(call, target, type, _)
+    or
+    // class construction: resolves to the `__init__`/`__new__` that will be invoked
+    type instanceof CallTypeClass and
+    exists(Class cls |
+      resolveClassCall(call, cls) and
+      target = invokedFunctionFromClassConstruction(cls, _)
+    )
+    or
+    // call on a class instance, going to the `__call__` method
+    type instanceof CallTypeClassInstanceCall and
+    resolveClassInstanceCall(call, target, _)
+  )
+}
+
+// =============================================================================
+// Argument resolution
+// =============================================================================
+/**
+ * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper
+ * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`.
+ */
+cached
+predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
+  // positional argument
+  exists(int index |
+    apos.isPositional(index) and
+    arg.asCfgNode() = call.getArg(index)
+  )
+  or
+  // keyword argument
+  exists(string name |
+    apos.isKeyword(name) and
+    arg.asCfgNode() = call.getArgByName(name)
+  )
+  or
+  // the first `*args`
+  exists(int index |
+    apos.isStarArgs(index) and
+    arg.asCfgNode() = call.getStarArg() and
+    // since `CallNode.getArg` doesn't include `*args`, we need to drop to the AST level
+    // to get the index. Notice that we only use the AST for getting the index, so we
+    // don't need to check for dominance in regards to splitting.
+    call.getStarArg().getNode() = call.getNode().getPositionalArg(index).(Starred).getValue()
+  )
+  or
+  // `**kwargs` -- either an explicit one, or a synthesized dict-splat argument node
+  apos.isDictSplat() and
+  (
+    arg.asCfgNode() = call.getKwargs()
+    or
+    arg = TSynthDictSplatArgumentNode(call)
+  )
+}
+
+/**
+ * Gets the argument `arg` of `call` at position `apos`, if any. Requires that we can
+ * resolve `call` to `target` with CallType `type`.
+ *
+ * It might seem like it's enough to know the CallType to resolve arguments. The reason
+ * we also need the `target`, is to avoid cross-talk. In the example below, assuming
+ * that `Foo` and `Bar` define their own `meth` methods, we might end up passing _both_
+ * `foo` and `bar` to both `Foo.meth` and `Bar.meth`, which is wrong. Since the
+ * attribute access uses the same name, we need to also distinguish on the resolved
+ * target, to know which of the two objects to pass as the self argument.
+ *
+ *
+ * ```py
+ * foo = Foo()
+ * bar = Bar()
+ * if cond:
+ *     func = foo.meth
+ * else:
+ *     func = bar.meth
+ * func(42)
+ * ```
+ *
+ * Note: If `Bar.meth` and `Foo.meth` resolves to the same function, we will end up
+ * sending both `self` arguments to that function, which is by definition the right thing to do.
+ *
+ * ### Bound methods
+ *
+ * For bound methods, such as `bm = x.m; bm()`, it's a little unclear whether we should
+ * still use the object in the attribute lookup (`x.m`) as the self argument in the
+ * call (`bm()`). We currently do this, but there might also be cases where we don't
+ * want to do this.
+ *
+ * In the example below, we want to clear taint from the list before it reaches the
+ * sink, but because we don't have a use of `l` in the `clear()` call, we currently
+ * don't have any way to achieve our goal. (Note that this is a contrived example)
+ *
+ * ```py
+ * l = list()
+ * clear = l.clear
+ * l.append(tainted)
+ * clear()
+ * sink(l)
+ * ```
+ *
+ * To make the above even worse, bound-methods have a `__self__` property that refers to
+ * the object of the bound-method, so we can re-write the code as:
+ *
+ * ```py
+ * l = list()
+ * clear = l.clear
+ * clear.__self__.append(tainted)
+ * clear()
+ * sink(l)
+ * ```
+ *
+ * One idea to solve this is to track the object in a synthetic data-flow node every
+ * time the bound method is used, such that the `clear()` call would essentially be
+ * translated into `l.clear()`, and we can still have use-use flow.
+ */
+cached
+predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
+  // staging: tie this cached predicate to the DataFlow stage
+  Stages::DataFlow::ref() and
+  resolveCall(call, target, type) and
+  (
+    type instanceof CallTypePlainFunction and
+    normalCallArg(call, arg, apos)
+    or
+    // self argument for normal method calls -- see note above about bound methods
+    type instanceof CallTypeNormalMethod and
+    apos.isSelf() and
+    resolveMethodCall(call, target, type, arg) and
+    // the dataflow library has a requirement that arguments and calls are in same enclosing
+    // callable. This requirement would be broken if we used `my_obj` as the self
+    // argument in the `f()` call in the example below:
+    // ```py
+    // def call_func(f):
+    //     f()
+    //
+    // call_func(my_obj.some_method)
+    // ```
+    exists(CfgNode cfgNode | cfgNode.getNode() = call |
+      cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+    )
+    or
+    // cls argument for classmethod calls -- see note above about bound methods
+    type instanceof CallTypeClassMethod and
+    apos.isSelf() and
+    resolveMethodCall(call, target, type, arg) and
+    // the cls argument must be a reference to a class or an implicit cls parameter
+    (arg = classTracker(_) or arg = clsArgumentTracker(_)) and
+    // dataflow lib has requirement that arguments and calls are in same enclosing callable.
+    exists(CfgNode cfgNode | cfgNode.getNode() = call |
+      cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
+    )
+    or
+    // normal arguments for method calls
+    (
+      type instanceof CallTypeNormalMethod or
+      type instanceof CallTypeStaticMethod or
+      type instanceof CallTypeClassMethod
+    ) and
+    normalCallArg(call, arg, apos)
+    or
+    // method as plain function call.
+    //
+    // argument index 0 of call has position self (and MUST be given as positional
+    // argument in call). This also means that call-arguments are shifted by 1, such
+    // that argument index 1 of call has argument position 0
+    type instanceof CallTypeMethodAsPlainFunction and
+    (
+      apos.isSelf() and arg.asCfgNode() = call.getArg(0)
+      or
+      not apos.isPositional(_) and normalCallArg(call, arg, apos)
+      or
+      exists(ArgumentPosition normalPos, int index |
+        apos.isPositional(index - 1) and
+        normalPos.isPositional(index) and
+        normalCallArg(call, arg, normalPos)
+      )
+    )
+    or
+    // class call
+    type instanceof CallTypeClass and
+    (
+      // only pass synthetic node for created object to __init__, and not __new__ since
+      // __new__ is a classmethod.
+      target = invokedFunctionFromClassConstruction(_, "__init__") and
+      apos.isSelf() and
+      arg = TSyntheticPreUpdateNode(call)
+      or
+      normalCallArg(call, arg, apos)
+    )
+    or
+    // call on class instance, which goes to `__call__` method
+    type instanceof CallTypeClassInstanceCall and
+    (
+      apos.isSelf() and
+      resolveClassInstanceCall(call, target, arg)
+      or
+      normalCallArg(call, arg, apos)
+    )
+  )
+}
+
+// =============================================================================
+// DataFlowCall
+// =============================================================================
+newtype TDataFlowCall =
+  /** A call in the program source that was resolved to `target` with call-type `type`. */
+  TNormalCall(CallNode call, Function target, CallType type) { resolveCall(call, target, type) } or
+  /** A call in the program source that might target a summarized library callable. */
+  TPotentialLibraryCall(CallNode call) or
+  /** A synthesized call inside a summarized callable */
+  TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
+    FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
+  }
+
+/** A call that is taken into account by the global data flow computation. */
+abstract class DataFlowCall extends TDataFlowCall {
+  /** Gets a textual representation of this element. */
+  abstract string toString();
+
+  /** Gets the callable to which this call goes. */
+  abstract DataFlowCallable getCallable();
+
+  /** Gets the argument at position `apos`, if any. */
+  abstract ArgumentNode getArgument(ArgumentPosition apos);
+
+  /** Gets the control flow node representing this call, if any. */
+  abstract ControlFlowNode getNode();
+
+  /** Gets the enclosing callable of this call. */
+  abstract DataFlowCallable getEnclosingCallable();
+
+  /** Gets the location of this dataflow call. */
+  abstract Location getLocation();
+
+  /**
+   * Holds if this element is at the specified location.
+   * The location spans column `startcolumn` of line `startline` to
+   * column `endcolumn` of line `endline` in file `filepath`.
+   * For more information, see
+   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/** A call found in the program source (as opposed to a synthesised call). */
+abstract class ExtractedDataFlowCall extends DataFlowCall {
+  // extracted calls always have an underlying control-flow node to take the location from
+  override Location getLocation() { result = this.getNode().getLocation() }
+}
+
+/**
+ * A resolved call in source code with an underlying `CallNode`.
+ *
+ * This is considered normal, compared with special calls such as `obj[0]` calling the
+ * `__getitem__` method on the object. However, this also includes calls that go to the
+ * `__call__` special method.
+ */
+class NormalCall extends ExtractedDataFlowCall, TNormalCall {
+  // the underlying control-flow call node
+  CallNode call;
+  // the resolved call target
+  Function target;
+  // how the call was resolved (see `CallType`)
+  CallType type;
+
+  NormalCall() { this = TNormalCall(call, target, type) }
+
+  override string toString() {
+    // note: if we used toString directly on the CallNode we would get
+    // `ControlFlowNode for func()`
+    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
+    // instead.
+    result = call.getNode().toString()
+  }
+
+  override ControlFlowNode getNode() { result = call }
+
+  override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
+
+  override DataFlowCallable getCallable() { result.(DataFlowFunction).getScope() = target }
+
+  override ArgumentNode getArgument(ArgumentPosition apos) {
+    getCallArg(call, target, type, result, apos)
+  }
+
+  /** Gets the `CallType` of this call. */
+  CallType getCallType() { result = type }
+}
+
+/**
+ * A potential call to a summarized callable, a `LibraryCallable`.
+ *
+ * We currently exclude all resolved calls. This means that a call to, say, `map`, which
+ * is a `ClassCall`, cannot currently be given a summary.
+ * We hope to lift this restriction in the future and include all potential calls to summaries
+ * in this class.
+ */
+class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall {
+  // the underlying control-flow call node
+  CallNode call;
+
+  PotentialLibraryCall() { this = TPotentialLibraryCall(call) }
+
+  override string toString() {
+    // note: if we used toString directly on the CallNode we would get
+    // `ControlFlowNode for func()`
+    // but the `ControlFlowNode` part is just clutter, so we go directly to the AST node
+    // instead.
+    result = call.getNode().toString()
+  }
+
+  // We cannot refer to a `PotentialLibraryCall` here,
+  // as that could in turn refer to type tracking.
+  // This call will be tied to a `PotentialLibraryCall` via
+  // `viableCallable` when the global data flow is assembled.
+  override DataFlowCallable getCallable() { none() }
+
+  override ArgumentNode getArgument(ArgumentPosition apos) {
+    normalCallArg(call, result, apos)
+    or
+    // potential self argument, from `foo.bar()` -- note that this could also just be a
+    // module reference, but we really don't have a good way of knowing :|
+    apos.isSelf() and
+    result.asCfgNode() = call.getFunction().(AttrNode).getObject()
+  }
+
+  override ControlFlowNode getNode() { result = call }
+
+  override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
+}
+
+/**
+ * A synthesized call inside a callable with a flow summary.
+ *
+ * For example, in
+ * ```python
+ * map(lambda x: x + 1, [1, 2, 3])
+ * ```
+ *
+ * there is a synthesized call to the lambda argument inside `map`.
+ */
+class SummaryCall extends DataFlowCall, TSummaryCall {
+  // the summarized callable this synthesized call occurs in
+  private FlowSummaryImpl::Public::SummarizedCallable c;
+  // the data-flow node targeted by this synthesized call
+  private Node receiver;
+
+  SummaryCall() { this = TSummaryCall(c, receiver) }
+
+  /** Gets the data flow node that this call targets. */
+  Node getReceiver() { result = receiver }
+
+  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
+
+  override DataFlowCallable getCallable() { none() }
+
+  override ArgumentNode getArgument(ArgumentPosition apos) { none() }
+
+  override ControlFlowNode getNode() { none() }
+
+  override string toString() { result = "[summary] call to " + receiver + " in " + c }
+
+  // synthesized calls have no location in the source program
+  override Location getLocation() { none() }
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+abstract class ParameterNodeImpl extends Node {
+ /** Gets the `Parameter` this `ParameterNode` represents. */
+ abstract Parameter getParameter();
+
+ /**
+ * Holds if this node is the parameter of callable `c` at the
+ * position `ppos`.
+ */
+ predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
+ this = c.getParameter(ppos)
+ }
+}
+
+/** A parameter for a library callable with a flow summary. */
+class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
+ private FlowSummaryImpl::Public::SummarizedCallable sc;
+ private ParameterPosition pos;
+
+ SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
+
+ override Parameter getParameter() { none() }
+
+ override predicate isParameterOf(DataFlowCallable c, ParameterPosition ppos) {
+ sc = c.asLibraryCallable() and
+ ppos = pos and
+ // avoid overlap with `SynthDictSplatParameterNode`
+ not (
+ pos.isDictSplat() and
+ exists(ParameterPosition keywordPos |
+ FlowSummaryImpl::Private::summaryParameterNodeRange(sc, keywordPos) and
+ keywordPos.isKeyword(_)
+ )
+ )
+ }
+
+ override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
+
+ override string toString() { result = "parameter " + pos + " of " + sc }
+
+ // Hack to return "empty location"
+ override predicate hasLocationInfo(
+ string file, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ file = "" and
+ startline = 0 and
+ startcolumn = 0 and
+ endline = 0 and
+ endcolumn = 0
+ }
+}
+
+/** A data-flow node used to model flow summaries. */
+class SummaryNode extends Node, TSummaryNode {
+ private FlowSummaryImpl::Public::SummarizedCallable c;
+ private FlowSummaryImpl::Private::SummaryNodeState state;
+
+ SummaryNode() { this = TSummaryNode(c, state) }
+
+ override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
+
+ override string toString() { result = "[summary] " + state + " in " + c }
+
+ // Hack to return "empty location"
+ override predicate hasLocationInfo(
+ string file, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ file = "" and
+ startline = 0 and
+ startcolumn = 0 and
+ endline = 0 and
+ endcolumn = 0
+ }
+}
+
+private class SummaryReturnNode extends SummaryNode, ReturnNode {
+ private ReturnKind rk;
+
+ SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
+
+ override ReturnKind getKind() { result = rk }
+}
+
+private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
+ SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
+
+ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
+ FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
+ }
+}
+
+private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNodeImpl {
+ private Node pre;
+
+ SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
+
+ override Node getPreUpdateNode() { result = pre }
+}
+
+/** Gets a viable run-time target for the call `call`. */
+DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
+ result = call.getCallable()
+ or
+ // A call to a library callable with a flow summary
+ // In this situation we can not resolve the callable from the call,
+ // as that would make data flow depend on type tracking.
+ // Instead we resolve the call from the summary.
+ exists(LibraryCallable callable |
+ result = TLibraryCallable(callable) and
+ call.getNode() = callable.getACall().getNode() and
+ call instanceof PotentialLibraryCall
+ )
+}
+
+// =============================================================================
+// Remaining required data-flow things
+// =============================================================================
+private newtype TReturnKind = TNormalReturnKind()
+
+/**
+ * A return kind. A return kind describes how a value can be returned
+ * from a callable. For Python, this is simply a method return.
+ */
+class ReturnKind extends TReturnKind {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "return" }
+}
+
+/** A data flow node that represents a value returned by a callable. */
+abstract class ReturnNode extends Node {
+ /** Gets the kind of this return node. */
+ ReturnKind getKind() { any() }
+}
+
+/** A data flow node that represents a value returned by a callable. */
+class ExtractedReturnNode extends ReturnNode, CfgNode {
+ // See `TaintTrackingImplementation::returnFlowStep`
+ ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
+
+ override ReturnKind getKind() { any() }
+}
+
+/** A data-flow node that represents the output of a call. */
+abstract class OutNode extends Node {
+ /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
+ abstract DataFlowCall getCall(ReturnKind kind);
+}
+
+private module OutNodes {
+ /**
+ * A data-flow node that reads a value returned directly by a callable.
+ */
+ class ExprOutNode extends OutNode, ExprNode {
+ private DataFlowCall call;
+
+ ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
+
+ override DataFlowCall getCall(ReturnKind kind) {
+ result = call and
+ kind = kind
+ }
+ }
+
+ private class SummaryOutNode extends SummaryNode, OutNode {
+ SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
+
+ override DataFlowCall getCall(ReturnKind kind) {
+ FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
+ }
+ }
+}
+
+/**
+ * Gets a node that can read the value returned from `call` with return kind
+ * `kind`.
+ */
+OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll
deleted file mode 100644
index 0efae6ae45c..00000000000
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll
+++ /dev/null
@@ -1,838 +0,0 @@
-/**
- * INTERNAL: Do not use.
- *
- * Points-to based call-graph.
- */
-
-private import python
-private import DataFlowPublic
-private import semmle.python.SpecialMethods
-private import FlowSummaryImpl as FlowSummaryImpl
-
-/** A parameter position represented by an integer. */
-class ParameterPosition extends int {
- ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
-
- /** Holds if this position represents a positional parameter at position `pos`. */
- predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
-}
-
-/** An argument position represented by an integer. */
-class ArgumentPosition extends int {
- ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
-
- /** Holds if this position represents a positional argument at position `pos`. */
- predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
-}
-
-/** Holds if arguments at position `apos` match parameters at position `ppos`. */
-pragma[inline]
-predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
-
-/**
- * Computes routing of arguments to parameters
- *
- * When a call contains more positional arguments than there are positional parameters,
- * the extra positional arguments are passed as a tuple to a starred parameter. This is
- * achieved by synthesizing a node `TPosOverflowNode(call, callable)`
- * that represents the tuple of extra positional arguments. There is a store step from each
- * extra positional argument to this node.
- *
- * CURRENTLY NOT SUPPORTED:
- * When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
- *
- * CURRENTLY NOT SUPPORTED:
- * If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
- * positional arguments are passed to the starred argument.
- *
- * When a call contains keyword arguments that do not correspond to keyword parameters, these
- * extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
- * achieved by synthesizing a node `TKwOverflowNode(call, callable)`
- * that represents the dictionary of extra keyword arguments. There is a store step from each
- * extra keyword argument to this node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
- * the value at such a key is unpacked and passed to the parameter. This is achieved
- * by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
- * value. This node is used as the argument passed to the matching keyword parameter. There is a read
- * step from the dictionary argument to the synthesized argument node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
- * entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
- * adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
- * step to clear content of that node at any unpacked keys.
- *
- * ## Examples:
- * Assume that we have the callable
- * ```python
- * def f(x, y, *t, **d):
- * pass
- * ```
- * Then the call
- * ```python
- * f(0, 1, 2, a=3)
- * ```
- * will be modeled as
- * ```python
- * f(0, 1, [*t], [**d])
- * ```
- * where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
- * `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
- * There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
- * `a`.
- *
- * For the call
- * ```python
- * f(0, **{"y": 1, "a": 3})
- * ```
- * no tuple argument is synthesized. It is modeled as
- * ```python
- * f(0, [y=1], [**d])
- * ```
- * where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
- * a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
- * `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
- * a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
- */
-module ArgumentPassing {
- /**
- * Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
- *
- * It _may not_ be the case that `call = callable.getACall()`, i.e. if `call` represents a `ClassCall`.
- *
- * Used to limit the size of predicates.
- */
- predicate connects(CallNode call, CallableValue callable) {
- exists(NormalCall c |
- call = c.getNode() and
- callable = c.getCallable().getCallableValue()
- )
- }
-
- /**
- * Gets the `n`th parameter of `callable`.
- * If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
- * If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
- * Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
- * as it is an explicit parameter at position 0.
- */
- NameNode getParameter(CallableValue callable, int n) {
- // positional parameter
- result = callable.getParameter(n)
- or
- // starred parameter, `*tuple`
- exists(Function f |
- f = callable.getScope() and
- n = -1 and
- result = f.getVararg().getAFlowNode()
- )
- or
- // doubly starred parameter, `**dict`
- exists(Function f |
- f = callable.getScope() and
- n = -2 and
- result = f.getKwarg().getAFlowNode()
- )
- }
-
- /**
- * A type representing a mapping from argument indices to parameter indices.
- * We currently use two mappings: NoShift, the identity, used for ordinary
- * function calls, and ShiftOneUp which is used for calls where an extra argument
- * is inserted. These include method calls, constructor calls and class calls.
- * In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
- */
- newtype TArgParamMapping =
- TNoShift() or
- TShiftOneUp()
-
- /** A mapping used for parameter passing. */
- abstract class ArgParamMapping extends TArgParamMapping {
- /** Gets the index of the parameter that corresponds to the argument at index `argN`. */
- bindingset[argN]
- abstract int getParamN(int argN);
-
- /** Gets a textual representation of this element. */
- abstract string toString();
- }
-
- /** A mapping that passes argument `n` to parameter `n`. */
- class NoShift extends ArgParamMapping, TNoShift {
- NoShift() { this = TNoShift() }
-
- override string toString() { result = "NoShift [n -> n]" }
-
- bindingset[argN]
- override int getParamN(int argN) { result = argN }
- }
-
- /** A mapping that passes argument `n` to parameter `n+1`. */
- class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
- ShiftOneUp() { this = TShiftOneUp() }
-
- override string toString() { result = "ShiftOneUp [n -> n+1]" }
-
- bindingset[argN]
- override int getParamN(int argN) { result = argN + 1 }
- }
-
- /**
- * Gets the node representing the argument to `call` that is passed to the parameter at
- * (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
- * at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
- *
- * `mapping` will be the identity for function calls, but not for method- or constructor calls,
- * where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
- * Similarly for classmethod calls, where the first parameter is `cls`.
- *
- * NOT SUPPORTED: Keyword-only parameters.
- */
- Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
- connects(call, callable) and
- (
- // positional argument
- exists(int argN |
- paramN = mapping.getParamN(argN) and
- result = TCfgNode(call.getArg(argN))
- )
- or
- // keyword argument
- // TODO: Since `getArgName` have no results for keyword-only parameters,
- // these are currently not supported.
- exists(Function f, string argName |
- f = callable.getScope() and
- f.getArgName(paramN) = argName and
- result = TCfgNode(call.getArgByName(unbind_string(argName)))
- )
- or
- // a synthesized argument passed to the starred parameter (at position -1)
- callable.getScope().hasVarArg() and
- paramN = -1 and
- result = TPosOverflowNode(call, callable)
- or
- // a synthesized argument passed to the doubly starred parameter (at position -2)
- callable.getScope().hasKwArg() and
- paramN = -2 and
- result = TKwOverflowNode(call, callable)
- or
- // argument unpacked from dict
- exists(string name |
- call_unpacks(call, mapping, callable, name, paramN) and
- result = TKwUnpackedNode(call, callable, name)
- )
- )
- }
-
- /** Currently required in `getArg` in order to prevent a bad join. */
- bindingset[result, s]
- private string unbind_string(string s) { result <= s and s <= result }
-
- /** Gets the control flow node that is passed as the `n`th overflow positional argument. */
- ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
- connects(call, callable) and
- exists(Function f, int posCount, int argNr |
- f = callable.getScope() and
- f.hasVarArg() and
- posCount = f.getPositionalParameterCount() and
- result = call.getArg(argNr) and
- argNr >= posCount and
- argNr = posCount + n
- )
- }
-
- /** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
- ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
- connects(call, callable) and
- exists(Function f |
- f = callable.getScope() and
- f.hasKwArg() and
- not exists(f.getArgByName(key)) and
- result = call.getArgByName(key)
- )
- }
-
- /**
- * Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
- * It will then be passed to the parameter of `callable` at index `paramN`.
- */
- predicate call_unpacks(
- CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
- ) {
- connects(call, callable) and
- exists(Function f |
- f = callable.getScope() and
- not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
- name = f.getArgName(paramN) and
- // not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
- // TODO: make the below logic respect control flow splitting (by not going to the AST).
- not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
- paramN >= 0 and
- paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
- exists(call.getNode().getKwargs()) // dict argument available
- )
- }
-}
-
-import ArgumentPassing
-
-/** A callable defined in library code, identified by a unique string. */
-abstract class LibraryCallable extends string {
- bindingset[this]
- LibraryCallable() { any() }
-
- /** Gets a call to this library callable. */
- abstract CallCfgNode getACall();
-
- /** Gets a data-flow node, where this library callable is used as a call-back. */
- abstract ArgumentNode getACallback();
-}
-
-/**
- * IPA type for DataFlowCallable.
- *
- * A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
- * A module has no calls.
- */
-newtype TDataFlowCallable =
- TCallableValue(CallableValue callable) {
- callable instanceof FunctionValue and
- not callable.(FunctionValue).isLambda()
- or
- callable instanceof ClassValue
- } or
- TLambda(Function lambda) { lambda.isLambda() } or
- TModule(Module m) or
- TLibraryCallable(LibraryCallable callable)
-
-/** A callable. */
-class DataFlowCallable extends TDataFlowCallable {
- /** Gets a textual representation of this element. */
- string toString() { result = "DataFlowCallable" }
-
- /** Gets a call to this callable. */
- CallNode getACall() { none() }
-
- /** Gets the scope of this callable */
- Scope getScope() { none() }
-
- /** Gets the specified parameter of this callable */
- NameNode getParameter(int n) { none() }
-
- /** Gets the name of this callable. */
- string getName() { none() }
-
- /** Gets a callable value for this callable, if any. */
- CallableValue getCallableValue() { none() }
-
- /** Gets the underlying library callable, if any. */
- LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
-
- Location getLocation() { none() }
-}
-
-/** A class representing a callable value. */
-class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
- CallableValue callable;
-
- DataFlowCallableValue() { this = TCallableValue(callable) }
-
- override string toString() { result = callable.toString() }
-
- override CallNode getACall() { result = callable.getACall() }
-
- override Scope getScope() { result = callable.getScope() }
-
- override NameNode getParameter(int n) { result = getParameter(callable, n) }
-
- override string getName() { result = callable.getName() }
-
- override CallableValue getCallableValue() { result = callable }
-}
-
-/** A class representing a callable lambda. */
-class DataFlowLambda extends DataFlowCallable, TLambda {
- Function lambda;
-
- DataFlowLambda() { this = TLambda(lambda) }
-
- override string toString() { result = lambda.toString() }
-
- override CallNode getACall() { result = this.getCallableValue().getACall() }
-
- override Scope getScope() { result = lambda.getEvaluatingScope() }
-
- override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
-
- override string getName() { result = "Lambda callable" }
-
- override FunctionValue getCallableValue() {
- result.getOrigin().getNode() = lambda.getDefinition()
- }
-
- Expr getDefinition() { result = lambda.getDefinition() }
-}
-
-/** A class representing the scope in which a `ModuleVariableNode` appears. */
-class DataFlowModuleScope extends DataFlowCallable, TModule {
- Module mod;
-
- DataFlowModuleScope() { this = TModule(mod) }
-
- override string toString() { result = mod.toString() }
-
- override CallNode getACall() { none() }
-
- override Scope getScope() { result = mod }
-
- override NameNode getParameter(int n) { none() }
-
- override string getName() { result = mod.getName() }
-
- override CallableValue getCallableValue() { none() }
-}
-
-class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
- LibraryCallable callable;
-
- LibraryCallableValue() { this = TLibraryCallable(callable) }
-
- override string toString() { result = callable.toString() }
-
- override CallNode getACall() { result = callable.getACall().getNode() }
-
- /** Gets a data-flow node, where this library callable is used as a call-back. */
- ArgumentNode getACallback() { result = callable.getACallback() }
-
- override Scope getScope() { none() }
-
- override NameNode getParameter(int n) { none() }
-
- override string getName() { result = callable }
-
- override LibraryCallable asLibraryCallable() { result = callable }
-}
-
-/**
- * IPA type for DataFlowCall.
- *
- * Calls corresponding to `CallNode`s are either to callable values or to classes.
- * The latter is directed to the callable corresponding to the `__init__` method of the class.
- *
- * An `__init__` method can also be called directly, so that the callable can be targeted by
- * different types of calls. In that case, the parameter mappings will be different,
- * as the class call will synthesize an argument node to be mapped to the `self` parameter.
- *
- * A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
- *
- * TODO: Add `TClassMethodCall` mapping `cls` appropriately.
- */
-newtype TDataFlowCall =
- /**
- * Includes function calls, method calls, class calls and library calls.
- * All these will be associated with a `CallNode`.
- */
- TNormalCall(CallNode call) or
- /**
- * Includes calls to special methods.
- * These will be associated with a `SpecialMethodCallNode`.
- */
- TSpecialCall(SpecialMethodCallNode special) or
- /** A synthesized call inside a summarized callable */
- TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
- FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
- }
-
-/** A call found in the program source (as opposed to a synthesised summary call). */
-class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
-
-/** A call that is taken into account by the global data flow computation. */
-abstract class DataFlowCall extends TDataFlowCall {
- /** Gets a textual representation of this element. */
- abstract string toString();
-
- /** Get the callable to which this call goes, if such exists. */
- abstract DataFlowCallable getCallable();
-
- /**
- * Gets the argument to this call that will be sent
- * to the `n`th parameter of the callable, if any.
- */
- abstract Node getArg(int n);
-
- /** Get the control flow node representing this call, if any. */
- abstract ControlFlowNode getNode();
-
- /** Gets the enclosing callable of this call. */
- abstract DataFlowCallable getEnclosingCallable();
-
- /** Gets the location of this dataflow call. */
- abstract Location getLocation();
-
- /**
- * Holds if this element is at the specified location.
- * The location spans column `startcolumn` of line `startline` to
- * column `endcolumn` of line `endline` in file `filepath`.
- * For more information, see
- * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
- */
- predicate hasLocationInfo(
- string filepath, int startline, int startcolumn, int endline, int endcolumn
- ) {
- this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
- }
-}
-
-/** A call found in the program source (as opposed to a synthesised call). */
-abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
- final override Location getLocation() { result = this.getNode().getLocation() }
-
- abstract override DataFlowCallable getCallable();
-
- abstract override Node getArg(int n);
-
- abstract override ControlFlowNode getNode();
-}
-
-/** A call associated with a `CallNode`. */
-class NormalCall extends ExtractedDataFlowCall, TNormalCall {
- CallNode call;
-
- NormalCall() { this = TNormalCall(call) }
-
- override string toString() { result = call.toString() }
-
- abstract override Node getArg(int n);
-
- override CallNode getNode() { result = call }
-
- abstract override DataFlowCallable getCallable();
-
- override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
-}
-
-/**
- * A call to a function.
- * This excludes calls to bound methods, classes, and special methods.
- * Bound method calls and class calls insert an argument for the explicit
- * `self` parameter, and special method calls have special argument passing.
- */
-class FunctionCall extends NormalCall {
- DataFlowCallableValue callable;
-
- FunctionCall() {
- call = any(FunctionValue f).getAFunctionCall() and
- call = callable.getACall()
- }
-
- override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
- override DataFlowCallable getCallable() { result = callable }
-}
-
-/** A call to a lambda. */
-class LambdaCall extends NormalCall {
- DataFlowLambda callable;
-
- LambdaCall() {
- call = callable.getACall() and
- callable = TLambda(any(Function f))
- }
-
- override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
- override DataFlowCallable getCallable() { result = callable }
-}
-
-/**
- * Represents a call to a bound method call.
- * The node representing the instance is inserted as argument to the `self` parameter.
- */
-class MethodCall extends NormalCall {
- FunctionValue bm;
-
- MethodCall() { call = bm.getAMethodCall() }
-
- private CallableValue getCallableValue() { result = bm }
-
- override Node getArg(int n) {
- n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
- or
- n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
- }
-
- override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/**
- * Represents a call to a class.
- * The pre-update node for the call is inserted as argument to the `self` parameter.
- * That makes the call node be the post-update node holding the value of the object
- * after the constructor has run.
- */
-class ClassCall extends NormalCall {
- ClassValue c;
-
- ClassCall() {
- not c.isAbsent() and
- call = c.getACall()
- }
-
- private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
-
- override Node getArg(int n) {
- n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
- or
- n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
- }
-
- override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/** A call to a special method. */
-class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
- SpecialMethodCallNode special;
-
- SpecialCall() { this = TSpecialCall(special) }
-
- override string toString() { result = special.toString() }
-
- override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
-
- override ControlFlowNode getNode() { result = special }
-
- override DataFlowCallable getCallable() {
- result = TCallableValue(special.getResolvedSpecialMethod())
- }
-
- override DataFlowCallable getEnclosingCallable() {
- result.getScope() = special.getNode().getScope()
- }
-}
-
-/**
- * A call to a summarized callable, a `LibraryCallable`.
- *
- * We currently exclude all resolved calls. This means that a call to, say, `map`, which
- * is a `ClassCall`, cannot currently be given a summary.
- * We hope to lift this restriction in the future and include all potential calls to summaries
- * in this class.
- */
-class LibraryCall extends NormalCall {
- LibraryCall() {
- // TODO: share this with `resolvedCall`
- not (
- call = any(DataFlowCallableValue cv).getACall()
- or
- call = any(DataFlowLambda l).getACall()
- or
- // TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
- call = any(ClassValue c).getACall()
- )
- }
-
- // TODO: Implement Python calling convention?
- override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
-
- // We cannot refer to a `LibraryCallable` here,
- // as that could in turn refer to type tracking.
- // This call will be tied to a `LibraryCallable` via
- // `getViableCallabe` when the global data flow is assembled.
- override DataFlowCallable getCallable() { none() }
-}
-
-/**
- * A synthesized call inside a callable with a flow summary.
- *
- * For example, in
- * ```python
- * map(lambda x: x + 1, [1, 2, 3])
- * ```
- *
- * there is a synthesized call to the lambda argument inside `map`.
- */
-class SummaryCall extends DataFlowCall, TSummaryCall {
- private FlowSummaryImpl::Public::SummarizedCallable c;
- private Node receiver;
-
- SummaryCall() { this = TSummaryCall(c, receiver) }
-
- /** Gets the data flow node that this call targets. */
- Node getReceiver() { result = receiver }
-
- override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
-
- override DataFlowCallable getCallable() { none() }
-
- override Node getArg(int n) { none() }
-
- override ControlFlowNode getNode() { none() }
-
- override string toString() { result = "[summary] call to " + receiver + " in " + c }
-
- override Location getLocation() { none() }
-}
-
-/**
- * The value of a parameter at function entry, viewed as a node in a data
- * flow graph.
- */
-abstract class ParameterNodeImpl extends Node {
- abstract Parameter getParameter();
-
- /**
- * Holds if this node is the parameter of callable `c` at the
- * (zero-based) index `i`.
- */
- abstract predicate isParameterOf(DataFlowCallable c, int i);
-}
-
-/** A parameter for a library callable with a flow summary. */
-class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
- private FlowSummaryImpl::Public::SummarizedCallable sc;
- private int pos;
-
- SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
-
- override Parameter getParameter() { none() }
-
- override predicate isParameterOf(DataFlowCallable c, int i) {
- sc = c.asLibraryCallable() and i = pos
- }
-
- override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
-
- override string toString() { result = "parameter " + pos + " of " + sc }
-
- // Hack to return "empty location"
- override predicate hasLocationInfo(
- string file, int startline, int startcolumn, int endline, int endcolumn
- ) {
- file = "" and
- startline = 0 and
- startcolumn = 0 and
- endline = 0 and
- endcolumn = 0
- }
-}
-
-/** A data-flow node used to model flow summaries. */
-class SummaryNode extends Node, TSummaryNode {
- private FlowSummaryImpl::Public::SummarizedCallable c;
- private FlowSummaryImpl::Private::SummaryNodeState state;
-
- SummaryNode() { this = TSummaryNode(c, state) }
-
- override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
-
- override string toString() { result = "[summary] " + state + " in " + c }
-
- // Hack to return "empty location"
- override predicate hasLocationInfo(
- string file, int startline, int startcolumn, int endline, int endcolumn
- ) {
- file = "" and
- startline = 0 and
- startcolumn = 0 and
- endline = 0 and
- endcolumn = 0
- }
-}
-
-private class SummaryReturnNode extends SummaryNode, ReturnNode {
- private ReturnKind rk;
-
- SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
-
- override ReturnKind getKind() { result = rk }
-}
-
-private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
- SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
-
- override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
- FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
- }
-}
-
-private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
- private Node pre;
-
- SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
-
- override Node getPreUpdateNode() { result = pre }
-}
-
-/** Gets a viable run-time target for the call `call`. */
-DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
- result = call.getCallable()
- or
- // A call to a library callable with a flow summary
- // In this situation we can not resolve the callable from the call,
- // as that would make data flow depend on type tracking.
- // Instead we resolve the call from the summary.
- exists(LibraryCallable callable |
- result = TLibraryCallable(callable) and
- call.getNode() = callable.getACall().getNode()
- )
-}
-
-private newtype TReturnKind = TNormalReturnKind()
-
-/**
- * A return kind. A return kind describes how a value can be returned
- * from a callable. For Python, this is simply a method return.
- */
-class ReturnKind extends TReturnKind {
- /** Gets a textual representation of this element. */
- string toString() { result = "return" }
-}
-
-/** A data flow node that represents a value returned by a callable. */
-abstract class ReturnNode extends Node {
- /** Gets the kind of this return node. */
- ReturnKind getKind() { any() }
-}
-
-/** A data flow node that represents a value returned by a callable. */
-class ExtractedReturnNode extends ReturnNode, CfgNode {
- // See `TaintTrackingImplementation::returnFlowStep`
- ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
-
- override ReturnKind getKind() { any() }
-}
-
-/** A data-flow node that represents the output of a call. */
-abstract class OutNode extends Node {
- /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
- abstract DataFlowCall getCall(ReturnKind kind);
-}
-
-private module OutNodes {
- /**
- * A data-flow node that reads a value returned directly by a callable.
- */
- class ExprOutNode extends OutNode, ExprNode {
- private DataFlowCall call;
-
- ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
-
- override DataFlowCall getCall(ReturnKind kind) {
- result = call and
- kind = kind
- }
- }
-
- private class SummaryOutNode extends SummaryNode, OutNode {
- SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
-
- override DataFlowCall getCall(ReturnKind kind) {
- FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
- }
- }
-}
-
-/**
- * Gets a node that can read the value returned from `call` with return kind
- * `kind`.
- */
-OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
index 016f748a79a..c46cd74e3d4 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
// make it more digestible.
import MatchUnpacking
import IterableUnpacking
-import DataFlowDispatchPointsTo
+import DataFlowDispatch
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -39,162 +39,267 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
-/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
-deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
+// =============================================================================
+// SyntheticPreUpdateNode
+// =============================================================================
+class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
+ CallNode node;
-/** A module collecting the different reasons for synthesising a pre-update node. */
-module SyntheticPreUpdateNode {
- class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
- NeedsSyntheticPreUpdateNode post;
+ SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }
- SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
+ /** Gets the node for which this is a synthetic pre-update node. */
+ CfgNode getPostUpdateNode() { result.getNode() = node }
- /** Gets the node for which this is a synthetic pre-update node. */
- Node getPostUpdateNode() { result = post }
+ override string toString() { result = "[pre] " + node.toString() }
- override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
+ override Scope getScope() { result = node.getScope() }
- override Scope getScope() { result = post.getScope() }
-
- override Location getLocation() { result = post.getLocation() }
- }
-
- /** A data flow node for which we should synthesise an associated pre-update node. */
- class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
- NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
-
- override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
-
- /**
- * Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
- *
- * There is currently only one reason for needing a pre-update node, so we always use that as the label.
- */
- string label() { result = "objCreate" }
- }
-
- /**
- * Calls to constructors are treated as post-update nodes for the synthesized argument
- * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
- * object after the constructor (currently only `__init__`) has run.
- */
- CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
+ override Location getLocation() { result = node.getLocation() }
}
-import SyntheticPreUpdateNode
+// =============================================================================
+// *args (StarArgs) related
+// =============================================================================
+/**
+ * A (synthetic) data-flow parameter node to capture all positional arguments that
+ * should be passed to the `*args` parameter.
+ *
+ * To handle
+ * ```py
+ * def func(*args):
+ * for arg in args:
+ * sink(arg)
+ *
+ * func(source1, source2, ...)
+ * ```
+ *
+ * we add a synthetic parameter to `func` that accepts any positional argument at (or
+ * after) the index for the `*args` parameter. We add a store step (at any list index) to the real
+ * `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
+ * we would (wrongly) add flow for `source2` as well.
+ *
+ * To solve this more precisely, we could add a synthetic argument with position `*args`
+ * that had store steps with the correct index (like we do for mapping keyword arguments to a
+ * `**kwargs` parameter). However, if a single call could go to 2 different
+ * targets with `*args` parameters at different positions, as in the example below, it's unclear what
+ * index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
+ * So this information would need to be encoded in the arguments of an `ArgumentPosition` branch, and
+ * one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
+ * branches based on the call-graph, so this strategy doesn't work.
+ *
+ * Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
+ * to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
+ * nodes for indexes 1 and 2 (which would have store step for index 0 and 1 of the `*args` parameter),
+ * and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
+ * create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
+ * this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
+ * nodes. My gut feeling is that this simple approach will be good enough, but if we need to get it more
+ * precise, it should be possible to do it like this.
+ *
+ * In PR review, @yoff suggested an alternative approach for more precise handling:
+ *
+ * - At the call site, all positional arguments are stored into a synthetic starArgs argument, always starting at index 0
+ * - This is sent to a synthetic star parameter
+ * - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
+ *
+ * ```py
+ * def foo(one, *args): ...
+ * def bar(*args): ...
+ *
+ * func = foo if cond else bar
+ * func(1, 2, 3)
+ */
+class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
+ TSynthStarArgsElementParameterNode {
+ DataFlowCallable callable;
-/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
-deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
+ SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }
-/** A module collecting the different reasons for synthesising a post-update node. */
-module SyntheticPostUpdateNode {
- private import semmle.python.SpecialMethods
+ override string toString() { result = "SynthStarArgsElementParameterNode" }
- /** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
- class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
- NeedsSyntheticPostUpdateNode pre;
+ override Scope getScope() { result = callable.getScope() }
- SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
+ override Location getLocation() { result = callable.getLocation() }
- override Node getPreUpdateNode() { result = pre }
-
- override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
-
- override Scope getScope() { result = pre.getScope() }
-
- override Location getLocation() { result = pre.getLocation() }
- }
-
- /** A data flow node for which we should synthesise an associated post-update node. */
- class NeedsSyntheticPostUpdateNode extends Node {
- NeedsSyntheticPostUpdateNode() {
- this = argumentPreUpdateNode()
- or
- this = storePreUpdateNode()
- or
- this = readPreUpdateNode()
- }
-
- /**
- * Gets the label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
- * We favour being an arguments as the reason for the post-update node in case multiple reasons apply.
- */
- string label() {
- if this = argumentPreUpdateNode()
- then result = "arg"
- else
- if this = storePreUpdateNode()
- then result = "store"
- else result = "read"
- }
- }
-
- /**
- * Gets the pre-update node for this node.
- *
- * An argument might have its value changed as a result of a call.
- * Certain arguments, such as implicit self arguments are already post-update nodes
- * and should not have an extra node synthesised.
- */
- Node argumentPreUpdateNode() {
- result = any(FunctionCall c).getArg(_)
- or
- result = any(LambdaCall c).getArg(_)
- or
- // Avoid argument 0 of method calls as those have read post-update nodes.
- exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
- or
- result = any(SpecialCall c).getArg(_)
- or
- // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
- exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
- or
- // any argument of any call that we have not been able to resolve
- exists(CallNode call | not resolvedCall(call) |
- result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
- )
- }
-
- /** Holds if `call` can be resolved as a normal call */
- private predicate resolvedCall(CallNode call) {
- call = any(DataFlowCallableValue cv).getACall()
- or
- call = any(DataFlowLambda l).getACall()
- }
-
- /** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
- CfgNode storePreUpdateNode() {
- exists(Attribute a |
- result.getNode() = a.getObject().getAFlowNode() and
- a.getCtx() instanceof Store
- )
- }
-
- /**
- * Gets a node marking the state change of an object after a read.
- *
- * A reverse read happens when the result of a read is modified, e.g. in
- * ```python
- * l = [ mutable ]
- * l[0].mutate()
- * ```
- * we may now have changed the content of `l`. To track this, there must be
- * a postupdate node for `l`.
- */
- CfgNode readPreUpdateNode() {
- exists(Attribute a |
- result.getNode() = a.getObject().getAFlowNode() and
- a.getCtx() instanceof Load
- )
- or
- result.getNode() = any(SubscriptNode s).getObject()
- or
- // The dictionary argument is read from if the callable has parameters matching the keys.
- result.getNode().getNode() = any(Call call).getKwargs()
- }
+ override Parameter getParameter() { none() }
}
-import SyntheticPostUpdateNode
+predicate synthStarArgsElementParameterNodeStoreStep(
+ SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
+) {
+ c = c and // suppress warning about unused parameter
+ exists(DataFlowCallable callable, ParameterPosition ppos |
+ nodeFrom = TSynthStarArgsElementParameterNode(callable) and
+ nodeTo = callable.getParameter(ppos) and
+ ppos.isStarArgs(_)
+ )
+}
+
+// =============================================================================
+// **kwargs (DictSplat) related
+// =============================================================================
+/**
+ * A (synthetic) data-flow node that represents all keyword arguments, as if they had
+ * been passed in a `**kwargs` argument.
+ */
+class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
+ CallNode node;
+
+ SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
+
+ override string toString() { result = "SynthDictSplatArgumentNode" }
+
+ override Scope getScope() { result = node.getScope() }
+
+ override Location getLocation() { result = node.getLocation() }
+}
+
+private predicate synthDictSplatArgumentNodeStoreStep(
+ ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
+) {
+ exists(string name, CallNode call, ArgumentPosition keywordPos |
+ nodeTo = TSynthDictSplatArgumentNode(call) and
+ getCallArg(call, _, _, nodeFrom, keywordPos) and
+ keywordPos.isKeyword(name) and
+ c.getKey() = name
+ )
+}
+
+/**
+ * Ensures that a `**kwargs` parameter will not contain elements with names of
+ * keyword parameters.
+ *
+ * For example, for the function below, it's not possible that the `kwargs` dictionary
+ * can contain an element with the name `a`, since that parameter can be given as a
+ * keyword argument.
+ *
+ * ```py
+ * def func(a, **kwargs):
+ * ...
+ * ```
+ */
+private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
+ exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
+ dictSplatPos.isDictSplat() and
+ (
+ n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+ or
+ n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
+ ) and
+ exists(callable.getParameter(keywordPos)) and
+ keywordPos.isKeyword(c.getKey())
+ )
+}
+
+/**
+ * A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
+ *
+ * Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
+ * with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
+ * and a read step to the `p1` parameter.
+ *
+ * ```py
+ * def foo(p1, p2): ...
+ *
+ * kwargs = {"p1": 42, "p2": 43}
+ * foo(**kwargs)
+ * ```
+ *
+ *
+ * Note that this will introduce a bit of redundancy in cases like
+ *
+ * ```py
+ * foo(p1=taint(1), p2=taint(2))
+ * ```
+ *
+ * where direct keyword matching is possible, since we construct a synthesized dict
+ * splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
+ * `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
+ * nodes (and similarly for `p2`). However, this redundancy is OK since
+ * (a) it means that type-tracking through keyword arguments also works in most cases,
+ * (b) read/store steps can be avoided when direct keyword matching is possible, and
+ * hence access path limits are not a concern, and
+ * (c) since the synthesized nodes are hidden, the reported data-flow paths will be
+ * collapsed anyway.
+ */
+class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
+ DataFlowCallable callable;
+
+ SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
+
+ override string toString() { result = "SynthDictSplatParameterNode" }
+
+ override Scope getScope() { result = callable.getScope() }
+
+ override Location getLocation() { result = callable.getLocation() }
+
+ override Parameter getParameter() { none() }
+}
+
+/**
+ * Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
+ * Due to a restriction in the dataflow library, we can only give one of them as the result for
+ * `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
+ * _both_ of them.
+ */
+private predicate dictSplatParameterNodeFlowStep(
+ ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
+) {
+ exists(DataFlowCallable callable |
+ nodeFrom = TSynthDictSplatParameterNode(callable) and
+ (
+ nodeTo.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+ or
+ exists(ParameterPosition pos |
+ nodeTo = TSummaryParameterNode(callable.asLibraryCallable(), pos) and
+ pos.isDictSplat()
+ )
+ )
+ )
+}
+
+/**
+ * Reads from the synthetic **kwargs parameter to each keyword parameter.
+ */
+predicate synthDictSplatParameterNodeReadStep(
+ SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
+) {
+ exists(DataFlowCallable callable, ParameterPosition ppos |
+ nodeFrom = TSynthDictSplatParameterNode(callable) and
+ nodeTo = callable.getParameter(ppos) and
+ ppos.isKeyword(c.getKey())
+ )
+}
+
+// =============================================================================
+// PostUpdateNode
+// =============================================================================
+abstract class PostUpdateNodeImpl extends Node {
+ /** Gets the node before the state update. */
+ abstract Node getPreUpdateNode();
+}
+
+class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
+ ControlFlowNode node;
+
+ SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
+
+ override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
+
+ override string toString() { result = "[post] " + node.toString() }
+
+ override Scope getScope() { result = node.getScope() }
+
+ override Location getLocation() { result = node.getLocation() }
+}
+
+class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
+ SyntheticPreUpdateNode pre;
+
+ NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
+
+ override Node getPreUpdateNode() { result = pre }
+}
class DataFlowExpr = Expr;
@@ -274,13 +379,6 @@ module EssaFlow {
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
- or
- // Overflow keyword argument
- exists(CallNode call, CallableValue callable |
- call = callable.getACall() and
- nodeTo = TKwOverflowNode(call, callable) and
- nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
- )
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -305,6 +403,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
or
summaryFlowSteps(nodeFrom, nodeTo)
+ or
+ dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
}
/**
@@ -521,15 +621,15 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
- posOverflowStoreStep(nodeFrom, c, nodeTo)
- or
- kwOverflowStoreStep(nodeFrom, c, nodeTo)
- or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
+ or
+ synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
+ or
+ synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
}
/**
@@ -669,30 +769,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
)
}
-/**
- * Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
- * at the position indicated by `c`.
- */
-predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
- exists(CallNode call, CallableValue callable, int n |
- nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
- nodeTo = TPosOverflowNode(call, callable) and
- c.getIndex() = n
- )
-}
-
-/**
- * Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
- * at the key indicated by `c`.
- */
-predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
- exists(CallNode call, CallableValue callable, string key |
- nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
- nodeTo = TKwOverflowNode(call, callable) and
- c.getKey() = key
- )
-}
-
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
@@ -722,9 +798,9 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
attributeReadStep(nodeFrom, c, nodeTo)
or
- kwUnpackReadStep(nodeFrom, c, nodeTo)
- or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
+ or
+ synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
@@ -814,43 +890,19 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}
-/**
- * Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
- * synthesized unpacked argument with the name indicated by `c`.
- */
-predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
- exists(CallNode call, string name |
- nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
- nodeTo = TKwUnpackedNode(call, _, name) and
- name = c.getKey()
- )
-}
-
-/**
- * Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
- * whenever `call` unpacks `name`.
- */
-predicate kwOverflowClearStep(Node n, Content c) {
- exists(CallNode call, CallableValue callable, string name |
- call_unpacks(call, _, callable, name, _) and
- n = TKwOverflowNode(call, callable) and
- c.(DictionaryElementContent).getKey() = name
- )
-}
-
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
- kwOverflowClearStep(n, c)
- or
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
+ or
+ dictSplatParameterNodeClearStep(n, c)
}
/**
@@ -906,23 +958,24 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
+ or
+ n instanceof SynthStarArgsElementParameterNode
+ or
+ n instanceof SynthDictSplatArgumentNode
+ or
+ n instanceof SynthDictSplatParameterNode
}
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
- // lambda
+ // lambda and plain functions
kind = kind and
- creation.asExpr() = c.(DataFlowLambda).getDefinition()
- or
- // normal function
- exists(FunctionDef def |
- def.defines(creation.asVar().getSourceVariable()) and
- def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
- )
+ creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
or
// summarized function
+ exists(kind) and // avoid warning on unused 'kind'
exists(Call call |
creation.asExpr() = call.getAnArg() and
creation = c.(LibraryCallableValue).getACallback()
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
index 4a00d0aafc3..440ce3b70d4 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -31,10 +31,44 @@ newtype TNode =
or
node.getNode() instanceof Pattern
} or
- /** A synthetic node representing the value of an object before a state change */
- TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
- /** A synthetic node representing the value of an object after a state change. */
- TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
+ /**
+ * A synthetic node representing the value of an object before a state change.
+ *
+ * For class calls we pass a synthetic self argument, so attribute writes in
+ * `__init__` is reflected on the resulting object (we need special logic for this
+ * since there is no `return` in `__init__`)
+ */
+ // NOTE: since we can't rely on the call graph, but we want to have synthetic
+ // pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
+ // ALL calls :|
+ TSyntheticPreUpdateNode(CallNode call) or
+ /**
+ * A synthetic node representing the value of an object after a state change.
+ * See QLDoc for `PostUpdateNode`.
+ */
+ TSyntheticPostUpdateNode(ControlFlowNode node) {
+ exists(CallNode call |
+ node = call.getArg(_)
+ or
+ node = call.getArgByName(_)
+ or
+ // `self` argument when handling class instance calls (`__call__` special method)
+ node = call.getFunction()
+ )
+ or
+ node = any(AttrNode a).getObject()
+ or
+ node = any(SubscriptNode s).getObject()
+ or
+ // self parameter when used implicitly in `super()`
+ exists(Class cls, Function func, ParameterDefinition def |
+ func = cls.getAMethod() and
+ not isStaticmethod(func) and
+ // this matches what we do in ExtractedParameterNode
+ def.getDefiningNode() = node and
+ def.getParameter() = func.getArg(0)
+ )
+ } or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
@@ -45,37 +79,6 @@ newtype TNode =
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
- /**
- * A node representing the overflow positional arguments to a call.
- * That is, `call` contains more positional arguments than there are
- * positional parameters in `callable`. The extra ones are passed as
- * a tuple to a starred parameter; this synthetic node represents that tuple.
- */
- TPosOverflowNode(CallNode call, CallableValue callable) {
- exists(getPositionalOverflowArg(call, callable, _))
- } or
- /**
- * A node representing the overflow keyword arguments to a call.
- * That is, `call` contains keyword arguments for keys that do not have
- * keyword parameters in `callable`. These extra ones are passed as
- * a dictionary to a doubly starred parameter; this synthetic node
- * represents that dictionary.
- */
- TKwOverflowNode(CallNode call, CallableValue callable) {
- exists(getKeywordOverflowArg(call, callable, _))
- or
- ArgumentPassing::connects(call, callable) and
- exists(call.getNode().getKwargs()) and
- callable.getScope().hasKwArg()
- } or
- /**
- * A node representing an unpacked element of a dictionary argument.
- * That is, `call` contains argument `**{"foo": bar}` which is passed
- * to parameter `foo` of `callable`.
- */
- TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
- call_unpacks(call, _, callable, name, _)
- } or
/**
* A synthetic node representing that an iterable sequence flows to consumer.
*/
@@ -109,10 +112,18 @@ newtype TNode =
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
+ } or
+ /** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
+ TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
+ exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
+ } or
+ /** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
+ TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
+ /** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
+ TSynthDictSplatParameterNode(DataFlowCallable callable) {
+ exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
}
-class TParameterNode = TCfgNode or TSummaryParameterNode;
-
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
result.getScope() = s
@@ -288,7 +299,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
-class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
+class ParameterNode extends Node instanceof ParameterNodeImpl {
/** Gets the parameter corresponding to this node, if any. */
final Parameter getParameter() { result = super.getParameter() }
}
@@ -298,18 +309,8 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
//, LocalSourceNode {
ParameterDefinition def;
- ExtractedParameterNode() {
- node = def.getDefiningNode() and
- // Disregard parameters that we cannot resolve
- // TODO: Make this unnecessary
- exists(DataFlowCallable c | node = c.getParameter(_))
- }
+ ExtractedParameterNode() { node = def.getDefiningNode() }
- override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
-
- override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
-
- /** Gets the `Parameter` this `ParameterNode` represents. */
override Parameter getParameter() { result = def.getParameter() }
}
@@ -327,16 +328,24 @@ abstract class ArgumentNode extends Node {
final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
}
-/** A data flow node that represents a call argument found in the source code. */
+/**
+ * A data flow node that represents a call argument found in the source code.
+ */
class ExtractedArgumentNode extends ArgumentNode {
- ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
-
- final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
- this.extractedArgumentOf(call, pos)
+ ExtractedArgumentNode() {
+ // for resolved calls, we need to allow all argument nodes
+ getCallArg(_, _, _, this, _)
+ or
+ // for potential summaries we allow all normal call arguments
+ normalCallArg(_, this, _)
+ or
+ // and self arguments
+ this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
}
- predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
- this = call.getArg(pos)
+ final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
+ this = call.getArgument(pos) and
+ call instanceof ExtractedDataFlowCall
}
}
@@ -345,16 +354,17 @@ class ExtractedArgumentNode extends ArgumentNode {
* changed its state.
*
* This can be either the argument to a callable after the callable returns
- * (which might have mutated the argument), or the qualifier of a field after
- * an update to the field.
+ * (which might have mutated the argument), the qualifier of a field after
+ * an update to the field, or a container such as a list/dictionary after an element
+ * update.
*
* Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
- * to the value before the update with the exception of `ObjectCreationNode`s,
+ * to the value before the update with the exception of class calls,
* which represents the value _after_ the constructor has run.
*/
-abstract class PostUpdateNode extends Node {
+class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
/** Gets the node before the state update. */
- abstract Node getPreUpdateNode();
+ Node getPreUpdateNode() { result = super.getPreUpdateNode() }
}
/**
@@ -448,70 +458,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
)
}
-/**
- * The node holding the extra positional arguments to a call. This node is passed as a tuple
- * to the starred parameter of the callable.
- */
-class PosOverflowNode extends Node, TPosOverflowNode {
- CallNode call;
-
- PosOverflowNode() { this = TPosOverflowNode(call, _) }
-
- override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node holding the extra keyword arguments to a call. This node is passed as a dictionary
- * to the doubly starred parameter of the callable.
- */
-class KwOverflowNode extends Node, TKwOverflowNode {
- CallNode call;
-
- KwOverflowNode() { this = TKwOverflowNode(call, _) }
-
- override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node representing the synthetic argument of a call that is unpacked from a dictionary
- * argument.
- */
-class KwUnpackedNode extends Node, TKwUnpackedNode {
- CallNode call;
- string name;
-
- KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
-
- override string toString() { result = "KwUnpacked " + name }
-
- override DataFlowCallable getEnclosingCallable() {
- exists(Node node |
- node = TCfgNode(call) and
- result = node.getEnclosingCallable()
- )
- }
-
- override Location getLocation() { result = call.getLocation() }
-}
-
/**
* A synthetic node representing an iterable sequence. Used for changing content type
* for instance from a `ListElement` to a `TupleElement`, especially if the content is
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
index 2d731d72e99..dcf4d987e92 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
@@ -61,11 +61,11 @@ bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
- * Gets the type of the `i`th parameter in a synthesized call that targets a
- * callback of type `t`.
+ * Gets the type of the parameter matching arguments at position `pos` in a
+ * synthesized call that targets a callback of type `t`.
*/
-bindingset[t, i]
-DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
+bindingset[t, pos]
+DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
@@ -114,10 +114,34 @@ string getComponentSpecific(SummaryComponent sc) {
}
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
-string getParameterPosition(ParameterPosition pos) { result = pos.toString() }
+string getParameterPosition(ParameterPosition pos) {
+ pos.isSelf() and result = "self"
+ or
+ exists(int i |
+ pos.isPositional(i) and
+ result = i.toString()
+ )
+ or
+ exists(string name |
+ pos.isKeyword(name) and
+ result = name + ":"
+ )
+}
/** Gets the textual representation of an argument position in the format used for flow summaries. */
-string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() }
+string getArgumentPosition(ArgumentPosition pos) {
+ pos.isSelf() and result = "self"
+ or
+ exists(int i |
+ pos.isPositional(i) and
+ result = i.toString()
+ )
+ or
+ exists(string name |
+ pos.isKeyword(name) and
+ result = name + ":"
+ )
+}
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -197,29 +221,55 @@ module ParsePositions {
)
}
- predicate isParsedParameterPosition(string c, int i) {
+ predicate isParsedPositionalParameterPosition(string c, int i) {
isParamBody(c) and
i = AccessPath::parseInt(c)
}
- predicate isParsedArgumentPosition(string c, int i) {
+ predicate isParsedKeywordParameterPosition(string c, string paramName) {
+ isParamBody(c) and
+ c = paramName + ":"
+ }
+
+ predicate isParsedPositionalArgumentPosition(string c, int i) {
isArgBody(c) and
i = AccessPath::parseInt(c)
}
+
+ predicate isParsedKeywordArgumentPosition(string c, string argName) {
+ isArgBody(c) and
+ c = argName + ":"
+ }
}
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
exists(int i |
- ParsePositions::isParsedParameterPosition(s, i) and
+ ParsePositions::isParsedPositionalParameterPosition(s, i) and
result.isPositional(i)
)
+ or
+ exists(string name |
+ ParsePositions::isParsedKeywordParameterPosition(s, name) and
+ result.isKeyword(name)
+ )
+ or
+ s = "self" and
+ result.isSelf()
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
exists(int i |
- ParsePositions::isParsedArgumentPosition(s, i) and
+ ParsePositions::isParsedPositionalArgumentPosition(s, i) and
result.isPositional(i)
)
+ or
+ exists(string name |
+ ParsePositions::isParsedKeywordArgumentPosition(s, name) and
+ result.isKeyword(name)
+ )
+ or
+ s = "self" and
+ result.isSelf()
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll
index e4af21caacc..7af9ca524aa 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll
@@ -71,13 +71,19 @@ module ImportResolution {
*/
pragma[nomagic]
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
- exists(EssaVariable v |
+ exists(EssaVariable v, EssaDefinition essaDef |
v.getName() = name and
- v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
+ v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
+ (
+ essaDef = v.getDefinition()
+ or
+ // to handle definitions guarded by if-then-else
+ essaDef = v.getDefinition().(PhiFunction).getAnInput()
+ )
|
- defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
+ defn.getNode() = essaDef.(AssignmentDefinition).getValue()
or
- defn.getNode() = v.getDefinition().(ArgumentRefinement).getArgument()
+ defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
)
or
exists(Alias a |
@@ -167,8 +173,22 @@ module ImportResolution {
)
}
+ /**
+ * Gets the (most likely) module for the name `name`, if any.
+ *
+   * Handles the fact that for the name `<name>` representing a package, the actual
+   * module is `<name>.__init__`.
+ *
+ * See `isPreferredModuleForName` for more details on what "most likely" module means.
+ */
+ pragma[inline]
+ private Module getModuleFromName(string name) {
+ isPreferredModuleForName(result.getFile(), name + ["", ".__init__"])
+ }
+
+ /** Gets the module from which attributes are imported by `i`. */
Module getModuleImportedByImportStar(ImportStar i) {
- isPreferredModuleForName(result.getFile(), i.getImportedModuleName())
+ result = getModuleFromName(i.getImportedModuleName())
}
/**
@@ -223,7 +243,7 @@ module ImportResolution {
exists(string module_name | result = getReferenceToModuleName(module_name) |
// Depending on whether the referenced module is a package or not, we may need to add a
// trailing `.__init__` to the module name.
- isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
+ m = getModuleFromName(module_name)
or
// Module defined via `sys.modules`
m = sys_modules_module_with_name(module_name)
@@ -234,7 +254,7 @@ module ImportResolution {
ar.accesses(getModuleReference(p), attr_name) and
result = ar
|
- isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
+ m = getModuleFromName(p.getPackageName() + "." + attr_name)
)
or
// This is also true for attributes that come from reexports.
@@ -248,8 +268,7 @@ module ImportResolution {
exists(string submodule, Module package |
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
- isPreferredModuleForName(m.getFile(),
- package.getPackageName() + "." + submodule + ["", ".__init__"])
+ m = getModuleFromName(package.getPackageName() + "." + submodule)
)
}
diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
index 690216089e9..67e3db984e8 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -60,22 +60,6 @@ string getPossibleContentName() {
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
-/**
- * Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
- *
- * Helper predicate to avoid bad join order experienced in `callStep`.
- * This happened when `isParameterOf` was joined _before_ `getCallable`.
- */
-pragma[nomagic]
-private DataFlowPrivate::DataFlowCallable getCallableForArgument(
- DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
-) {
- exists(DataFlowPrivate::ExtractedDataFlowCall call |
- nodeFrom.extractedArgumentOf(call, i) and
- result = call.getCallable()
- )
-}
-
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
@@ -83,11 +67,15 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
-predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
- // TODO: Support special methods?
- exists(DataFlowPrivate::DataFlowCallable callable, int i |
- callable = getCallableForArgument(nodeFrom, i) and
- nodeTo.isParameterOf(callable, i)
+predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
+ exists(
+ DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
+ DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
+ |
+ nodeFrom = call.getArgument(apos) and
+ nodeTo = callable.getParameter(ppos) and
+ DataFlowPrivate::parameterMatch(ppos, apos) and
+ callable = call.getCallable()
)
}
diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
index 3fce979c147..4a4b34af422 100644
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -1465,7 +1465,19 @@ private module StdlibPrivate {
t.start() and
result = openCall and
(
- openCall instanceof OpenCall
+ openCall instanceof OpenCall and
+ // don't include the open call inside of Path.open in pathlib.py since
+ // the call to `path_obj.open` is covered by `PathLibOpenCall`.
+ not exists(Module mod, Class cls, Function func |
+ openCall.(OpenCall).asCfgNode().getScope() = func and
+ func.getName() = "open" and
+ func.getScope() = cls and
+ cls.getName() = "Path" and
+ cls.getScope() = mod and
+ mod.getName() = "pathlib" and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(mod.getFile().getRelativePath())
+ )
or
openCall instanceof PathLibOpenCall
)
@@ -2669,6 +2681,7 @@ private module StdlibPrivate {
HashlibNewCall() {
this = hashlibNewCall(hashName) and
+      // we only want to consider it as a cryptographic operation if the input is available
exists(this.getParameter(1, "data"))
}
@@ -2751,6 +2764,78 @@ private module StdlibPrivate {
}
}
+ // ---------------------------------------------------------------------------
+ // hmac
+ // ---------------------------------------------------------------------------
+ abstract class HmacCryptographicOperation extends Cryptography::CryptographicOperation::Range,
+ API::CallNode {
+ abstract API::Node getDigestArg();
+
+ override Cryptography::CryptographicAlgorithm getAlgorithm() {
+ exists(string algorithmName | result.matchesName(algorithmName) |
+ this.getDigestArg().asSink() = hashlibMember(algorithmName).asSource()
+ or
+ this.getDigestArg().getAValueReachingSink().asExpr().(StrConst).getText() = algorithmName
+ )
+ }
+
+ override Cryptography::BlockMode getBlockMode() { none() }
+ }
+
+ API::CallNode getHmacConstructorCall(API::Node digestArg) {
+ result = API::moduleImport("hmac").getMember(["new", "HMAC"]).getACall() and
+ digestArg = result.getParameter(2, "digestmod")
+ }
+
+ /**
+ * A call to `hmac.new`/`hmac.HMAC`.
+ *
+ * See https://docs.python.org/3.11/library/hmac.html#hmac.new
+ */
+ class HmacNewCall extends HmacCryptographicOperation {
+ API::Node digestArg;
+
+ HmacNewCall() {
+ this = getHmacConstructorCall(digestArg) and
+      // we only want to consider it as a cryptographic operation if the input is available
+ exists(this.getParameter(1, "msg").asSink())
+ }
+
+ override API::Node getDigestArg() { result = digestArg }
+
+ override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
+ }
+
+ /**
+ * A call to `.update` on an HMAC object.
+ *
+ * See https://docs.python.org/3.11/library/hmac.html#hmac.HMAC.update
+ */
+ class HmacUpdateCall extends HmacCryptographicOperation {
+ API::Node digestArg;
+
+ HmacUpdateCall() {
+ this = getHmacConstructorCall(digestArg).getReturn().getMember("update").getACall()
+ }
+
+ override API::Node getDigestArg() { result = digestArg }
+
+ override DataFlow::Node getAnInput() { result = this.getParameter(0, "msg").asSink() }
+ }
+
+ /**
+ * A call to `hmac.digest`.
+ *
+ * See https://docs.python.org/3.11/library/hmac.html#hmac.digest
+ */
+ class HmacDigestCall extends HmacCryptographicOperation {
+ HmacDigestCall() { this = API::moduleImport("hmac").getMember("digest").getACall() }
+
+ override API::Node getDigestArg() { result = this.getParameter(2, "digest") }
+
+ override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
+ }
+
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll
index 290a90f5a73..40dda556caa 100644
--- a/python/ql/lib/semmle/python/internal/CachedStages.qll
+++ b/python/ql/lib/semmle/python/internal/CachedStages.qll
@@ -93,6 +93,8 @@ module Stages {
exists(PyFlow::DefinitionNode b)
or
exists(any(PyFlow::SequenceNode n).getElement(_))
+ or
+ exists(any(PyFlow::ControlFlowNode c).toString())
}
}
@@ -125,6 +127,45 @@ module Stages {
}
}
+ /**
+ * The points-to stage.
+ */
+ cached
+ module PointsTo {
+ /**
+ * Always holds.
+ * Ensures that a predicate is evaluated as part of the points-to stage.
+ */
+ cached
+ predicate ref() { 1 = 1 }
+
+ private import semmle.python.pointsto.Base as PointsToBase
+ private import semmle.python.types.Object as TypeObject
+ private import semmle.python.objects.TObject as TObject
+ private import semmle.python.objects.ObjectInternal as ObjectInternal
+ // have to alias since this module is also called PointsTo
+ private import semmle.python.pointsto.PointsTo as RealPointsTo
+
+ /**
+ * DONT USE!
+ * Contains references to each predicate that use the above `ref` predicate.
+ */
+ cached
+ predicate backref() {
+ 1 = 1
+ or
+ PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
+ or
+ exists(TypeObject::Object a)
+ or
+ exists(TObject::TObject f)
+ or
+ exists(any(ObjectInternal::ObjectInternal o).toString())
+ or
+ RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
+ }
+ }
+
/**
* The `dataflow` stage.
*/
@@ -138,14 +179,9 @@ module Stages {
predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
+ private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited
- private import semmle.python.pointsto.Base as PointsToBase
- private import semmle.python.types.Object as TypeObject
- private import semmle.python.objects.TObject as TObject
- private import semmle.python.Flow as Flow
- private import semmle.python.objects.ObjectInternal as ObjectInternal
- private import semmle.python.pointsto.PointsTo as PointsTo
/**
* DONT USE!
@@ -159,21 +195,13 @@ module Stages {
or
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
or
+ DataFlowDispatch::resolveCall(_, _, _)
+ or
+ DataFlowDispatch::getCallArg(_, _, _, _, _)
+ or
any(LocalSources::LocalSourceNode n).flowsTo(_)
or
exists(Awaited::awaited(_))
- or
- PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
- or
- exists(TypeObject::Object a)
- or
- exists(TObject::TObject f)
- or
- exists(any(Flow::ControlFlowNode c).toString())
- or
- exists(any(ObjectInternal::ObjectInternal o).toString())
- or
- PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
}
}
}
diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
index 2f6c8bb8b29..e86b156e204 100644
--- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
@@ -45,8 +45,12 @@ module Cryptography {
/**
* Gets the block mode used to perform this cryptographic operation.
- * This may have no result - for example if the `CryptographicAlgorithm` used
- * is a stream cipher rather than a block cipher.
+ *
+ * This predicate is only expected to have a result if two conditions hold:
+ * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
+ * 2. The algorithm used is a block cipher (not a stream cipher).
+ *
+     * If either of these conditions does not hold, then this predicate should have no result.
*/
BlockMode getBlockMode() { result = super.getBlockMode() }
}
@@ -69,8 +73,12 @@ module Cryptography {
/**
* Gets the block mode used to perform this cryptographic operation.
- * This may have no result - for example if the `CryptographicAlgorithm` used
- * is a stream cipher rather than a block cipher.
+ *
+ * This predicate is only expected to have a result if two conditions hold:
+ * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
+ * 2. The algorithm used is a block cipher (not a stream cipher).
+ *
+     * If either of these conditions does not hold, then this predicate should have no result.
*/
abstract BlockMode getBlockMode();
}
@@ -81,10 +89,21 @@ module Cryptography {
* data of arbitrary length using a block encryption algorithm.
*/
class BlockMode extends string {
- BlockMode() { this = ["ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP"] }
+ BlockMode() {
+ this =
+ [
+ "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP",
+ "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final
+ "EAX" // https://en.wikipedia.org/wiki/EAX_mode
+ ]
+ }
/** Holds if this block mode is considered to be insecure. */
predicate isWeak() { this = "ECB" }
+
+ /** Holds if the given string appears to match this block mode. */
+ bindingset[s]
+ predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
}
}
diff --git a/python/ql/lib/semmle/python/objects/ObjectInternal.qll b/python/ql/lib/semmle/python/objects/ObjectInternal.qll
index b6725e87cb6..a58b8b5f0a9 100644
--- a/python/ql/lib/semmle/python/objects/ObjectInternal.qll
+++ b/python/ql/lib/semmle/python/objects/ObjectInternal.qll
@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }
override string toString() {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
result = this.getBuiltin().getClass().getName() + " object"
}
diff --git a/python/ql/lib/semmle/python/pointsto/Base.qll b/python/ql/lib/semmle/python/pointsto/Base.qll
index a3407419da2..96437cfed7e 100644
--- a/python/ql/lib/semmle/python/pointsto/Base.qll
+++ b/python/ql/lib/semmle/python/pointsto/Base.qll
@@ -318,7 +318,7 @@ module BaseFlow {
predicate scope_entry_value_transfer_from_earlier(
EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
exists(SsaSourceVariable var |
essa_var_scope(var, pred_scope, pred_var) and
scope_entry_def_scope(var, succ_scope, succ_def)
diff --git a/python/ql/lib/semmle/python/pointsto/PointsTo.qll b/python/ql/lib/semmle/python/pointsto/PointsTo.qll
index 6068d7308c9..1369d6d6ce7 100644
--- a/python/ql/lib/semmle/python/pointsto/PointsTo.qll
+++ b/python/ql/lib/semmle/python/pointsto/PointsTo.qll
@@ -2566,7 +2566,7 @@ module AttributePointsTo {
predicate variableAttributePointsTo(
EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
or
exists(EssaVariable prev |
diff --git a/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
index fcf8885f3f4..ae61bd04314 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
@@ -57,16 +57,43 @@ module CleartextLogging {
/** A piece of data printed, considered as a flow sink. */
class PrintedDataAsSink extends Sink {
PrintedDataAsSink() {
- this = API::builtin("print").getACall().getArg(_)
- or
- // special handling of writing to `sys.stdout` and `sys.stderr`, which is
- // essentially the same as printing
- this =
- API::moduleImport("sys")
- .getMember(["stdout", "stderr"])
- .getMember("write")
- .getACall()
- .getArg(0)
+ (
+ this = API::builtin("print").getACall().getArg(_)
+ or
+ // special handling of writing to `sys.stdout` and `sys.stderr`, which is
+ // essentially the same as printing
+ this =
+ API::moduleImport("sys")
+ .getMember(["stdout", "stderr"])
+ .getMember("write")
+ .getACall()
+ .getArg(0)
+ ) and
+ // since some of the inner error handling implementation of the logging module is
+ // ```py
+ // sys.stderr.write('Message: %r\n'
+ // 'Arguments: %s\n' % (record.msg,
+ // record.args))
+ // ```
+ // any time we would report flow to such a logging sink, we can ALSO report
+ // the flow to the `record.msg`/`record.args` sinks -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `logging.info` calls in the following example
+ // due to use-use flow
+ // ```py
+ // logging.info(user_controlled)
+ // logging.info(user_controlled)
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module loggingInit |
+ loggingInit.getName() = "logging.__init__" and
+ this.getScope().getEnclosingModule() = loggingInit and
+ // do allow this call if we're analyzing logging/__init__.py as part of CPython though
+ not exists(loggingInit.getFile().getRelativePath())
+ )
}
}
}
diff --git a/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
index 0ff32823d68..001b9395ef4 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
@@ -50,7 +50,34 @@ module CleartextStorage {
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
- FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
+ FileWriteDataAsSink() {
+ this = any(FileSystemWriteAccess write).getADataNode() and
+ // since implementation of Path.write_bytes in pathlib.py is like
+ // ```py
+ // def write_bytes(self, data):
+ // with self.open(mode='wb') as f:
+ // return f.write(data)
+ // ```
+ // any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
+ // the flow from the `data` parameter to the `f.write` sink -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `p.write` calls in the following example
+ // due to use-use flow
+ // ```py
+ // p.write(user_controlled)
+ // p.write(user_controlled)
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module pathlib |
+ pathlib.getName() = "pathlib" and
+ this.getScope().getEnclosingModule() = pathlib and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(pathlib.getFile().getRelativePath())
+ )
+ }
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */
diff --git a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
index a18bfe73372..d43095a04f8 100644
--- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
@@ -76,6 +76,9 @@ module CommandInjection {
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
+ //
+ // The same approach is used in the path-injection, cleartext-storage, and
+ // cleartext-logging queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]
diff --git a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
index a96bbb996bc..b50ff70fde2 100644
--- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
@@ -58,7 +58,33 @@ module PathInjection {
* A file system access, considered as a flow sink.
*/
class FileSystemAccessAsSink extends Sink {
- FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
+ FileSystemAccessAsSink() {
+ this = any(FileSystemAccess e).getAPathArgument() and
+ // since implementation of Path.open in pathlib.py is like
+ // ```py
+ // def open(self, ...):
+ // return io.open(self, ...)
+ // ```
+ // any time we would report flow to the `path_obj.open` sink, we can ALSO report
+ // the flow from the `self` parameter to the `io.open` sink -- obviously we
+ // don't want that.
+ //
+ // However, simply removing taint edges out of a sink is not a good enough solution,
+ // since we would only flag one of the `p.open` calls in the following example
+ // due to use-use flow
+ // ```py
+ // p.open()
+ // p.open()
+ // ```
+ //
+ // The same approach is used in the command injection query.
+ not exists(Module pathlib |
+ pathlib.getName() = "pathlib" and
+ this.getScope().getEnclosingModule() = pathlib and
+ // do allow this call if we're analyzing pathlib.py as part of CPython though
+ not exists(pathlib.getFile().getRelativePath())
+ )
+ }
}
private import semmle.python.frameworks.data.ModelsAsData
diff --git a/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
index 9a31dcc2a10..c454ff8e994 100644
--- a/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
@@ -41,7 +41,32 @@ module StackTraceExposure {
/**
* A source of exception info, considered as a flow source.
*/
- class ExceptionInfoAsSource extends Source instanceof ExceptionInfo { }
+ class ExceptionInfoAsSource extends Source instanceof ExceptionInfo {
+ ExceptionInfoAsSource() {
+ // since `traceback.format_exc()` in Python 2 is internally implemented as
+ // ```py
+ // def format_exc(limit=None):
+ // """Like print_exc() but return a string."""
+ // try:
+ // etype, value, tb = sys.exc_info()
+ // return ''.join(format_exception(etype, value, tb, limit))
+ // finally:
+ // etype = value = tb = None
+ // ```
+      // any time we would report flow from a call to `format_exc`, we can ALSO report
+ // the flow from the `sys.exc_info()` source -- obviously we don't want that.
+ //
+ //
+ // To avoid this, we use the same approach as for sinks in the command injection
+ // query (and others).
+ not exists(Module traceback |
+ traceback.getName() = "traceback" and
+ this.getScope().getEnclosingModule() = traceback and
+ // do allow this call if we're analyzing traceback.py as part of CPython though
+ not exists(traceback.getFile().getRelativePath())
+ )
+ }
+ }
/**
* The body of a HTTP response that will be returned from a server, considered as a flow sink.
diff --git a/python/ql/lib/semmle/python/types/Builtins.qll b/python/ql/lib/semmle/python/types/Builtins.qll
index 066b496b80e..e6a21e1b717 100644
--- a/python/ql/lib/semmle/python/types/Builtins.qll
+++ b/python/ql/lib/semmle/python/types/Builtins.qll
@@ -111,11 +111,7 @@ class Builtin extends @py_cobject {
}
module Builtin {
- Builtin builtinModule() {
- py_special_objects(result, "builtin_module_2") and major_version() = 2
- or
- py_special_objects(result, "builtin_module_3") and major_version() = 3
- }
+ Builtin builtinModule() { py_special_objects(result, "builtin_module") }
Builtin builtin(string name) { result = builtinModule().getMember(name) }
diff --git a/python/ql/lib/semmle/python/types/Object.qll b/python/ql/lib/semmle/python/types/Object.qll
index e0d252929f9..b408fc7ba1c 100644
--- a/python/ql/lib/semmle/python/types/Object.qll
+++ b/python/ql/lib/semmle/python/types/Object.qll
@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages
cached
private predicate is_an_object(@py_object obj) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
/* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
obj instanceof ControlFlowNode and
not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
@@ -78,7 +78,7 @@ class Object extends @py_object {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
this.hasOrigin() and
this.getOrigin()
.getLocation()
@@ -98,7 +98,7 @@ class Object extends @py_object {
/** Gets a textual representation of this element. */
cached
string toString() {
- Stages::DataFlow::ref() and
+ Stages::PointsTo::ref() and
not this = undefinedVariable() and
not this = unknownValue() and
exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |
diff --git a/python/ql/src/CHANGELOG.md b/python/ql/src/CHANGELOG.md
index 6199749411d..eace5e34204 100644
--- a/python/ql/src/CHANGELOG.md
+++ b/python/ql/src/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.6.3
+
+No user-facing changes.
+
## 0.6.2
No user-facing changes.
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
index 85b0a730cf1..94762ace98c 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -1,48 +1,36 @@
/**
* Definitions for reasoning about untrusted data used in APIs defined outside the
- * database.
+ * user-written code.
*/
-import python
+private import python
import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.TaintTracking
-import semmle.python.Concepts
-import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.dataflow.new.TaintTracking
+private import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
-private import semmle.python.types.Builtins
-private import semmle.python.objects.ObjectInternal
-// IMPLEMENTATION NOTES:
-//
-// This query uses *both* the new data-flow library, and points-to. Why? To get this
-// finished quickly, so it can provide value for our field team and ourselves.
-//
-// In the long run, it should not need to use points-to for anything. Possibly this can
-// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
-// library to be fully operational.
-//
-// At least it will allow us to provide a baseline comparison against a solution that
-// doesn't use points-to at all
-//
-// There is a few dirty things we do here:
-// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
-// publicly, but we really want access to them.
-// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
-// `DataFlowCallable`
-// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
-// really wanted to polish our points-to implementation, we could move this
-// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
-// probably require some more work: for this query, it's totally ok to use
-// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
-// figure out if that is desirable in general. I simply skipped a corner here!
-// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
- * A callable that is considered a "safe" external API from a security perspective.
+ * An external API that is considered "safe" from a security perspective.
*/
class SafeExternalApi extends Unit {
- /** Gets a callable that is considered a "safe" external API from a security perspective. */
- abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
+ /**
+ * Gets a call that is considered "safe" from a security perspective. You can use API
+ * graphs to find calls to functions you know are safe.
+ *
+   * This works even when the external library isn't extracted.
+ */
+ abstract DataFlow::CallCfgNode getSafeCall();
+
+ /**
+ * Gets a callable that is considered a "safe" external API from a security
+ * perspective.
+ *
+ * You probably want to define this as `none()` and use `getSafeCall` instead, since
+ * that can handle the external library not being extracted.
+ */
+ DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
}
/** DEPRECATED: Alias for SafeExternalApi */
@@ -50,42 +38,127 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi {
- override DataFlowPrivate::DataFlowCallable getSafeCallable() {
- exists(CallableValue cv | cv = result.getCallableValue() |
- cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
- or
- exists(ClassValue cls, string attr |
- cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
- |
- cls.lookup(attr) = cv
- )
+ override DataFlow::CallCfgNode getSafeCall() {
+ result =
+ API::builtin([
+ "len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
+ "str", "type"
+ ]).getACall()
+ }
+}
+
+/**
+ * Gets a human-readable representation of `node`.
+ *
+ * Note that this is only defined for API nodes that are allowed as external APIs,
+ * so `None.json.dumps` will, for example, not be allowed.
+ */
+string apiNodeToStringRepr(API::Node node) {
+ node = API::builtin(result)
+ or
+ node = API::moduleImport(result)
+ or
+ exists(API::Node base, string basename |
+ base.getDepth() < node.getDepth() and
+ basename = apiNodeToStringRepr(base) and
+ not base = API::builtin(["None", "True", "False"])
+ |
+ exists(string m | node = base.getMember(m) | result = basename + "." + m)
+ or
+ node = base.getReturn() and
+ result = basename + "()" and
+ not base.getACall() = any(SafeExternalApi safe).getSafeCall()
+ or
+ node = base.getAwaited() and
+ result = basename
+ )
+}
+
+predicate resolvedCall(CallNode call) {
+ DataFlowPrivate::resolveCall(call, _, _) or
+ DataFlowPrivate::resolveClassCall(call, _)
+}
+
+newtype TInterestingExternalApiCall =
+ TUnresolvedCall(DataFlow::CallCfgNode call) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ not resolvedCall(call.getNode()) and
+ not call = any(SafeExternalApi safe).getSafeCall()
+ } or
+ TResolvedCall(DataFlowPrivate::DataFlowCall call) {
+ exists(call.getLocation().getFile().getRelativePath()) and
+ exists(call.getCallable()) and
+ not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
+ // ignore calls inside codebase, and ignore calls that are marked as safe. This is
+ // only needed as long as we extract dependencies. When we stop doing that, all
+ // targets of resolved calls will be from user-written code.
+ not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
+ not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
+ any(SafeExternalApi safe).getSafeCall() = callCfgNode
+ )
+ }
+
+abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
+ /** Gets the argument at position `apos`, if any */
+ abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
+
+ /** Gets a textual representation of this element. */
+ abstract string toString();
+
+ /**
+ * Gets a human-readable name for the external API.
+ */
+ abstract string getApiName();
+}
+
+class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
+ DataFlow::CallCfgNode call;
+
+ UnresolvedCall() { this = TUnresolvedCall(call) }
+
+ override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+ exists(int i | apos.isPositional(i) | result = call.getArg(i))
+ or
+ exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
+ }
+
+ override string toString() {
+ result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
+ }
+
+ override string getApiName() {
+ exists(API::Node apiNode |
+ result = apiNodeToStringRepr(apiNode) and
+ apiNode.getACall() = call
+ )
+ }
+}
+
+class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
+ DataFlowPrivate::DataFlowCall dfCall;
+
+ ResolvedCall() { this = TResolvedCall(dfCall) }
+
+ override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+ result = dfCall.getArgument(apos)
+ }
+
+ override string toString() {
+ result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
+ }
+
+ override string getApiName() {
+ exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
+ result = apiNodeToStringRepr(apiNode) and
+ apiNode.getACall() = call
)
}
}
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
- DataFlowPrivate::DataFlowCallable callable;
- int i;
-
ExternalApiDataNode() {
- exists(DataFlowPrivate::DataFlowCall call |
- exists(call.getLocation().getFile().getRelativePath())
- |
- callable = call.getCallable() and
- // TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
- this = call.getArg(i)
- ) and
- not any(SafeExternalApi safe).getSafeCallable() = callable and
- exists(Value cv | cv = callable.getCallableValue() |
- cv.isAbsent()
- or
- cv.isBuiltin()
- or
- cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
- or
- not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
- ) and
+ exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
// Not already modeled as a taint step
not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
@@ -95,12 +168,6 @@ class ExternalApiDataNode extends DataFlow::Node {
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
)
}
-
- /** Gets the index for the parameter that will receive this untrusted data */
- int getIndex() { result = i }
-
- /** Gets the callable to which this argument is passed. */
- DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +200,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */
private newtype TExternalApi =
- /** An untrusted API method `m` where untrusted data is passed at `index`. */
- TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
- exists(UntrustedExternalApiDataNode n |
- callable = n.getCallable() and
- index = n.getIndex()
+ MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
+ exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
+ ex = call.getArgument(apos) and
+ repr = call.getApiName()
)
}
-/** An external API which is used with untrusted data. */
-class ExternalApiUsedWithUntrustedData extends TExternalApi {
+/** An argument of an external API which is used with untrusted data. */
+class ExternalApiUsedWithUntrustedData extends MkExternalApi {
+ string repr;
+ DataFlowPrivate::ArgumentPosition apos;
+
+ ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
+
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalApiDataNode getUntrustedDataNode() {
- this = TExternalApiParameter(result.getCallable(), result.getIndex())
+ exists(InterestingExternalApiCall call |
+ result = call.getArgument(apos) and
+ call.getApiName() = repr
+ )
}
/** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +228,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
}
/** Gets a textual representation of this element. */
- string toString() {
- exists(
- DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
- string indexString
- |
- this = TExternalApiParameter(callable, index) and
- indexString = "param " + index and
- exists(CallableValue cv | cv = callable.getCallableValue() |
- callableString =
- cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
- or
- not exists(cv.getScope()) and
- (
- cv instanceof BuiltinFunctionValue and
- callableString = pretty_builtin_function_value(cv)
- or
- cv instanceof BuiltinMethodValue and
- callableString = pretty_builtin_method_value(cv)
- or
- not cv instanceof BuiltinFunctionValue and
- not cv instanceof BuiltinMethodValue and
- callableString = cv.toString()
- )
- ) and
- result = callableString + " [" + indexString + "]"
- )
- }
+ string toString() { result = repr + " [" + apos + "]" }
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
-
-/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
-private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
- exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
- result = prefix_with_module_if_found(b)
- )
-}
-
-/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
-private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
- exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
- exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
- result = prefix_with_module_if_found(cls) + "." + b.getName()
- )
- or
- not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
- result = b.getName()
- )
-}
-
-/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
-private string prefix_with_module_if_found(Builtin b) {
- exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
- result = mod.getName() + "." + b.getName()
- )
- or
- not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
- result = b.getName()
-}
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
index 0627615ca64..e0692ffeae0 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
@@ -11,11 +11,9 @@ relevant for security analysis of this application.
An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with [param x], where x indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-classmethods, parameter 0 represents the class instance or class itself
-respectively.
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with [position index] or [keyword name],
+to indicate the argument passing the untrusted data.
Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for pickle.loads which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.
Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.
-
Note: Currently this query will only report results for external packages that are extracted.
-
diff --git a/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp b/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
index fc7f1a18da9..2b8c31d37b7 100644
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.
An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with [param x], where x indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-classmethods, parameter 0 represents the class instance or class itself
-respectively.
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with [position index] or [keyword name],
+to indicate the argument passing the untrusted data.
Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for pickle.loads which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.
Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.
-
Note: Currently this query will only report results for external packages that are extracted.
-
diff --git a/python/ql/src/Security/CWE-022/examples/tainted_path.py b/python/ql/src/Security/CWE-022/examples/tainted_path.py
index 31dfafc9309..b3d402874d5 100644
--- a/python/ql/src/Security/CWE-022/examples/tainted_path.py
+++ b/python/ql/src/Security/CWE-022/examples/tainted_path.py
@@ -1,36 +1,30 @@
import os.path
+from flask import Flask, request, abort
+app = Flask(__name__)
-urlpatterns = [
- # Route to user_picture
- url(r'^user-pic1$', user_picture1, name='user-picture1'),
- url(r'^user-pic2$', user_picture2, name='user-picture2'),
- url(r'^user-pic3$', user_picture3, name='user-picture3')
-]
-
-
-def user_picture1(request):
- """A view that is vulnerable to malicious file access."""
- filename = request.GET.get('p')
+@app.route("/user_picture1")
+def user_picture1():
+ filename = request.args.get('p')
# BAD: This could read any file on the file system
data = open(filename, 'rb').read()
- return HttpResponse(data)
+ return data
-def user_picture2(request):
- """A view that is vulnerable to malicious file access."""
+@app.route("/user_picture2")
+def user_picture2():
base_path = '/server/static/images'
- filename = request.GET.get('p')
+ filename = request.args.get('p')
# BAD: This could still read any file on the file system
data = open(os.path.join(base_path, filename), 'rb').read()
- return HttpResponse(data)
+ return data
-def user_picture3(request):
- """A view that is not vulnerable to malicious file access."""
+@app.route("/user_picture3")
+def user_picture3():
base_path = '/server/static/images'
- filename = request.GET.get('p')
+ filename = request.args.get('p')
#GOOD -- Verify with normalised version of path
fullpath = os.path.normpath(os.path.join(base_path, filename))
if not fullpath.startswith(base_path):
- raise SecurityException()
+ raise Exception("not allowed")
data = open(fullpath, 'rb').read()
- return HttpResponse(data)
+ return data
diff --git a/python/ql/src/Security/CWE-022/examples/tarslip_bad.py b/python/ql/src/Security/CWE-022/examples/tarslip_bad.py
index d0a233e2c4b..d7976e97b5b 100644
--- a/python/ql/src/Security/CWE-022/examples/tarslip_bad.py
+++ b/python/ql/src/Security/CWE-022/examples/tarslip_bad.py
@@ -1,7 +1,7 @@
-
+import sys
import tarfile
-with tarfile.open('archive.zip') as tar:
+with tarfile.open(sys.argv[1]) as tar:
#BAD : This could write any file on the filesystem.
for entry in tar:
tar.extract(entry, "/tmp/unpack/")
diff --git a/python/ql/src/Security/CWE-022/examples/tarslip_good.py b/python/ql/src/Security/CWE-022/examples/tarslip_good.py
index 59be338d212..0791ab9d3e8 100644
--- a/python/ql/src/Security/CWE-022/examples/tarslip_good.py
+++ b/python/ql/src/Security/CWE-022/examples/tarslip_good.py
@@ -1,8 +1,8 @@
-
+import sys
import tarfile
import os.path
-with tarfile.open('archive.zip') as tar:
+with tarfile.open(sys.argv[1]) as tar:
for entry in tar:
#GOOD: Check that entry is safe
if os.path.isabs(entry.name) or ".." in entry.name:
diff --git a/python/ql/src/change-notes/released/0.6.3.md b/python/ql/src/change-notes/released/0.6.3.md
new file mode 100644
index 00000000000..83374bcef56
--- /dev/null
+++ b/python/ql/src/change-notes/released/0.6.3.md
@@ -0,0 +1,3 @@
+## 0.6.3
+
+No user-facing changes.
diff --git a/python/ql/src/codeql-pack.release.yml b/python/ql/src/codeql-pack.release.yml
index 5501a2a1cc5..b7dafe32c5d 100644
--- a/python/ql/src/codeql-pack.release.yml
+++ b/python/ql/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
---
-lastReleaseVersion: 0.6.2
+lastReleaseVersion: 0.6.3
diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
new file mode 100644
index 00000000000..1219bbe43bc
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp
@@ -0,0 +1,56 @@
+
+
+
+
+
Extracting files from a malicious tarball without validating that the destination file path
+is within the destination directory using shutil.unpack_archive() can cause files outside the
+destination directory to be overwritten, due to the possible presence of directory traversal elements
+ (..) in archive path names.
+
+
Tarball contain archive entries representing each file in the archive. These entries
+include a file path for the entry, but these file paths are not restricted and may contain
+unexpected special elements such as the directory traversal element (..). If these
+file paths are used to determine an output file to write the contents of the archive item to, then
+the file may be written to an unexpected location. This can result in sensitive information being
+revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
+files.
+
+
For example, if a tarball contains a file entry ../sneaky-file.txt, and the tarball
+is extracted to the directory /tmp/tmp123, then naively combining the paths would result
+in an output file path of /tmp/tmp123/../sneaky-file.txt, which would cause the file to be
+written to /tmp/.
+
+
+
+
+
Ensure that output paths constructed from tarball entries are validated
+to prevent writing files to unexpected locations.
+
+
Consider using a safer module, such as: zipfile
+
+
+
+
+
+In this example an archive is extracted without validating file paths.
+
+
+
+
+
To fix this vulnerability, we need to call the function tarfile.extract()
+ on each member after verifying that it does not contain either .. or startswith /.
+