Merge branch 'main' into redsun82/swift-diagnostics-locations

2026-04-29 10:45:15 +02:00 · 2023-05-15 09:38:58 +02:00
parent 9ffada31a8 b214003720
commit 9a555aea5f
73 changed files with 1684 additions and 176 deletions
--- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Allocation.qll
+++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Allocation.qll
@@ -414,7 +414,7 @@ private module HeuristicAllocation {
    int sizeArg;

    HeuristicAllocationFunctionByName() {
-      Function.super.getName().matches("%alloc%") and
+      Function.super.getName().matches(["%alloc%", "%Alloc%"]) and
      Function.super.getUnspecifiedType() instanceof PointerType and
      sizeArg = unique( | | getAnUnsignedParameter(this))
    }
--- a/Bugs/OverrunWriteProductFlow.ql
+++ b/Bugs/OverrunWriteProductFlow.ql
@@ -47,7 +47,7 @@ VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result }
 * Holds if `(n, state)` pair represents the source of flow for the size
 * expression associated with `alloc`.
 */
-predicate hasSize(AllocationExpr alloc, DataFlow::Node n, int state) {
+predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
  exists(VariableAccess va, Expr size, int delta |
    size = alloc.getSizeExpr() and
    // Get the unique variable in a size expression like `x` in `malloc(x + 1)`.
--- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-119/OverrunWriteProductFlow.expected
+++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-119/OverrunWriteProductFlow.expected
@@ -222,6 +222,7 @@ edges
 | test.cpp:243:12:243:14 | str indirection [string] | test.cpp:243:12:243:21 | string |
 | test.cpp:243:12:243:14 | str indirection [string] | test.cpp:243:16:243:21 | string indirection |
 | test.cpp:243:16:243:21 | string indirection | test.cpp:243:12:243:21 | string |
+| test.cpp:249:20:249:27 | call to my_alloc | test.cpp:250:12:250:12 | p |
 nodes
 | test.cpp:16:11:16:21 | mk_string_t indirection [string] | semmle.label | mk_string_t indirection [string] |
 | test.cpp:18:5:18:30 | ... = ... | semmle.label | ... = ... |
@@ -402,6 +403,8 @@ nodes
 | test.cpp:243:12:243:14 | str indirection [string] | semmle.label | str indirection [string] |
 | test.cpp:243:12:243:21 | string | semmle.label | string |
 | test.cpp:243:16:243:21 | string indirection | semmle.label | string indirection |
+| test.cpp:249:20:249:27 | call to my_alloc | semmle.label | call to my_alloc |
+| test.cpp:250:12:250:12 | p | semmle.label | p |
 subpaths
 | test.cpp:242:22:242:27 | buffer | test.cpp:235:40:235:45 | buffer | test.cpp:236:12:236:17 | p_str indirection [post update] [string] | test.cpp:242:16:242:19 | set_string output argument [string] |
 #select
@@ -422,3 +425,4 @@ subpaths
 | test.cpp:207:9:207:15 | call to strncpy | test.cpp:147:19:147:24 | call to malloc | test.cpp:207:22:207:27 | string | This write may overflow $@ by 3 elements. | test.cpp:207:22:207:27 | string | string |
 | test.cpp:232:3:232:8 | call to memset | test.cpp:228:43:228:48 | call to malloc | test.cpp:232:10:232:15 | buffer | This write may overflow $@ by 32 elements. | test.cpp:232:10:232:15 | buffer | buffer |
 | test.cpp:243:5:243:10 | call to memset | test.cpp:241:27:241:32 | call to malloc | test.cpp:243:12:243:21 | string | This write may overflow $@ by 1 element. | test.cpp:243:16:243:21 | string | string |
+| test.cpp:250:5:250:10 | call to memset | test.cpp:249:20:249:27 | call to my_alloc | test.cpp:250:12:250:12 | p | This write may overflow $@ by 1 element. | test.cpp:250:12:250:12 | p | p |
--- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-119/test.cpp
+++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-119/test.cpp
@@ -243,3 +243,9 @@ void test_flow_through_setter(unsigned size) {
    memset(str.string, 0, size + 1); // BAD
 }

+void* my_alloc(unsigned size);
+
+void foo(unsigned size) {
+    int* p = (int*)my_alloc(size); // BAD
+    memset(p, 0, size + 1);
+}
--- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected
+++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected
@@ -649,6 +649,10 @@ edges
 | test.cpp:280:13:280:24 | new[] | test.cpp:281:14:281:15 | xs |
 | test.cpp:290:13:290:24 | new[] | test.cpp:291:14:291:15 | xs |
 | test.cpp:290:13:290:24 | new[] | test.cpp:292:30:292:30 | x |
+| test.cpp:304:15:304:26 | new[] | test.cpp:307:5:307:6 | xs |
+| test.cpp:304:15:304:26 | new[] | test.cpp:308:5:308:6 | xs |
+| test.cpp:308:5:308:6 | xs | test.cpp:308:5:308:11 | access to array |
+| test.cpp:308:5:308:11 | access to array | test.cpp:308:5:308:29 | Store: ... = ... |
 #select
 | test.cpp:6:14:6:15 | Load: * ... | test.cpp:4:15:4:20 | call to malloc | test.cpp:6:14:6:15 | Load: * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:4:15:4:20 | call to malloc | call to malloc | test.cpp:5:19:5:22 | size | size |
 | test.cpp:8:14:8:21 | Load: * ... | test.cpp:4:15:4:20 | call to malloc | test.cpp:8:14:8:21 | Load: * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@ + 1. | test.cpp:4:15:4:20 | call to malloc | call to malloc | test.cpp:5:19:5:22 | size | size |
@@ -672,3 +676,4 @@ edges
 | test.cpp:254:9:254:16 | Store: ... = ... | test.cpp:248:24:248:30 | call to realloc | test.cpp:254:9:254:16 | Store: ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:248:24:248:30 | call to realloc | call to realloc | test.cpp:254:11:254:11 | i | i |
 | test.cpp:264:13:264:14 | Load: * ... | test.cpp:260:13:260:24 | new[] | test.cpp:264:13:264:14 | Load: * ... | This read might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:260:13:260:24 | new[] | new[] | test.cpp:261:19:261:21 | len | len |
 | test.cpp:274:5:274:10 | Store: ... = ... | test.cpp:270:13:270:24 | new[] | test.cpp:274:5:274:10 | Store: ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:270:13:270:24 | new[] | new[] | test.cpp:271:19:271:21 | len | len |
+| test.cpp:308:5:308:29 | Store: ... = ... | test.cpp:304:15:304:26 | new[] | test.cpp:308:5:308:29 | Store: ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:304:15:304:26 | new[] | new[] | test.cpp:308:8:308:10 | ... + ... | ... + ... |
--- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp
+++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp
@@ -293,4 +293,18 @@ void test20(unsigned len)
  {
    *x = 0; // GOOD
  }
-}
+}
+
+void* test21_get(int n);
+
+void test21() {
+  int n = 0;
+  while (test21_get(n)) n+=2;
+
+  void** xs = new void*[n];
+
+  for (int i = 0; i < n; i += 2) {
+    xs[i] = test21_get(i);
+    xs[i+1] = test21_get(i+1);
+  }
+}
--- a/cpp/ql/test/library-tests/ir/range-analysis/test.cpp
+++ b/cpp/ql/test/library-tests/ir/range-analysis/test.cpp
@@ -49,3 +49,13 @@
    return 0;
  }

+  void* f3_get(int n);
+
+  void f3() {
+    int n = 0;
+    while (f3_get(n)) n+=2;
+
+    for (int i = 0; i < n; i += 2) {
+      range(i); // $ range=>=0 SPURIOUS: range="<=call to f3_get-1" range="<=call to f3_get-2"
+    }
+  }
--- a/go/ql/lib/semmle/go/security/SensitiveActions.qll
+++ b/go/ql/lib/semmle/go/security/SensitiveActions.qll
@@ -35,7 +35,7 @@ module HeuristicNames {
   */
  string maybePassword() {
    result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or
-    result = "(?is).*(auth(entication|ori[sz]ation)?)key.*"
+    result = "(?is).*(auth(entication|ori[sz]ation)?|api)key.*"
  }

  /**
--- a/go/ql/src/experimental/CWE-134/DsnBad.go
+++ b/go/ql/src/experimental/CWE-134/DsnBad.go
@@ -0,0 +1,8 @@
+
+func bad() interface{} {
+	name := os.Args[1]
+	// This is bad. `name` is used unchecked and can be something like `test?allowAllFiles=true&`, which will allow an attacker to access local files.
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db
+}
--- a/go/ql/src/experimental/CWE-134/DsnGood.go
+++ b/go/ql/src/experimental/CWE-134/DsnGood.go
@@ -0,0 +1,12 @@
+func good() (interface{}, error) {
+	name := os.Args[1]
+	hasBadChar, _ := regexp.MatchString(".*[?].*", name)
+
+	if hasBadChar {
+		return nil, errors.New("Bad input")
+	}
+
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db, nil
+}
--- a/go/ql/src/experimental/CWE-134/DsnInjection.qhelp
+++ b/go/ql/src/experimental/CWE-134/DsnInjection.qhelp
@@ -0,0 +1,38 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+
+<qhelp>
+  <overview>
+    <p>If a Data-Source Name (DSN) is built using untrusted user input without proper sanitization,
+      the system may be vulnerable to DSN injection vulnerabilities.</p>
+  </overview>
+
+  <recommendation>
+    <p>If user input must be included in a DSN, additional steps should be taken to sanitize
+      untrusted data, such as checking for special characters included in user input.</p>
+  </recommendation>
+
+  <example>
+    <p>In the following examples, the code accepts the db name from the user,
+      which it then uses to build a DSN string.</p>
+
+    <p>The following example uses the unsanitized user input directly
+      in the process of constructing a DSN name.
+      A malicious user could provide special characters to change the meaning of this string, and
+      carry out unexpected database operations.</p>
+
+    <sample src="DsnBad.go" />
+
+    <p>In the following example, the input provided by the user is sanitized before it is included
+      in the DSN string.
+      This ensures the meaning of the DSN string cannot be changed by a malicious user.</p>
+
+    <sample src="DsnGood.go" />
+  </example>
+
+  <references>
+    <li>
+      CVE-2022-3023: <a href="https://nvd.nist.gov/vuln/detail/CVE-2022-3023/">Data Source Name Injection in pingcap/tidb.</a>
+    </li>
+
+  </references>
+</qhelp>
--- a/go/ql/src/experimental/CWE-134/DsnInjection.ql
+++ b/go/ql/src/experimental/CWE-134/DsnInjection.ql
@@ -0,0 +1,22 @@
+/**
+ * @name SQL Data-source URI built from user-controlled sources
+ * @description Building an SQL data-source URI from untrusted sources can allow an attacker to compromise security
+ * @kind path-problem
+ * @problem.severity error
+ * @id go/dsn-injection
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-134
+ */
+
+import go
+import DataFlow::PathGraph
+import DsnInjectionCustomizations
+
+/** An untrusted flow source taken as a source for the `DsnInjection` taint-flow configuration. */
+private class UntrustedFlowAsSource extends Source instanceof UntrustedFlowSource { }
+
+from DsnInjection cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This query depends on a $@.", source.getNode(),
+  "user-provided value"
--- a/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll
+++ b/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll
@@ -0,0 +1,46 @@
+/** Provides a taint-tracking model to reason about Data-Source name injection vulnerabilities. */
+
+import go
+import DataFlow::PathGraph
+import semmle.go.dataflow.barrierguardutil.RegexpCheck
+
+/** A source for `DsnInjection` taint-flow configuration. */
+abstract class Source extends DataFlow::Node { }
+
+/** A taint-tracking configuration to reason about Data Source Name injection vulnerabilities. */
+class DsnInjection extends TaintTracking::Configuration {
+  DsnInjection() { this = "DsnInjection" }
+
+  override predicate isSource(DataFlow::Node node) { node instanceof Source }
+
+  override predicate isSink(DataFlow::Node node) {
+    exists(Function f | f.hasQualifiedName("database/sql", "Open") |
+      node = f.getACall().getArgument(1)
+    )
+  }
+
+  override predicate isSanitizer(DataFlow::Node node) { node instanceof RegexpCheckBarrier }
+}
+
+/** A model of a function which parses, decodes or unmarshals a tainted input, propagating taint from any parameter to either the method receiver or the first result. */
+private class DecodeFunctionModel extends TaintTracking::FunctionModel {
+  DecodeFunctionModel() {
+    // This matches any function whose name contains `parse`, `decode` or `unmarshal`, ignoring case.
+    // This is done to allow taint stored in encoded forms, such as TOML or JSON, to flow freely.
+    this.getName().regexpMatch("(?i).*(parse|decode|unmarshal).*")
+  }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    input.isParameter(_) and
+    (output.isResult(0) or output.isReceiver())
+  }
+}
+
+/** A model of `flag.Parse`, propagating tainted input passed via CLI flags to `Parse`'s result. */
+private class FlagSetFunctionModel extends TaintTracking::FunctionModel {
+  FlagSetFunctionModel() { this.hasQualifiedName("flag", "Parse") }
+
+  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
+    input.isParameter(0) and output.isResult()
+  }
+}
--- a/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql
+++ b/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql
@@ -0,0 +1,24 @@
+/**
+ * @name SQL Data-source URI built from local user-controlled sources
+ * @description Building an SQL data-source URI from untrusted sources can allow an attacker to compromise security
+ * @kind path-problem
+ * @problem.severity error
+ * @id go/dsn-injection-local
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-134
+ */
+
+import go
+import DataFlow::PathGraph
+import DsnInjectionCustomizations
+
+/** An argument passed via the command line taken as a source for the `DsnInjection` taint-flow configuration. */
+private class OsArgsSource extends Source {
+  OsArgsSource() { this = any(Variable c | c.hasQualifiedName("os", "Args")).getARead() }
+}
+
+from DsnInjection cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This query depends on a $@.", source.getNode(),
+  "user-provided value"
--- a/go/ql/src/experimental/CWE-203/Timing.qhelp
+++ b/go/ql/src/experimental/CWE-203/Timing.qhelp
@@ -0,0 +1,36 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+  <overview>
+    <p>
+      Using a non-constant time comparison to compare sensitive information can lead to
+      authentication vulnerabilities.
+</p>
+  </overview>
+
+  <recommendation>
+    <p>Use of a constant time comparison function such as <code>crypto/subtle</code> package's <code>
+      ConstantTimeCompare</code> function can prevent this vulnerability. </p>
+  </recommendation>
+
+  <example>
+    <p>In the following examples, the code reads a user-supplied secret from the HTTP header named by
+      <code>secretHeader</code> into the <code>headerSecret</code> variable, which
+      is then compared with a system stored secret to perform authentication.</p>
+
+
+    <sample src="timingBad.go" />
+
+    <p>In the following example, the input provided by the user is compared using the <code>
+      ConstantTimeCompare</code> function. This ensures that timing attacks are not possible in this
+      case.</p>
+
+    <sample src="timingGood.go" />
+  </example>
+
+  <references>
+    <li>National Vulnerability Database: <a href="https://nvd.nist.gov/vuln/detail/CVE-2022-24912">
+      CVE-2022-24912</a>.</li>
+    <li>Verbose Logging: <a href="https://verboselogging.com/2012/08/20/a-timing-attack-in-action">A
+      timing attack in action</a>.</li>
+  </references>
+</qhelp>
--- a/go/ql/src/experimental/CWE-203/Timing.ql
+++ b/go/ql/src/experimental/CWE-203/Timing.ql
@@ -0,0 +1,72 @@
+/**
+ * @name Timing attacks due to comparison of sensitive secrets
+ * @description Using a non-constant time comparison method to compare secrets can lead to authorization vulnerabilities
+ * @kind path-problem
+ * @problem.severity warning
+ * @id go/timing-attack
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-203
+ */
+
+import go
+import DataFlow::PathGraph
+import semmle.go.security.SensitiveActions
+
+private predicate isBadResult(DataFlow::Node e) {
+  exists(string path | path = e.asExpr().getFile().getAbsolutePath().toLowerCase() |
+    path.matches(["%fake%", "%dummy%", "%test%", "%example%"]) and not path.matches("%ql/test%")
+  )
+}
+
+/**
+ * A data flow sink for timing attack vulnerabilities.
+ */
+abstract class Sink extends DataFlow::Node { }
+
+/** A taint-tracking sink which models the operands of comparisons involving sensitive variables. */
+private class SensitiveCompareSink extends Sink {
+  ComparisonExpr c;
+
+  SensitiveCompareSink() {
+    // Select a comparison `c` in which a variable classified as a secret or password (`op1`) is tested.
+    exists(SensitiveVariableAccess op1, Expr op2 |
+      op1.getClassification() = [SensitiveExpr::secret(), SensitiveExpr::password()] and
+      // Exclude classifications matching `%grant%` to avoid false positives from OAuth.
+      not op1.getClassification().matches("%grant%") and
+      op1 = c.getAnOperand() and
+      op2 = c.getAnOperand() and
+      not op1 = op2 and
+      not (
+        // Comparisons with `nil` are excluded.
+        op2 = Builtin::nil().getAReference()
+        or
+        // Comparisons with the empty string are excluded as well.
+        op2.getStringValue().length() = 0
+      )
+    |
+      // Note that both operands need not look sensitive: as soon as one operand (`op1`)
+      // is classified as sensitive, every operand of `c` is considered a potential sink.
+      c.getAnOperand() = this.asExpr()
+    )
+  }
+
+  DataFlow::Node getOtherOperand() { result.asExpr() = c.getAnOperand() and not result = this }
+}
+
+class SecretTracking extends TaintTracking::Configuration {
+  SecretTracking() { this = "SecretTracking" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source instanceof UntrustedFlowSource and not isBadResult(source)
+  }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink and not isBadResult(sink) }
+}
+
+from SecretTracking cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where
+  cfg.hasFlowPath(source, sink) and
+  not cfg.hasFlowTo(sink.getNode().(SensitiveCompareSink).getOtherOperand())
+select sink.getNode(), source, sink, "$@ may be vulnerable to timing attacks.", source.getNode(),
+  "Hardcoded String"
--- a/go/ql/src/experimental/CWE-203/timingBad.go
+++ b/go/ql/src/experimental/CWE-203/timingBad.go
@@ -0,0 +1,11 @@
+func bad(w http.ResponseWriter, req *http.Request, secret []byte) (interface{}, error) {
+	// BAD: the header value is checked against the secret with `!=`, which is not a constant-time comparison.
+	secretHeader := "X-Secret"
+
+	headerSecret := req.Header.Get(secretHeader)
+	secretStr := string(secret)
+	if len(secret) != 0 && headerSecret != secretStr {
+		return nil, fmt.Errorf("header %s=%s did not match expected secret", secretHeader, headerSecret)
+	}
+	return nil, nil
+}
--- a/go/ql/src/experimental/CWE-203/timingGood.go
+++ b/go/ql/src/experimental/CWE-203/timingGood.go
@@ -0,0 +1,10 @@
+func good(w http.ResponseWriter, req *http.Request, secret []byte) (interface{}, error) {
+	// GOOD: the header value is checked against the secret with `subtle.ConstantTimeCompare` instead of `!=`.
+	secretHeader := "X-Secret"
+
+	headerSecret := req.Header.Get(secretHeader)
+	if len(secret) != 0 && subtle.ConstantTimeCompare(secret, []byte(headerSecret)) != 1 {
+		return nil, fmt.Errorf("header %s=%s did not match expected secret", secretHeader, headerSecret)
+	}
+	return nil, nil
+}
--- a/go/ql/test/TestUtilities/InlineFlowTest.qll
+++ b/go/ql/test/TestUtilities/InlineFlowTest.qll
@@ -78,7 +78,7 @@ class InlineFlowTest extends InlineExpectationsTest {

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "hasValueFlow" and
-    exists(DataFlow::Node sink | getValueFlowConfig().hasFlowTo(sink) |
+    exists(DataFlow::Node sink | this.getValueFlowConfig().hasFlowTo(sink) |
      sink.hasLocationInfo(location.getFile().getAbsolutePath(), location.getStartLine(),
        location.getStartColumn(), location.getEndLine(), location.getEndColumn()) and
      element = sink.toString() and
@@ -87,7 +87,8 @@ class InlineFlowTest extends InlineExpectationsTest {
    or
    tag = "hasTaintFlow" and
    exists(DataFlow::Node src, DataFlow::Node sink |
-      getTaintFlowConfig().hasFlow(src, sink) and not getValueFlowConfig().hasFlow(src, sink)
+      this.getTaintFlowConfig().hasFlow(src, sink) and
+      not this.getValueFlowConfig().hasFlow(src, sink)
    |
      sink.hasLocationInfo(location.getFile().getAbsolutePath(), location.getStartLine(),
        location.getStartColumn(), location.getEndLine(), location.getEndColumn()) and
--- a/go/ql/test/experimental/CWE-134/Dsn.go
+++ b/go/ql/test/experimental/CWE-134/Dsn.go
@@ -0,0 +1,77 @@
+package main
+
+import (
+	"database/sql"
+	"errors"
+	"fmt"
+	"net/http"
+	"os"
+	"regexp"
+)
+
+func good() (interface{}, error) {
+	name := os.Args[1]
+	hasBadChar, _ := regexp.MatchString(".*[?].*", name)
+
+	if hasBadChar {
+		return nil, errors.New("bad input")
+	}
+
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db, nil
+}
+
+func bad() interface{} {
+	name2 := os.Args[1:]
+	// This is bad. `name` can be something like `test?allowAllFiles=true&` which will allow an attacker to access local files.
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name2[0])
+	db, _ := sql.Open("mysql", dbDSN)
+	return db
+}
+
+func good2(w http.ResponseWriter, req *http.Request) (interface{}, error) {
+	name := req.FormValue("name")
+	hasBadChar, _ := regexp.MatchString(".*[?].*", name)
+
+	if hasBadChar {
+		return nil, errors.New("bad input")
+	}
+
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db, nil
+}
+
+func bad2(w http.ResponseWriter, req *http.Request) interface{} {
+	name := req.FormValue("name")
+	// This is bad. `name` can be something like `test?allowAllFiles=true&` which will allow an attacker to access local files.
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, name)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db
+}
+
+type Config struct {
+	dsn string
+}
+
+func NewConfig() *Config            { return &Config{dsn: ""} }
+func (Config) Parse([]string) error { return nil }
+
+func RegexFuncModelTest(w http.ResponseWriter, req *http.Request) (interface{}, error) {
+	cfg := NewConfig()
+	err := cfg.Parse(os.Args[1:]) // This is bad. `name` can be something like `test?allowAllFiles=true&` which will allow an attacker to access local files.
+	if err != nil {
+		return nil, err
+	}
+	dbDSN := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", "username", "password", "127.0.0.1", 3306, cfg.dsn)
+	db, _ := sql.Open("mysql", dbDSN)
+	return db, nil
+}
+
+func main() {
+	bad2(nil, nil)
+	good()
+	bad()
+	good2(nil, nil)
+}
--- a/go/ql/test/experimental/CWE-134/DsnInjection.expected
+++ b/go/ql/test/experimental/CWE-134/DsnInjection.expected
@@ -0,0 +1,8 @@
+edges
+| Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN |
+nodes
+| Dsn.go:47:10:47:30 | call to FormValue | semmle.label | call to FormValue |
+| Dsn.go:50:29:50:33 | dbDSN | semmle.label | dbDSN |
+subpaths
+#select
+| Dsn.go:50:29:50:33 | dbDSN | Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN | This query depends on a $@. | Dsn.go:47:10:47:30 | call to FormValue | user-provided value |
--- a/go/ql/test/experimental/CWE-134/DsnInjection.qlref
+++ b/go/ql/test/experimental/CWE-134/DsnInjection.qlref
@@ -0,0 +1 @@
+experimental/CWE-134/DsnInjection.ql
--- a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected
+++ b/go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected
@@ -0,0 +1,27 @@
+edges
+| Dsn.go:26:11:26:17 | selection of Args | Dsn.go:29:29:29:33 | dbDSN |
+| Dsn.go:62:2:62:4 | definition of cfg [pointer] | Dsn.go:63:9:63:11 | cfg [pointer] |
+| Dsn.go:62:2:62:4 | definition of cfg [pointer] | Dsn.go:67:102:67:104 | cfg [pointer] |
+| Dsn.go:63:9:63:11 | cfg [pointer] | Dsn.go:63:9:63:11 | implicit dereference |
+| Dsn.go:63:9:63:11 | implicit dereference | Dsn.go:62:2:62:4 | definition of cfg [pointer] |
+| Dsn.go:63:9:63:11 | implicit dereference | Dsn.go:63:9:63:11 | implicit dereference |
+| Dsn.go:63:9:63:11 | implicit dereference | Dsn.go:68:29:68:33 | dbDSN |
+| Dsn.go:63:19:63:25 | selection of Args | Dsn.go:63:9:63:11 | implicit dereference |
+| Dsn.go:63:19:63:25 | selection of Args | Dsn.go:68:29:68:33 | dbDSN |
+| Dsn.go:67:102:67:104 | cfg [pointer] | Dsn.go:67:102:67:104 | implicit dereference |
+| Dsn.go:67:102:67:104 | implicit dereference | Dsn.go:63:9:63:11 | implicit dereference |
+| Dsn.go:67:102:67:104 | implicit dereference | Dsn.go:68:29:68:33 | dbDSN |
+nodes
+| Dsn.go:26:11:26:17 | selection of Args | semmle.label | selection of Args |
+| Dsn.go:29:29:29:33 | dbDSN | semmle.label | dbDSN |
+| Dsn.go:62:2:62:4 | definition of cfg [pointer] | semmle.label | definition of cfg [pointer] |
+| Dsn.go:63:9:63:11 | cfg [pointer] | semmle.label | cfg [pointer] |
+| Dsn.go:63:9:63:11 | implicit dereference | semmle.label | implicit dereference |
+| Dsn.go:63:19:63:25 | selection of Args | semmle.label | selection of Args |
+| Dsn.go:67:102:67:104 | cfg [pointer] | semmle.label | cfg [pointer] |
+| Dsn.go:67:102:67:104 | implicit dereference | semmle.label | implicit dereference |
+| Dsn.go:68:29:68:33 | dbDSN | semmle.label | dbDSN |
+subpaths
+#select
+| Dsn.go:29:29:29:33 | dbDSN | Dsn.go:26:11:26:17 | selection of Args | Dsn.go:29:29:29:33 | dbDSN | This query depends on a $@. | Dsn.go:26:11:26:17 | selection of Args | user-provided value |
+| Dsn.go:68:29:68:33 | dbDSN | Dsn.go:63:19:63:25 | selection of Args | Dsn.go:68:29:68:33 | dbDSN | This query depends on a $@. | Dsn.go:63:19:63:25 | selection of Args | user-provided value |
--- a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref
+++ b/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref
@@ -0,0 +1 @@
+experimental/CWE-134/DsnInjectionLocal.ql
--- a/go/ql/test/experimental/CWE-203/Timing.expected
+++ b/go/ql/test/experimental/CWE-203/Timing.expected
@@ -0,0 +1,10 @@
+edges
+| timing.go:14:18:14:27 | selection of Header | timing.go:14:18:14:45 | call to Get |
+| timing.go:14:18:14:45 | call to Get | timing.go:16:25:16:36 | headerSecret |
+nodes
+| timing.go:14:18:14:27 | selection of Header | semmle.label | selection of Header |
+| timing.go:14:18:14:45 | call to Get | semmle.label | call to Get |
+| timing.go:16:25:16:36 | headerSecret | semmle.label | headerSecret |
+subpaths
+#select
+| timing.go:16:25:16:36 | headerSecret | timing.go:14:18:14:27 | selection of Header | timing.go:16:25:16:36 | headerSecret | $@ may be vulnerable to timing attacks. | timing.go:14:18:14:27 | selection of Header | Hardcoded String |
--- a/go/ql/test/experimental/CWE-203/Timing.qlref
+++ b/go/ql/test/experimental/CWE-203/Timing.qlref
@@ -0,0 +1 @@
+experimental/CWE-203/Timing.ql
--- a/go/ql/test/experimental/CWE-203/timing.go
+++ b/go/ql/test/experimental/CWE-203/timing.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+	"crypto/subtle"
+	"fmt"
+	"net/http"
+)
+
+func bad(w http.ResponseWriter, req *http.Request) (interface{}, error) {
+
+	secret := "MySuperSecretPasscode"
+	secretHeader := "X-Secret"
+
+	headerSecret := req.Header.Get(secretHeader)
+	secretStr := string(secret)
+	if len(secret) != 0 && headerSecret != secretStr {
+		return nil, fmt.Errorf("header %s=%s did not match expected secret", secretHeader, headerSecret)
+	}
+	return nil, nil
+}
+
+func good(w http.ResponseWriter, req *http.Request) (interface{}, error) {
+
+	secret := []byte("MySuperSecretPasscode")
+	secretHeader := "X-Secret"
+
+	headerSecret := req.Header.Get(secretHeader)
+	if len(secret) != 0 && subtle.ConstantTimeCompare(secret, []byte(headerSecret)) != 1 {
+		return nil, fmt.Errorf("header %s=%s did not match expected secret", secretHeader, headerSecret)
+	}
+	return nil, nil
+}
+
+func main() {
+	bad(nil, nil)
+	good(nil, nil)
+}
--- a/java/documentation/library-coverage/coverage.csv
+++ b/java/documentation/library-coverage/coverage.csv
@@ -92,6 +92,7 @@ org.apache.commons.net,9,12,,,,,,,,,,,,,,,,6,,3,,,,,,,,,,,,,,,,,,,12,,
 org.apache.commons.ognl,6,,,,,,,,,,,,,,,,6,,,,,,,,,,,,,,,,,,,,,,,,
 org.apache.commons.text,,,272,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,220,52
 org.apache.directory.ldap.client.api,1,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,
+org.apache.hadoop.fs,,,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,
 org.apache.hadoop.hive.metastore,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,,,,,,,,,,,
 org.apache.hc.client5.http.async.methods,84,,,,,,,,,,,,,,,,,84,,,,,,,,,,,,,,,,,,,,,,,
 org.apache.hc.client5.http.classic.methods,37,,,,,,,,,,,,,,,,,37,,,,,,,,,,,,,,,,,,,,,,,
--- a/java/documentation/library-coverage/coverage.rst
+++ b/java/documentation/library-coverage/coverage.rst
@@ -22,6 +22,6 @@ Java framework & library support
   Java extensions,"``javax.*``, ``jakarta.*``",63,611,34,1,4,,1,1,2
   Kotlin Standard Library,``kotlin*``,,1843,16,11,,,,,2
   `Spring <https://spring.io/>`_,``org.springframework.*``,29,483,104,2,,19,14,,29
-   Others,"``cn.hutool.core.codec``, ``com.esotericsoftware.kryo.io``, ``com.esotericsoftware.kryo5.io``, ``com.fasterxml.jackson.core``, ``com.fasterxml.jackson.databind``, ``com.hubspot.jinjava``, ``com.mitchellbosecke.pebble``, ``com.opensymphony.xwork2.ognl``, ``com.rabbitmq.client``, ``com.thoughtworks.xstream``, ``com.unboundid.ldap.sdk``, ``com.zaxxer.hikari``, ``flexjson``, ``freemarker.cache``, ``freemarker.template``, ``groovy.lang``, ``groovy.util``, ``hudson``, ``io.jsonwebtoken``, ``io.netty.bootstrap``, ``io.netty.buffer``, ``io.netty.channel``, ``io.netty.handler.codec``, ``io.netty.handler.ssl``, ``io.netty.handler.stream``, ``io.netty.resolver``, ``io.netty.util``, ``javafx.scene.web``, ``jodd.json``, ``net.sf.saxon.s9api``, ``ognl``, ``okhttp3``, ``org.apache.commons.codec``, ``org.apache.commons.compress.archivers.tar``, ``org.apache.commons.httpclient.util``, ``org.apache.commons.jelly``, ``org.apache.commons.jexl2``, ``org.apache.commons.jexl3``, ``org.apache.commons.logging``, ``org.apache.commons.net``, ``org.apache.commons.ognl``, ``org.apache.directory.ldap.client.api``, ``org.apache.hadoop.hive.metastore``, ``org.apache.hc.client5.http.async.methods``, ``org.apache.hc.client5.http.classic.methods``, ``org.apache.hc.client5.http.fluent``, ``org.apache.hive.hcatalog.templeton``, ``org.apache.ibatis.jdbc``, ``org.apache.log4j``, ``org.apache.shiro.codec``, ``org.apache.shiro.jndi``, ``org.apache.tools.ant``, ``org.apache.tools.zip``, ``org.apache.velocity.app``, ``org.apache.velocity.runtime``, ``org.codehaus.cargo.container.installer``, ``org.codehaus.groovy.control``, ``org.dom4j``, ``org.eclipse.jetty.client``, ``org.geogebra.web.full.main``, ``org.hibernate``, ``org.jdbi.v3.core``, ``org.jooq``, ``org.kohsuke.stapler``, ``org.mvel2``, ``org.openjdk.jmh.runner.options``, ``org.scijava.log``, ``org.slf4j``, ``org.thymeleaf``, ``org.xml.sax``, ``org.xmlpull.v1``, ``play.mvc``, ``ratpack.core.form``, ``ratpack.core.handling``, 
``ratpack.core.http``, ``ratpack.exec``, ``ratpack.form``, ``ratpack.func``, ``ratpack.handling``, ``ratpack.http``, ``ratpack.util``, ``retrofit2``",89,817,515,26,,18,18,,181
-   Totals,,246,9109,1957,174,10,113,33,1,361
+   Others,"``cn.hutool.core.codec``, ``com.esotericsoftware.kryo.io``, ``com.esotericsoftware.kryo5.io``, ``com.fasterxml.jackson.core``, ``com.fasterxml.jackson.databind``, ``com.hubspot.jinjava``, ``com.mitchellbosecke.pebble``, ``com.opensymphony.xwork2.ognl``, ``com.rabbitmq.client``, ``com.thoughtworks.xstream``, ``com.unboundid.ldap.sdk``, ``com.zaxxer.hikari``, ``flexjson``, ``freemarker.cache``, ``freemarker.template``, ``groovy.lang``, ``groovy.util``, ``hudson``, ``io.jsonwebtoken``, ``io.netty.bootstrap``, ``io.netty.buffer``, ``io.netty.channel``, ``io.netty.handler.codec``, ``io.netty.handler.ssl``, ``io.netty.handler.stream``, ``io.netty.resolver``, ``io.netty.util``, ``javafx.scene.web``, ``jodd.json``, ``net.sf.saxon.s9api``, ``ognl``, ``okhttp3``, ``org.apache.commons.codec``, ``org.apache.commons.compress.archivers.tar``, ``org.apache.commons.httpclient.util``, ``org.apache.commons.jelly``, ``org.apache.commons.jexl2``, ``org.apache.commons.jexl3``, ``org.apache.commons.logging``, ``org.apache.commons.net``, ``org.apache.commons.ognl``, ``org.apache.directory.ldap.client.api``, ``org.apache.hadoop.fs``, ``org.apache.hadoop.hive.metastore``, ``org.apache.hc.client5.http.async.methods``, ``org.apache.hc.client5.http.classic.methods``, ``org.apache.hc.client5.http.fluent``, ``org.apache.hive.hcatalog.templeton``, ``org.apache.ibatis.jdbc``, ``org.apache.log4j``, ``org.apache.shiro.codec``, ``org.apache.shiro.jndi``, ``org.apache.tools.ant``, ``org.apache.tools.zip``, ``org.apache.velocity.app``, ``org.apache.velocity.runtime``, ``org.codehaus.cargo.container.installer``, ``org.codehaus.groovy.control``, ``org.dom4j``, ``org.eclipse.jetty.client``, ``org.geogebra.web.full.main``, ``org.hibernate``, ``org.jdbi.v3.core``, ``org.jooq``, ``org.kohsuke.stapler``, ``org.mvel2``, ``org.openjdk.jmh.runner.options``, ``org.scijava.log``, ``org.slf4j``, ``org.thymeleaf``, ``org.xml.sax``, ``org.xmlpull.v1``, ``play.mvc``, ``ratpack.core.form``, 
``ratpack.core.handling``, ``ratpack.core.http``, ``ratpack.exec``, ``ratpack.form``, ``ratpack.func``, ``ratpack.handling``, ``ratpack.http``, ``ratpack.util``, ``retrofit2``",89,827,515,26,,18,18,,181
+   Totals,,246,9119,1957,174,10,113,33,1,361

--- a/java/kotlin-extractor/src/main/java/com/semmle/extractor/java/OdasaOutput.java
+++ b/java/kotlin-extractor/src/main/java/com/semmle/extractor/java/OdasaOutput.java
@@ -50,13 +50,9 @@ import com.semmle.util.trap.dependencies.TrapSet;
 import com.semmle.util.trap.pathtransformers.PathTransformer;

 public class OdasaOutput {
-	// either these are set ...
 	private final File trapFolder;
 	private final File sourceArchiveFolder;

-	// ... or this one is set
-	private final PopulationSpecFile specFile;
-
 	private File currentSourceFile;
 	private TrapSet trapsCreated;
 	private TrapDependencies trapDependenciesForSource;
@@ -72,29 +68,21 @@ public class OdasaOutput {
 	OdasaOutput(File outputRoot, Logger log) {
 		this.trapFolder = new File(outputRoot, "trap");
 		this.sourceArchiveFolder = new File(outputRoot, "src_archive");
-		this.specFile = null;
 		this.trackClassOrigins = false;
 		this.log = log;
 	}

 	public OdasaOutput(boolean trackClassOrigins, Logger log) {
 		String trapFolderVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_TRAP_DIR", Var.TRAP_FOLDER.name());
-		if (trapFolderVar != null) {
-			String sourceArchiveVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR", Var.SOURCE_ARCHIVE.name());
-			if (sourceArchiveVar == null)
-				throw new ResourceError(Var.TRAP_FOLDER + " was set to '" + trapFolderVar + "', but "
-						+ Var.SOURCE_ARCHIVE + " was not set");
-			this.trapFolder = new File(trapFolderVar);
-			this.sourceArchiveFolder = new File(sourceArchiveVar);
-			this.specFile = null;
-		} else {
-			this.trapFolder = null;
-			this.sourceArchiveFolder = null;
-			String specFileVar = Env.systemEnv().get(Var.ODASA_JAVA_LAYOUT);
-			if (specFileVar == null)
-				throw new ResourceError("Neither " + Var.TRAP_FOLDER + " nor " + Var.ODASA_JAVA_LAYOUT + " was set");
-			this.specFile = new PopulationSpecFile(new File(specFileVar));
+		if (trapFolderVar == null) {
+			throw new ResourceError("CODEQL_EXTRACTOR_JAVA_TRAP_DIR was not set");
 		}
+		String sourceArchiveVar = Env.systemEnv().getFirstNonEmpty("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR", Var.SOURCE_ARCHIVE.name());
+		if (sourceArchiveVar == null) {
+			throw new ResourceError("CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR was not set");
+		}
+		this.trapFolder = new File(trapFolderVar);
+		this.sourceArchiveFolder = new File(sourceArchiveVar);
 		this.trackClassOrigins = trackClassOrigins;
 		this.log = log;
 	}
@@ -123,11 +111,8 @@ public class OdasaOutput {

 	/** The output paths for that file, or null if it shouldn't be included */
 	private SpecFileEntry entryFor() {
-		if (specFile != null)
-			return specFile.getEntryFor(currentSourceFile);
-		else
-			return new SpecFileEntry(trapFolder, sourceArchiveFolder,
-					Arrays.asList(PathTransformer.std().fileAsDatabaseString(currentSourceFile)));
+		return new SpecFileEntry(trapFolder, sourceArchiveFolder,
+				Arrays.asList(PathTransformer.std().fileAsDatabaseString(currentSourceFile)));
 	}

 	/*
--- a/java/kotlin-extractor/src/main/kotlin/KotlinUsesExtractor.kt
+++ b/java/kotlin-extractor/src/main/kotlin/KotlinUsesExtractor.kt
@@ -239,8 +239,6 @@ open class KotlinUsesExtractor(
        return UseClassInstanceResult(classTypeResult, extractClass)
    }

-    private fun isArray(t: IrSimpleType) = t.isBoxedArray || t.isPrimitiveArray()
-
    private fun extractClassLaterIfExternal(c: IrClass) {
        if (isExternalDeclaration(c)) {
            extractExternalClassLater(c)
@@ -551,6 +549,22 @@ open class KotlinUsesExtractor(
                )
        }

+    /*
+    Kotlin arrays can be broken down as:
+
+    isArray(t)
+    |- t.isBoxedArray
+    |  |- t.isArray()         e.g. Array<Boolean>, Array<Boolean?>
+    |  |- t.isNullableArray() e.g. Array<Boolean>?, Array<Boolean?>?
+    |- t.isPrimitiveArray()   e.g. BooleanArray
+
+    For the corresponding Java types:
+    Boxed arrays are represented as e.g. java.lang.Boolean[].
+    Primitive arrays are represented as e.g. boolean[].
+    */
+
+    private fun isArray(t: IrType) = t.isBoxedArray || t.isPrimitiveArray()
+
    data class ArrayInfo(val elementTypeResults: TypeResults,
                         val componentTypeResults: TypeResults,
                         val dimensions: Int)
@@ -565,7 +579,7 @@ open class KotlinUsesExtractor(
     */
    private fun useArrayType(t: IrType, isPrimitiveArray: Boolean): ArrayInfo {

-        if (!t.isBoxedArray && !t.isPrimitiveArray()) {
+        if (!isArray(t)) {
            val nullableT = if (t.isPrimitiveType() && !isPrimitiveArray) t.makeNullable() else t
            val typeResults = useType(nullableT)
            return ArrayInfo(typeResults, typeResults, 0)
@@ -1141,13 +1155,13 @@ open class KotlinUsesExtractor(
                    }
                } else {
                    t.classOrNull?.let { tCls ->
-                        if (t.isArray() || t.isNullableArray()) {
+                        if (t.isBoxedArray) {
                            (t.arguments.singleOrNull() as? IrTypeProjection)?.let { elementTypeArg ->
                                val elementType = elementTypeArg.type
                                val replacedElementType = kClassToJavaClass(elementType)
                                if (replacedElementType !== elementType) {
                                    val newArg = makeTypeProjection(replacedElementType, elementTypeArg.variance)
-                                    return tCls.typeWithArguments(listOf(newArg)).codeQlWithHasQuestionMark(t.isNullableArray())
+                                    return tCls.typeWithArguments(listOf(newArg)).codeQlWithHasQuestionMark(t.isNullable())
                                }
                            }
                        }
@@ -1578,7 +1592,7 @@ open class KotlinUsesExtractor(
            }

            if (owner is IrClass) {
-                if (t.isArray() || t.isNullableArray()) {
+                if (t.isBoxedArray) {
                    val elementType = t.getArrayElementType(pluginContext.irBuiltIns)
                    val erasedElementType = erase(elementType)
                    return owner.typeWith(erasedElementType).codeQlWithHasQuestionMark(t.isNullable())
--- a/java/ql/lib/semmle/code/java/frameworks/Camel.qll
+++ b/java/ql/lib/semmle/code/java/frameworks/Camel.qll
@@ -27,8 +27,8 @@ deprecated class CamelToURI = CamelToUri;
 class CamelToBeanUri extends CamelToUri {
  CamelToBeanUri() {
    // A `<to>` element references a bean if the URI starts with "bean:", or there is no scheme.
-    matches("bean:%") or
-    not exists(indexOf(":"))
+    this.matches("bean:%") or
+    not exists(this.indexOf(":"))
  }

  /**
@@ -38,13 +38,13 @@ class CamelToBeanUri extends CamelToUri {
   * parameter parts are optional.
   */
  string getBeanIdentifier() {
-    if not exists(indexOf(":"))
+    if not exists(this.indexOf(":"))
    then result = this
    else
-      exists(int start | start = indexOf(":", 0, 0) + 1 |
-        if not exists(indexOf("?"))
-        then result = suffix(start)
-        else result = substring(start, indexOf("?", 0, 0))
+      exists(int start | start = this.indexOf(":", 0, 0) + 1 |
+        if not exists(this.indexOf("?"))
+        then result = this.suffix(start)
+        else result = this.substring(start, this.indexOf("?", 0, 0))
      )
  }

--- a/java/ql/src/Telemetry/AutomodelEndpointTypes.qll
+++ b/java/ql/src/Telemetry/AutomodelEndpointTypes.qll
@@ -0,0 +1,60 @@
+/**
+ * For internal use only.
+ *
+ * Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
+ * only predict classes defined within this file. This file is the source of truth for the string
+ * representation of each of these classes.
+ */
+
+/** A class that can be predicted by a classifier. */
+abstract class EndpointType extends string {
+  /**
+   * Holds when the string matches the name of the sink / source type.
+   */
+  bindingset[this]
+  EndpointType() { any() }
+
+  /**
+   * Gets the name of the sink/source kind for this endpoint type as used in models-as-data.
+   *
+   * See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
+   */
+  final string getKind() { result = this }
+}
+
+/** A class for sink types that can be predicted by a classifier. */
+abstract class SinkType extends EndpointType {
+  bindingset[this]
+  SinkType() { any() }
+}
+
+/** A class for source types that can be predicted by a classifier. */
+abstract class SourceType extends EndpointType {
+  bindingset[this]
+  SourceType() { any() }
+}
+
+/** The `Negative` class for non-sinks. */
+class NegativeSinkType extends SinkType {
+  NegativeSinkType() { this = "non-sink" }
+}
+
+/** A sink relevant to the SQL injection query. */
+class SqlSinkType extends SinkType {
+  SqlSinkType() { this = "sql" }
+}
+
+/** A sink relevant to the tainted path injection query. */
+class TaintedPathSinkType extends SinkType {
+  TaintedPathSinkType() { this = "tainted-path" }
+}
+
+/** A sink relevant to the SSRF query. */
+class RequestForgerySinkType extends SinkType {
+  RequestForgerySinkType() { this = "ssrf" }
+}
+
+/** A sink relevant to the command injection query. */
+class CommandInjectionSinkType extends SinkType {
+  CommandInjectionSinkType() { this = "command-injection" }
+}
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll
@@ -0,0 +1,331 @@
+/**
+ * For internal use only.
+ */
+
+private import java
+private import semmle.code.Location as Location
+private import semmle.code.java.dataflow.DataFlow
+private import semmle.code.java.dataflow.TaintTracking
+private import semmle.code.java.security.PathCreation
+private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
+private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
+private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
+private import semmle.code.java.Expr as Expr
+private import semmle.code.java.security.QueryInjection
+private import semmle.code.java.security.RequestForgery
+private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
+import AutomodelSharedCharacteristics as SharedCharacteristics
+import AutomodelEndpointTypes as AutomodelEndpointTypes
+
+/**
+ * A meta data extractor. Any Java extraction mode needs to implement exactly
+ * one instance of this class.
+ */
+abstract class MetadataExtractor extends string {
+  bindingset[this]
+  MetadataExtractor() { any() }
+
+  abstract predicate hasMetadata(
+    DataFlow::ParameterNode e, string package, string type, boolean subtypes, string name,
+    string signature, int input
+  );
+}
+
+newtype JavaRelatedLocationType =
+  MethodDoc() or
+  ClassDoc()
+
+/**
+ * A candidates implementation for framework mode.
+ *
+ * Some important notes:
+ *  - This mode is using parameters as endpoints.
+ *  - Sink- and neutral-information is being used from MaD models.
+ *  - When available, we use method- and class-java-docs as related locations.
+ */
+module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
+  // for documentation of the implementations here, see the QLDoc in the CandidateSig signature module.
+  class Endpoint = DataFlow::ParameterNode;
+
+  class EndpointType = AutomodelEndpointTypes::EndpointType;
+
+  class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
+
+  class RelatedLocation = Location::Top;
+
+  class RelatedLocationType = JavaRelatedLocationType;
+
+  // Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
+  predicate isSanitizer(Endpoint e, EndpointType t) { none() }
+
+  RelatedLocation asLocation(Endpoint e) { result = e.asParameter() }
+
+  predicate isKnownKind(string kind, string humanReadableKind, EndpointType type) {
+    kind = "read-file" and
+    humanReadableKind = "read file" and
+    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
+    or
+    kind = "create-file" and
+    humanReadableKind = "create file" and
+    type instanceof AutomodelEndpointTypes::TaintedPathSinkType
+    or
+    kind = "sql" and
+    humanReadableKind = "mad modeled sql" and
+    type instanceof AutomodelEndpointTypes::SqlSinkType
+    or
+    kind = "open-url" and
+    humanReadableKind = "open url" and
+    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
+    or
+    kind = "jdbc-url" and
+    humanReadableKind = "jdbc url" and
+    type instanceof AutomodelEndpointTypes::RequestForgerySinkType
+    or
+    kind = "command-injection" and
+    humanReadableKind = "command injection" and
+    type instanceof AutomodelEndpointTypes::CommandInjectionSinkType
+  }
+
+  predicate isSink(Endpoint e, string kind) {
+    exists(string package, string type, string name, string signature, string ext, string input |
+      sinkSpec(e, package, type, name, signature, ext, input) and
+      ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _)
+    )
+  }
+
+  predicate isNeutral(Endpoint e) {
+    exists(string package, string type, string name, string signature |
+      sinkSpec(e, package, type, name, signature, _, _) and
+      ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _)
+    )
+  }
+
+  additional predicate sinkSpec(
+    Endpoint e, string package, string type, string name, string signature, string ext, string input
+  ) {
+    FrameworkCandidatesImpl::getCallable(e).hasQualifiedName(package, type, name) and
+    signature = ExternalFlow::paramsString(getCallable(e)) and
+    ext = "" and
+    exists(int paramIdx | e.isParameterOf(_, paramIdx) |
+      if paramIdx = -1 then input = "Argument[this]" else input = "Argument[" + paramIdx + "]"
+    )
+  }
+
+  /**
+   * Gets the related location for the given endpoint.
+   *
+   * Related locations can be JavaDoc comments of the class or the method.
+   */
+  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
+    type = MethodDoc() and
+    result = FrameworkCandidatesImpl::getCallable(e).(Documentable).getJavadoc()
+    or
+    type = ClassDoc() and
+    result = FrameworkCandidatesImpl::getCallable(e).getDeclaringType().(Documentable).getJavadoc()
+  }
+
+  /**
+   * Gets the callable that contains the given endpoint.
+   *
+   * Each Java mode should implement this predicate.
+   */
+  additional Callable getCallable(Endpoint e) { result = e.getEnclosingCallable() }
+}
+
+module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>;
+
+class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
+
+class Endpoint = FrameworkCandidatesImpl::Endpoint;
+
+/*
+ * Predicates that are used to surface prompt examples and candidates for classification with an ML model.
+ */
+
+/**
+ * A MetadataExtractor that extracts metadata for framework mode.
+ */
+class FrameworkModeMetadataExtractor extends MetadataExtractor {
+  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }
+
+  /**
+   * By convention, the subtypes property of the MaD declaration should only be true when there
+   * _can_ exist any subtypes with a different implementation. It would technically be ok to
+   * always use the value 'true', but this would break convention.
+   */
+  boolean considerSubtypes(Callable callable) {
+    if
+      callable.isStatic() or
+      callable.getDeclaringType().isStatic() or
+      callable.isFinal() or
+      callable.getDeclaringType().isFinal()
+    then result = false
+    else result = true
+  }
+
+  override predicate hasMetadata(
+    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
+    int input
+  ) {
+    exists(Callable callable |
+      e.asParameter() = callable.getParameter(input) and
+      package = callable.getDeclaringType().getPackage().getName() and
+      type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
+      subtypes = this.considerSubtypes(callable) and
+      // `name` is the MaD name column, i.e. the callable's name; the parameter itself is
+      // identified by `input`.
+      name = callable.getName() and
+      signature = ExternalFlow::paramsString(callable)
+    )
+  }
+}
+
+/*
+ * EndpointCharacteristic classes that are specific to Automodel for Java.
+ */
+
+/**
+ * A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink.
+ *
+ * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
+ * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
+ * the dangerous/interesting thing, so we want the latter to be modeled as the sink.
+ *
+ * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
+ */
+private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
+  UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
+
+  override predicate appliesToEndpoint(Endpoint e) {
+    not FrameworkCandidatesImpl::isSink(e, _) and
+    FrameworkCandidatesImpl::getCallable(e).getName().matches("is%") and
+    FrameworkCandidatesImpl::getCallable(e).getReturnType() instanceof BooleanType
+  }
+}
+
+/**
+ * A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a
+ * sink.
+ *
+ * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
+ * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
+ * dangerous/interesting thing, so we want the latter to be modeled as the sink.
+ */
+private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
+  UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
+
+  override predicate appliesToEndpoint(Endpoint e) {
+    not FrameworkCandidatesImpl::isSink(e, _) and
+    exists(Callable callable |
+      callable = FrameworkCandidatesImpl::getCallable(e) and
+      callable.getName().toLowerCase() = ["exists", "notexists"] and
+      callable.getReturnType() instanceof BooleanType
+    )
+  }
+}
+
+/**
+ * A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink.
+ */
+private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
+  ExceptionCharacteristic() { this = "exception" }
+
+  override predicate appliesToEndpoint(Endpoint e) {
+    FrameworkCandidatesImpl::getCallable(e).getDeclaringType().getASupertype*() instanceof
+      TypeThrowable
+  }
+}
+
+/**
+ * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, i.e., APIs that
+ * are considered worth modeling.
+ */
+private class NotAModelApiParameter extends CharacteristicsImpl::UninterestingToModelCharacteristic {
+  NotAModelApiParameter() { this = "not a model API parameter" }
+
+  override predicate appliesToEndpoint(Endpoint e) {
+    not exists(ModelExclusions::ModelApi api | api.getAParameter() = e.asParameter())
+  }
+}
+
+/**
+ * A negative characteristic that filters out non-public methods. Non-public methods are not interesting to include in
+ * the standard Java modeling, because they cannot be called from outside the package.
+ */
+private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
+{
+  NonPublicMethodCharacteristic() { this = "non-public method" }
+
+  override predicate appliesToEndpoint(Endpoint e) {
+    not FrameworkCandidatesImpl::getCallable(e).isPublic()
+  }
+}
+
+/**
+ * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
+ * characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
+ * an error message indicating why this combination is problematic.
+ *
+ * Copied from
+ *   javascript/ql/experimental/adaptivethreatmodeling/test/endpoint_large_scale/ContradictoryEndpointCharacteristics.ql
+ */
+predicate erroneousEndpoints(
+  Endpoint endpoint, EndpointCharacteristic characteristic,
+  AutomodelEndpointTypes::EndpointType endpointType, float confidence, string errorMessage,
+  boolean ignoreKnownModelingErrors
+) {
+  // An endpoint's characteristics should not include positive indicators with medium/high confidence for more than one
+  // sink/source type (including the negative type).
+  exists(
+    EndpointCharacteristic characteristic2, AutomodelEndpointTypes::EndpointType endpointClass2,
+    float confidence2
+  |
+    endpointType != endpointClass2 and
+    (
+      endpointType instanceof AutomodelEndpointTypes::SinkType and
+      endpointClass2 instanceof AutomodelEndpointTypes::SinkType
+      or
+      endpointType instanceof AutomodelEndpointTypes::SourceType and
+      endpointClass2 instanceof AutomodelEndpointTypes::SourceType
+    ) and
+    characteristic.appliesToEndpoint(endpoint) and
+    characteristic2.appliesToEndpoint(endpoint) and
+    characteristic.hasImplications(endpointType, true, confidence) and
+    characteristic2.hasImplications(endpointClass2, true, confidence2) and
+    confidence > SharedCharacteristics::mediumConfidence() and
+    confidence2 > SharedCharacteristics::mediumConfidence() and
+    (
+      ignoreKnownModelingErrors = true and
+      not knownOverlappingCharacteristics(characteristic, characteristic2)
+      or
+      ignoreKnownModelingErrors = false
+    )
+  ) and
+  errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
+  or
+  // An endpoint's characteristics should not include positive indicators with medium/high confidence for some class and
+  // also include negative indicators with medium/high confidence for this same class.
+  exists(EndpointCharacteristic characteristic2, float confidence2 |
+    characteristic.appliesToEndpoint(endpoint) and
+    characteristic2.appliesToEndpoint(endpoint) and
+    characteristic.hasImplications(endpointType, true, confidence) and
+    characteristic2.hasImplications(endpointType, false, confidence2) and
+    confidence > SharedCharacteristics::mediumConfidence() and
+    confidence2 > SharedCharacteristics::mediumConfidence()
+  ) and
+  ignoreKnownModelingErrors = false and
+  errorMessage = "Endpoint has high-confidence positive and negative indicators for the same class"
+}
+
+/**
+ * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
+ * have some overlap in their results. This indicates a problem with the underlying Java modeling. Specifically,
+ * `PathCreation` is prone to FPs.
+ */
+private predicate knownOverlappingCharacteristics(
+  EndpointCharacteristic characteristic1, EndpointCharacteristic characteristic2
+) {
+  characteristic1 != characteristic2 and
+  characteristic1 = ["mad taint step", "create path", "read file", "known non-sink"] and
+  characteristic2 = ["mad taint step", "create path", "read file", "known non-sink"]
+}
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql
@@ -0,0 +1,50 @@
+/**
+ * Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for
+ * classification with an ML model.
+ *
+ * Note: This query does not actually classify the endpoints using the model.
+ *
+ * @name Automodel candidates
+ * @description A query to extract automodel candidates.
+ * @kind problem
+ * @severity info
+ * @id java/ml/extract-automodel-candidates
+ * @tags internal automodel extract candidates
+ */
+
+private import AutomodelFrameworkModeCharacteristics
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, string message, MetadataExtractor meta, string package, string type,
+  boolean subtypes, string name, string signature, int input
+where
+  not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
+    u.appliesToEndpoint(endpoint)
+  ) and
+  // If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
+  // don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
+  // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
+  // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
+  // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
+  not CharacteristicsImpl::isSink(endpoint, _) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
+  // a non-sink, and we surface only endpoints that have at least one such sink type.
+  message =
+    strictconcat(AutomodelEndpointTypes::SinkType sinkType |
+      not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
+      CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
+    |
+      sinkType, ", "
+    )
+select endpoint,
+  message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.toString().(DollarAtString), "input" //
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractNegativeExamples.ql
@@ -0,0 +1,47 @@
+/**
+ * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
+ *
+ * @name Negative examples (experimental)
+ * @kind problem
+ * @severity info
+ * @id java/ml/non-sink
+ * @tags internal automodel extract examples negative
+ */
+
+private import AutomodelFrameworkModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
+  MetadataExtractor meta, string package, string type, boolean subtypes, string name,
+  string signature, int input
+where
+  characteristic.appliesToEndpoint(endpoint) and
+  confidence >= SharedCharacteristics::highConfidence() and
+  characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
+  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
+  // certain about in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
+  // treated by the actual query as a sanitizer, since the final logic is something like
+  // `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
+  // they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
+  not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
+    not positiveType instanceof NegativeSinkType and
+    characteristic2.appliesToEndpoint(endpoint) and
+    confidence2 >= SharedCharacteristics::maximalConfidence() and
+    characteristic2.hasImplications(positiveType, true, confidence2)
+  ) and
+  message = characteristic
+select endpoint,
+  message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.toString().(DollarAtString), "input" //
--- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
+++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql
@@ -0,0 +1,34 @@
+/**
+ * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
+ *
+ * @name Positive examples (experimental)
+ * @kind problem
+ * @severity info
+ * @id java/ml/known-sink
+ * @tags internal automodel extract examples positive
+ */
+
+private import AutomodelFrameworkModeCharacteristics
+private import AutomodelEndpointTypes
+private import AutomodelSharedUtil
+
+from
+  Endpoint endpoint, SinkType sinkType, MetadataExtractor meta, string package, string type,
+  boolean subtypes, string name, string signature, int input
+where
+  // Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
+  // certain about in the prompt.
+  not erroneousEndpoints(endpoint, _, _, _, _, false) and
+  meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
+  // Extract positive examples of sinks belonging to the existing ATM query configurations.
+  CharacteristicsImpl::isKnownSink(endpoint, sinkType)
+select endpoint,
+  sinkType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
+  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
+  package.(DollarAtString), "package", //
+  type.(DollarAtString), "type", //
+  subtypes.toString().(DollarAtString), "subtypes", //
+  name.(DollarAtString), "name", //
+  signature.(DollarAtString), "signature", //
+  input.toString().(DollarAtString), "input" //
--- a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll
+++ b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll
@@ -0,0 +1,305 @@
+float maximalConfidence() { result = 1.0 }
+
+float highConfidence() { result = 0.9 }
+
+float mediumConfidence() { result = 0.6 }
+
+/**
+ * A specification of how to instantiate the shared characteristics for a given candidate class.
+ *
+ * The `CandidateSig` implementation specifies a type to use for Endpoints (e.g., `ParameterNode`), as well as a type
+ * to label endpoint classes (the `EndpointType`). One of the endpoint classes needs to be a 'negative' class, meaning
+ *   "not any of the other known endpoint types".
+ */
+signature module CandidateSig {
+  /**
+   * An endpoint is a potential candidate for modeling. This will typically be bound to the language's
+   * DataFlow node class, or a subtype thereof.
+   */
+  class Endpoint;
+
+  /**
+   * A related location for an endpoint. This will typically be bound to the supertype of all AST nodes (e.g., `Top`).
+   */
+  class RelatedLocation;
+
+  /**
+   * A label for a related location.
+   *
+   * E.g., method-doc, class-doc, etc.
+   */
+  class RelatedLocationType;
+
+  /**
+   * A class kind for an endpoint.
+   */
+  class EndpointType extends string;
+
+  /**
+   * An EndpointType that denotes the absence of any sink.
+   */
+  class NegativeEndpointType extends EndpointType;
+
+  /**
+   * Gets the endpoint as a location.
+   *
+   * This is a utility function to convert an endpoint to its corresponding location.
+   */
+  RelatedLocation asLocation(Endpoint e);
+
+  /**
+   * Defines what MaD kinds are known, and what endpoint type they correspond to.
+   */
+  predicate isKnownKind(string kind, string humanReadableLabel, EndpointType type);
+
+  /**
+   * Holds if `e` is a flow sanitizer, and has type `t`.
+   */
+  predicate isSanitizer(Endpoint e, EndpointType t);
+
+  /**
+   * Holds if `e` is a sink with the label `kind`.
+   */
+  predicate isSink(Endpoint e, string kind);
+
+  /**
+   * Holds if `e` is not a sink of any kind.
+   */
+  predicate isNeutral(Endpoint e);
+
+  /**
+   * Gets a related location.
+   *
+   * A related location is a source code location that may hold extra information about an endpoint that can be useful
+   * to the machine learning model.
+   *
+   * For example, a related location for a method call may be the documentation comment of a method.
+   */
+  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType name);
+}
+
+/**
+ * A set of shared characteristics for a given candidate class.
+ *
+ * This module is language-agnostic, although the `CandidateSig` module will be language-specific.
+ *
+ * The language specific implementation can also further extend the behavior of this module by adding additional
+ *   implementations of endpoint characteristics exported by this module.
+ */
+module SharedCharacteristics<CandidateSig Candidate> {
+  predicate isSink = Candidate::isSink/2;
+
+  predicate isNeutral = Candidate::isNeutral/1;
+
+  /**
+   * Holds if `sink` is a known sink of type `endpointType`.
+   */
+  predicate isKnownSink(Candidate::Endpoint sink, Candidate::EndpointType endpointType) {
+    // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
+    // known sink for the class.
+    not endpointType instanceof Candidate::NegativeEndpointType and
+    exists(EndpointCharacteristic characteristic |
+      characteristic.appliesToEndpoint(sink) and
+      characteristic.hasImplications(endpointType, true, maximalConfidence())
+    )
+  }
+
+  /**
+   * Holds if the candidate sink `candidateSink` should be considered as a possible sink of type `sinkType`, and
+   * classified by the ML model. A candidate sink is a node that cannot be excluded from `sinkType` based on its
+   * characteristics.
+   */
+  predicate isSinkCandidate(Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType) {
+    not sinkType instanceof Candidate::NegativeEndpointType and
+    not exists(getAReasonSinkExcluded(candidateSink, sinkType))
+  }
+
+  /**
+   * Gets the related location of `e` with type `type`, if one exists.
+   * Otherwise, gets `e` itself as a location (via `Candidate::asLocation`).
+   */
+  Candidate::RelatedLocation getRelatedLocationOrCandidate(
+    Candidate::Endpoint e, Candidate::RelatedLocationType type
+  ) {
+    if exists(Candidate::getRelatedLocation(e, type))
+    then result = Candidate::getRelatedLocation(e, type)
+    else result = Candidate::asLocation(e)
+  }
+
+  /**
+   * Gets a characteristic that causes `candidateSink` to be excluded as an effective sink for the sink type
+   * `sinkType`.
+   */
+  EndpointCharacteristic getAReasonSinkExcluded(
+    Candidate::Endpoint candidateSink, Candidate::EndpointType sinkType
+  ) {
+    // An endpoint is a sink candidate if none of its characteristics give much indication whether or not it is a sink.
+    not sinkType instanceof Candidate::NegativeEndpointType and
+    result.appliesToEndpoint(candidateSink) and
+    (
+      // Exclude endpoints that have a characteristic that implies they're not sinks for _any_ sink type.
+      exists(float confidence |
+        confidence >= mediumConfidence() and
+        result.hasImplications(any(Candidate::NegativeEndpointType t), true, confidence)
+      )
+      or
+      // Exclude endpoints that have a characteristic that implies they're not sinks for _this particular_ sink type.
+      exists(float confidence |
+        confidence >= mediumConfidence() and
+        result.hasImplications(sinkType, false, confidence)
+      )
+    )
+  }
+
+  /**
+   * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
+   * about whether to include the endpoint in the training set and with what kind, as well as whether to score the
+   * endpoint at inference time.
+   */
+  abstract class EndpointCharacteristic extends string {
+    /**
+     * Holds for the string that is the name of the characteristic. This should describe some property of an endpoint
+     * that is meaningful for determining whether it's a sink, and if so, of which sink type.
+     */
+    bindingset[this]
+    EndpointCharacteristic() { any() }
+
+    /**
+     * Holds for endpoints that have this characteristic.
+     */
+    abstract predicate appliesToEndpoint(Candidate::Endpoint n);
+
+    /**
+     * This predicate describes what the characteristic tells us about an endpoint.
+     *
+     * Params:
+     * endpointType: The sink/source type.
+     * isPositiveIndicator: If true, this characteristic indicates that this endpoint _is_ a member of the class; if
+     * false, it indicates that it _isn't_ a member of the class.
+     * confidence: A float in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint
+     * belonging / not belonging to the given class. A confidence near zero means this characteristic is a very weak
+     * indicator of whether or not the endpoint belongs to the class. A confidence of 1 means that all endpoints with
+     * this characteristic definitively do/don't belong to the class.
+     */
+    abstract predicate hasImplications(
+      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
+    );
+
+    /** Indicators with confidence at or above this threshold are considered to be high-confidence indicators. */
+    final float getHighConfidenceThreshold() { result = 0.8 }
+  }
+
+  /**
+   * A high-confidence characteristic that indicates that an endpoint is a sink of a specified type. These endpoints can
+   * be used as positive samples for training or for a few-shot prompt.
+   */
+  abstract class SinkCharacteristic extends EndpointCharacteristic {
+    bindingset[this]
+    SinkCharacteristic() { any() }
+
+    abstract Candidate::EndpointType getSinkType();
+
+    final override predicate hasImplications(
+      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
+    ) {
+      endpointType = this.getSinkType() and
+      isPositiveIndicator = true and
+      confidence = maximalConfidence()
+    }
+  }
+
+  /**
+   * A high-confidence characteristic that indicates that an endpoint is not a sink of any type. These endpoints can be
+   * used as negative samples for training or for a few-shot prompt.
+   */
+  abstract class NotASinkCharacteristic extends EndpointCharacteristic {
+    bindingset[this]
+    NotASinkCharacteristic() { any() }
+
+    override predicate hasImplications(
+      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
+    ) {
+      endpointType instanceof Candidate::NegativeEndpointType and
+      isPositiveIndicator = true and
+      confidence = highConfidence()
+    }
+  }
+
+  /**
+   * A medium-confidence characteristic that indicates that an endpoint is unlikely to be a sink of any type. These
+   * endpoints can be excluded from scoring at inference time, both to save time and to avoid false positives. They should
+   * not, however, be used as negative samples for training or for a few-shot prompt, because they may include a small
+   * number of sinks.
+   */
+  abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
+    bindingset[this]
+    LikelyNotASinkCharacteristic() { any() }
+
+    override predicate hasImplications(
+      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
+    ) {
+      endpointType instanceof Candidate::NegativeEndpointType and
+      isPositiveIndicator = true and
+      confidence = mediumConfidence()
+    }
+  }
+
+  /**
+   * A characteristic that indicates not necessarily that an endpoint is not a sink, but rather that it is not a sink
+   * that's interesting to model in the standard Java libraries. These filters should be removed when extracting sink
+   * candidates within a user's codebase for customized modeling.
+   *
+   * These endpoints should not be used as negative samples for training or for a few-shot prompt, because they are not
+   * necessarily non-sinks.
+   */
+  abstract class UninterestingToModelCharacteristic extends EndpointCharacteristic {
+    bindingset[this]
+    UninterestingToModelCharacteristic() { any() }
+
+    override predicate hasImplications(
+      Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
+    ) {
+      endpointType instanceof Candidate::NegativeEndpointType and
+      isPositiveIndicator = true and
+      confidence = mediumConfidence()
+    }
+  }
+
+  /**
+   * Contains default implementations that are derived solely from the `CandidateSig` implementation.
+   */
+  private module DefaultCharacteristicImplementations {
+    /**
+     * Endpoints identified as sinks by the `CandidateSig` implementation are sinks with maximal confidence.
+     */
+    private class KnownSinkCharacteristic extends SinkCharacteristic {
+      string madKind;
+      Candidate::EndpointType endpointType;
+
+      KnownSinkCharacteristic() { Candidate::isKnownKind(madKind, this, endpointType) }
+
+      override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSink(e, madKind) }
+
+      override Candidate::EndpointType getSinkType() { result = endpointType }
+    }
+
+    /**
+     * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
+     */
+    private class NeutralModelCharacteristic extends NotASinkCharacteristic {
+      NeutralModelCharacteristic() { this = "known non-sink" }
+
+      override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
+    }
+
+    /**
+     * A negative characteristic that indicates that an endpoint is a flow sanitizer of some type (per
+     * `Candidate::isSanitizer`). NOTE(review): the characteristic name "external" looks copy-pasted - confirm.
+     */
+    private class IsSanitizerCharacteristic extends NotASinkCharacteristic {
+      IsSanitizerCharacteristic() { this = "external" }
+
+      override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSanitizer(e, _) }
+    }
+  }
+}
--- a/java/ql/src/Telemetry/AutomodelSharedUtil.qll
+++ b/java/ql/src/Telemetry/AutomodelSharedUtil.qll
@@ -0,0 +1,21 @@
+/**
+ * A helper class to represent a string value that can be returned by a query using $@ notation.
+ *
+ * It extends `string`, but adds a mock `hasLocationInfo` method that returns the string itself as the file name.
+ *
+ * Use this when you want to return a string value from a query using $@ notation - the string value
+ * will be included in the SARIF file.
+ *
+ *
+ * Background information on `hasLocationInfo`:
+ * https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-location-information
+ */
+class DollarAtString extends string {
+  bindingset[this]
+  DollarAtString() { any() }
+
+  bindingset[this]
+  predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
+    path = this and sl = 1 and sc = 1 and el = 1 and ec = 1
+  }
+}
--- a/java/ql/src/qlpack.yml
+++ b/java/ql/src/qlpack.yml
@@ -12,4 +12,4 @@ dependencies:
  codeql/util: ${workspace}
 dataExtensions:
  - Telemetry/ExtractorInformation.yml
-warnOmImplicitThis: true
+warnOnImplicitThis: true
--- a/java/ql/test/TestUtilities/InlineFlowTest.qll
+++ b/java/ql/test/TestUtilities/InlineFlowTest.qll
@@ -73,7 +73,7 @@ class InlineFlowTest extends InlineExpectationsTest {

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "hasValueFlow" and
-    exists(DataFlow::Node src, DataFlow::Node sink | hasValueFlow(src, sink) |
+    exists(DataFlow::Node src, DataFlow::Node sink | this.hasValueFlow(src, sink) |
      sink.getLocation() = location and
      element = sink.toString() and
      if exists(getSourceArgString(src)) then value = getSourceArgString(src) else value = ""
@@ -81,7 +81,7 @@ class InlineFlowTest extends InlineExpectationsTest {
    or
    tag = "hasTaintFlow" and
    exists(DataFlow::Node src, DataFlow::Node sink |
-      hasTaintFlow(src, sink) and not hasValueFlow(src, sink)
+      this.hasTaintFlow(src, sink) and not this.hasValueFlow(src, sink)
    |
      sink.getLocation() = location and
      element = sink.toString() and
--- a/javascript/ql/lib/qlpack.yml
+++ b/javascript/ql/lib/qlpack.yml
@@ -12,3 +12,4 @@ dependencies:
  codeql/yaml: ${workspace}
 dataExtensions:
  - semmle/javascript/frameworks/**/model.yml
+warnOnImplicitThis: true
--- a/javascript/ql/lib/semmle/javascript/frameworks/AngularJS/AngularJSCore.qll
+++ b/javascript/ql/lib/semmle/javascript/frameworks/AngularJS/AngularJSCore.qll
@@ -507,7 +507,7 @@ class DirectiveTargetName extends string {
   * `:` and `_` count as component delimiters.
   */
  string getRawComponent(int i) {
-    result = toLowerCase().regexpFind("(?<=^|[-:_])[a-zA-Z0-9]+(?=$|[-:_])", i, _)
+    result = this.toLowerCase().regexpFind("(?<=^|[-:_])[a-zA-Z0-9]+(?=$|[-:_])", i, _)
  }

  /**
--- a/javascript/ql/lib/semmle/javascript/linters/ESLint.qll
+++ b/javascript/ql/lib/semmle/javascript/linters/ESLint.qll
@@ -10,10 +10,12 @@ module ESLint {
   */
  abstract class Configuration extends Locatable {
    /** Gets the folder in which this configuration file is located. */
-    private Folder getEnclosingFolder() { result = getFile().getParentContainer() }
+    private Folder getEnclosingFolder() { result = this.getFile().getParentContainer() }

    /** Holds if this configuration file applies to the code in `tl`. */
-    predicate appliesTo(TopLevel tl) { tl.getFile().getParentContainer+() = getEnclosingFolder() }
+    predicate appliesTo(TopLevel tl) {
+      tl.getFile().getParentContainer+() = this.getEnclosingFolder()
+    }

    /** Gets the `globals` configuration object of this file, if any. */
    abstract ConfigurationObject getGlobals();
@@ -39,11 +41,11 @@ module ESLint {
  /** An `.eslintrc.json` file. */
  private class EslintrcJson extends JsonConfiguration {
    EslintrcJson() {
-      isTopLevel() and
-      exists(string n | n = getFile().getBaseName() | n = ".eslintrc.json" or n = ".eslintrc")
+      this.isTopLevel() and
+      exists(string n | n = this.getFile().getBaseName() | n = ".eslintrc.json" or n = ".eslintrc")
    }

-    override ConfigurationObject getGlobals() { result = getPropValue("globals") }
+    override ConfigurationObject getGlobals() { result = this.getPropValue("globals") }
  }

  /** An ESLint configuration object in JSON format. */
@@ -51,7 +53,7 @@ module ESLint {
    override Configuration getConfiguration() { this = result.(JsonConfiguration).getPropValue(_) }

    override boolean getBooleanProperty(string p) {
-      exists(string v | v = getPropValue(p).(JsonBoolean).getValue() |
+      exists(string v | v = this.getPropValue(p).(JsonBoolean).getValue() |
        v = "true" and result = true
        or
        v = "false" and result = false
@@ -62,7 +64,7 @@ module ESLint {
  /** An `.eslintrc.yaml` file. */
  private class EslintrcYaml extends Configuration instanceof YamlMapping, YamlDocument {
    EslintrcYaml() {
-      exists(string n | n = getFile().getBaseName() |
+      exists(string n | n = this.(Locatable).getFile().getBaseName() |
        n = ".eslintrc.yaml" or n = ".eslintrc.yml" or n = ".eslintrc"
      )
    }
@@ -91,7 +93,7 @@ module ESLint {
      exists(PackageJson pkg | this = pkg.getPropValue("eslintConfig"))
    }

-    override ConfigurationObject getGlobals() { result = getPropValue("globals") }
+    override ConfigurationObject getGlobals() { result = this.getPropValue("globals") }
  }

  /** An ESLint `globals` configuration object. */
@@ -99,10 +101,12 @@ module ESLint {
    GlobalsConfigurationObject() { this = any(Configuration cfg).getGlobals() }

    override predicate declaresGlobal(string name, boolean writable) {
-      getBooleanProperty(name) = writable
+      this.getBooleanProperty(name) = writable
    }

-    override predicate appliesTo(ExprOrStmt s) { getConfiguration().appliesTo(s.getTopLevel()) }
+    override predicate appliesTo(ExprOrStmt s) {
+      this.getConfiguration().appliesTo(s.getTopLevel())
+    }

    abstract override Configuration getConfiguration();

--- a/javascript/ql/lib/semmle/javascript/meta/ExtractionMetrics.qll
+++ b/javascript/ql/lib/semmle/javascript/meta/ExtractionMetrics.qll
@@ -17,22 +17,22 @@ module ExtractionMetrics {
    /**
     * Gets the CPU time in nanoseconds it took to extract this file.
     */
-    float getCpuTime() { result = strictsum(getTime(_, 0)) }
+    float getCpuTime() { result = strictsum(this.getTime(_, 0)) }

    /**
     * Gets the wall-clock time in nanoseconds it took to extract this file.
     */
-    float getWallclockTime() { result = strictsum(getTime(_, 1)) }
+    float getWallclockTime() { result = strictsum(this.getTime(_, 1)) }

    /**
     * Gets the CPU time in nanoseconds it took to process phase `phaseName` during the extraction of this file.
     */
-    float getCpuTime(PhaseName phaseName) { result = getTime(phaseName, 0) }
+    float getCpuTime(PhaseName phaseName) { result = this.getTime(phaseName, 0) }

    /**
     * Gets the wall-clock time in nanoseconds it took to process phase `phaseName` during the extraction of this file.
     */
-    float getWallclockTime(PhaseName phaseName) { result = getTime(phaseName, 1) }
+    float getWallclockTime(PhaseName phaseName) { result = this.getTime(phaseName, 1) }

    /**
     * Holds if this file was extracted from the trap cache.
@@ -60,7 +60,7 @@ module ExtractionMetrics {
        ) = time
      |
        // assume the cache-lookup was for free
-        if isFromCache() then result = 0 else result = time
+        if this.isFromCache() then result = 0 else result = time
      )
    }
  }
--- a/javascript/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.js
+++ b/javascript/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.js
@@ -1,5 +1,5 @@
 var cp = require("child_process");

 module.exports = function download(path, callback) {
-  cp.exec("wget " + path, callback);
+  cp.execSync("wget " + path, callback);
 }
--- a/javascript/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.js
+++ b/javascript/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.js
@@ -1,5 +1,5 @@
 var cp = require("child_process");

 module.exports = function download(path, callback) {
-  cp.execFile("wget", [path], callback);
+  cp.execFileSync("wget", [path], callback);
 }
--- a/javascript/ql/src/Security/CWE-807/example.inc.qhelp
+++ b/javascript/ql/src/Security/CWE-807/example.inc.qhelp
@@ -18,7 +18,7 @@
 			<p>

 				This security check is, however, insufficient since an
-				attacker can craft his cookie values to match those of any user.  To
+				attacker can craft their cookie values to match those of any user.  To
 				prevent this, the server can cryptographically sign the security
 				critical cookie values:

--- a/javascript/ql/src/qlpack.yml
+++ b/javascript/ql/src/qlpack.yml
@@ -11,3 +11,4 @@ dependencies:
  codeql/suite-helpers: ${workspace}
  codeql/typos: ${workspace}
  codeql/util: ${workspace}
+warnOnImplicitThis: true
--- a/javascript/ql/test/qlpack.yml
+++ b/javascript/ql/test/qlpack.yml
@@ -7,3 +7,4 @@ extractor: javascript
 tests: .
 dataExtensions:
  - library-tests/DataExtensions/*.model.yml
+warnOnImplicitThis: true
--- a/javascript/ql/test/tutorials/Validating
+++ b/javascript/ql/test/tutorials/Validating
@@ -34,7 +34,7 @@ class RamlResource extends YamlMapping {
  /** Get the method for this resource with the given verb. */
  RamlMethod getMethod(string verb) {
    verb = httpVerb() and
-    result = lookup(verb)
+    result = this.lookup(verb)
  }
 }

--- a/javascript/ql/test/tutorials/Validating
+++ b/javascript/ql/test/tutorials/Validating
@@ -2,7 +2,7 @@ import javascript

 /** A RAML specification. */
 class RamlSpec extends YamlDocument, YamlMapping {
-  RamlSpec() { getLocation().getFile().getExtension() = "raml" }
+  RamlSpec() { this.getLocation().getFile().getExtension() = "raml" }
 }

 from RamlSpec s
--- a/javascript/ql/test/tutorials/Validating
+++ b/javascript/ql/test/tutorials/Validating
@@ -4,13 +4,13 @@ string httpVerb() { result = ["get", "put", "post", "delete"] }

 /** A RAML specification. */
 class RamlSpec extends YamlDocument, YamlMapping {
-  RamlSpec() { getLocation().getFile().getExtension() = "raml" }
+  RamlSpec() { this.getLocation().getFile().getExtension() = "raml" }
 }

 /** A RAML resource specification. */
 class RamlResource extends YamlMapping {
  RamlResource() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping m, string name |
      this = m.lookup(name) and
      name.matches("/%")
@@ -30,14 +30,14 @@ class RamlResource extends YamlMapping {
  /** Get the method for this resource with the given verb. */
  RamlMethod getMethod(string verb) {
    verb = httpVerb() and
-    result = lookup(verb)
+    result = this.lookup(verb)
  }
 }

 /** A RAML method specification. */
 class RamlMethod extends YamlValue {
  RamlMethod() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping obj | this = obj.lookup(httpVerb()))
  }

--- a/javascript/ql/test/tutorials/Validating
+++ b/javascript/ql/test/tutorials/Validating
@@ -4,13 +4,13 @@ string httpVerb() { result = ["get", "put", "post", "delete"] }

 /** A RAML specification. */
 class RamlSpec extends YamlDocument, YamlMapping {
-  RamlSpec() { getLocation().getFile().getExtension() = "raml" }
+  RamlSpec() { this.getLocation().getFile().getExtension() = "raml" }
 }

 /** A RAML resource specification. */
 class RamlResource extends YamlMapping {
  RamlResource() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping m, string name |
      this = m.lookup(name) and
      name.matches("/%")
@@ -30,13 +30,13 @@ class RamlResource extends YamlMapping {
  /** Get the method for this resource with the given verb. */
  RamlMethod getMethod(string verb) {
    verb = httpVerb() and
-    result = lookup(verb)
+    result = this.lookup(verb)
  }
 }

 class RamlMethod extends YamlValue {
  RamlMethod() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping obj | this = obj.lookup(httpVerb()))
  }
 }
--- a/javascript/ql/test/tutorials/Validating
+++ b/javascript/ql/test/tutorials/Validating
@@ -4,13 +4,13 @@ string httpVerb() { result = ["get", "put", "post", "delete"] }

 /** A RAML specification. */
 class RamlSpec extends YamlDocument, YamlMapping {
-  RamlSpec() { getLocation().getFile().getExtension() = "raml" }
+  RamlSpec() { this.getLocation().getFile().getExtension() = "raml" }
 }

 /** A RAML resource specification. */
 class RamlResource extends YamlMapping {
  RamlResource() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping m, string name |
      this = m.lookup(name) and
      name.matches("/%")
@@ -30,14 +30,14 @@ class RamlResource extends YamlMapping {
  /** Get the method for this resource with the given verb. */
  RamlMethod getMethod(string verb) {
    verb = httpVerb() and
-    result = lookup(verb)
+    result = this.lookup(verb)
  }
 }

 /** A RAML method specification. */
 class RamlMethod extends YamlValue {
  RamlMethod() {
-    getDocument() instanceof RamlSpec and
+    this.getDocument() instanceof RamlSpec and
    exists(YamlMapping obj | this = obj.lookup(httpVerb()))
  }

--- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll
@@ -28,6 +28,27 @@ module SummaryComponent {
  /** Gets a summary component that represents a list element. */
  SummaryComponent listElement() { result = content(any(ListElementContent c)) }

+  /** Gets a summary component that represents a set element. */
+  SummaryComponent setElement() { result = content(any(SetElementContent c)) }
+
+  /** Gets a summary component that represents a tuple element. */
+  SummaryComponent tupleElement(int index) {
+    exists(TupleElementContent c | c.getIndex() = index and result = content(c))
+  }
+
+  /** Gets a summary component that represents a dictionary element. */
+  SummaryComponent dictionaryElement(string key) {
+    exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
+  }
+
+  /** Gets a summary component that represents a dictionary element at any key. */
+  SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
+
+  /** Gets a summary component that represents an attribute element. */
+  SummaryComponent attribute(string attr) {
+    exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
+  }
+
  /** Gets a summary component that represents the return value of a call. */
  SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
 }
--- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
@@ -105,6 +105,27 @@ predicate neutralSummaryElement(FlowSummary::SummarizedCallable c, string proven
 SummaryComponent interpretComponentSpecific(AccessPathToken c) {
  c = "ListElement" and
  result = FlowSummary::SummaryComponent::listElement()
+  or
+  c = "SetElement" and
+  result = FlowSummary::SummaryComponent::setElement()
+  or
+  exists(int index |
+    c.getAnArgument("TupleElement") = index.toString() and
+    result = FlowSummary::SummaryComponent::tupleElement(index)
+  )
+  or
+  exists(string key |
+    c.getAnArgument("DictionaryElement") = key and
+    result = FlowSummary::SummaryComponent::dictionaryElement(key)
+  )
+  or
+  c = "DictionaryElementAny" and
+  result = FlowSummary::SummaryComponent::dictionaryElementAny()
+  or
+  exists(string attr |
+    c.getAnArgument("Attribute") = attr and
+    result = FlowSummary::SummaryComponent::attribute(attr)
+  )
 }

 /** Gets the textual representation of a summary component in the format used for flow summaries. */
--- a/ql/extractor/src/autobuilder.rs
+++ b/ql/extractor/src/autobuilder.rs
@@ -1,48 +1,21 @@
-use clap::Args;
 use std::env;
 use std::path::PathBuf;
-use std::process::Command;
+
+use clap::Args;
+
+use codeql_extractor::autobuilder;

 #[derive(Args)]
 // The autobuilder takes no command-line options, but this may change in the future.
 pub struct Options {}

 pub fn run(_: Options) -> std::io::Result<()> {
-    let dist = env::var("CODEQL_DIST").expect("CODEQL_DIST not set");
-    let db = env::var("CODEQL_EXTRACTOR_QL_WIP_DATABASE")
+    let database = env::var("CODEQL_EXTRACTOR_QL_WIP_DATABASE")
        .expect("CODEQL_EXTRACTOR_QL_WIP_DATABASE not set");
-    let codeql = if env::consts::OS == "windows" {
-        "codeql.exe"
-    } else {
-        "codeql"
-    };
-    let codeql: PathBuf = [&dist, codeql].iter().collect();
-    let mut cmd = Command::new(codeql);
-    cmd.arg("database")
-        .arg("index-files")
-        .arg("--include-extension=.ql")
-        .arg("--include-extension=.qll")
-        .arg("--include-extension=.dbscheme")
-        .arg("--include-extension=.json")
-        .arg("--include-extension=.jsonc")
-        .arg("--include-extension=.jsonl")
-        .arg("--include=**/qlpack.yml")
-        .arg("--include=deprecated.blame")
-        .arg("--size-limit=10m")
-        .arg("--language=ql")
-        .arg("--working-dir=.")
-        .arg(db);

-    for line in env::var("LGTM_INDEX_FILTERS")
-        .unwrap_or_default()
-        .split('\n')
-    {
-        if let Some(stripped) = line.strip_prefix("include:") {
-            cmd.arg("--also-match=".to_owned() + stripped);
-        } else if let Some(stripped) = line.strip_prefix("exclude:") {
-            cmd.arg("--exclude=".to_owned() + stripped);
-        }
-    }
-    let exit = &cmd.spawn()?.wait()?;
-    std::process::exit(exit.code().unwrap_or(1))
+    autobuilder::Autobuilder::new("ql", PathBuf::from(database))
+        .include_extensions(&[".ql", ".qll", ".dbscheme", ".json", ".jsonc", ".jsonl"])
+        .include_globs(&["**/qlpack.yml", "deprecated.blame"])
+        .size_limit("10m")
+        .run()
 }
--- a/ql/ql/src/codeql/Locations.qll
+++ b/ql/ql/src/codeql/Locations.qll
@@ -25,13 +25,13 @@ class Location extends @location {
  int getEndColumn() { locations_default(this, _, _, _, _, result) }

  /** Gets the number of lines covered by this location. */
-  int getNumLines() { result = getEndLine() - getStartLine() + 1 }
+  int getNumLines() { result = this.getEndLine() - this.getStartLine() + 1 }

  /** Gets a textual representation of this element. */
  cached
  string toString() {
    exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
-      hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) and
+      this.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) and
      result = filepath + "@" + startline + ":" + startcolumn + ":" + endline + ":" + endcolumn
    )
  }
--- a/ql/ql/src/codeql_ql/style/UseSetLiteralQuery.qll
+++ b/ql/ql/src/codeql_ql/style/UseSetLiteralQuery.qll
@@ -16,7 +16,7 @@ class DisjunctionChain extends Disjunction {
  Formula getOperand(int i) {
    result =
      rank[i + 1](Formula operand, Location l |
-        operand = getAnOperand*() and
+        operand = this.getAnOperand*() and
        not operand instanceof Disjunction and
        l = operand.getLocation()
      |
@@ -33,16 +33,16 @@ class DisjunctionChain extends Disjunction {
 */
 class EqualsLiteral extends ComparisonFormula {
  EqualsLiteral() {
-    getOperator() = "=" and
-    getAnOperand() instanceof Literal
+    this.getOperator() = "=" and
+    this.getAnOperand() instanceof Literal
  }

  AstNode getOther() {
-    result = getAnOperand() and
+    result = this.getAnOperand() and
    not result instanceof Literal
  }

-  Literal getLiteral() { result = getAnOperand() }
+  Literal getLiteral() { result = this.getAnOperand() }
 }

 /**
@@ -60,29 +60,33 @@ class DisjunctionEqualsLiteral extends DisjunctionChain {
  DisjunctionEqualsLiteral() {
    // VarAccess on the same variable
    exists(VarDef v |
-      forex(Formula f | f = getOperand(_) |
+      forex(Formula f | f = this.getOperand(_) |
        f.(EqualsLiteral).getAnOperand().(VarAccess).getDeclaration() = v
      ) and
-      firstOperand = getOperand(0).(EqualsLiteral).getAnOperand() and
+      firstOperand = this.getOperand(0).(EqualsLiteral).getAnOperand() and
      firstOperand.(VarAccess).getDeclaration() = v
    )
    or
    // FieldAccess on the same variable
    exists(FieldDecl v |
-      forex(Formula f | f = getOperand(_) |
+      forex(Formula f | f = this.getOperand(_) |
        f.(EqualsLiteral).getAnOperand().(FieldAccess).getDeclaration() = v
      ) and
-      firstOperand = getOperand(0).(EqualsLiteral).getAnOperand() and
+      firstOperand = this.getOperand(0).(EqualsLiteral).getAnOperand() and
      firstOperand.(FieldAccess).getDeclaration() = v
    )
    or
    // ThisAccess
-    forex(Formula f | f = getOperand(_) | f.(EqualsLiteral).getAnOperand() instanceof ThisAccess) and
-    firstOperand = getOperand(0).(EqualsLiteral).getAnOperand().(ThisAccess)
+    forex(Formula f | f = this.getOperand(_) |
+      f.(EqualsLiteral).getAnOperand() instanceof ThisAccess
+    ) and
+    firstOperand = this.getOperand(0).(EqualsLiteral).getAnOperand().(ThisAccess)
    or
    // ResultAccess
-    forex(Formula f | f = getOperand(_) | f.(EqualsLiteral).getAnOperand() instanceof ResultAccess) and
-    firstOperand = getOperand(0).(EqualsLiteral).getAnOperand().(ResultAccess)
+    forex(Formula f | f = this.getOperand(_) |
+      f.(EqualsLiteral).getAnOperand() instanceof ResultAccess
+    ) and
+    firstOperand = this.getOperand(0).(EqualsLiteral).getAnOperand().(ResultAccess)
    // (in principle something like GlobalValueNumbering could be used to generalize this)
  }

@@ -100,8 +104,8 @@ class DisjunctionEqualsLiteral extends DisjunctionChain {
 */
 class CallLiteral extends Call {
  CallLiteral() {
-    getNumberOfArguments() = 1 and
-    getArgument(0) instanceof Literal
+    this.getNumberOfArguments() = 1 and
+    this.getArgument(0) instanceof Literal
  }
 }

@@ -118,7 +122,7 @@ class DisjunctionPredicateLiteral extends DisjunctionChain {
  DisjunctionPredicateLiteral() {
    // Call to the same target
    exists(PredicateOrBuiltin target |
-      forex(Formula f | f = getOperand(_) | f.(CallLiteral).getTarget() = target)
+      forex(Formula f | f = this.getOperand(_) | f.(CallLiteral).getTarget() = target)
    )
  }
 }
--- a/ql/ql/src/qlpack.yml
+++ b/ql/ql/src/qlpack.yml
@@ -8,3 +8,4 @@ extractor: ql
 dependencies:
  codeql/typos: ${workspace}
  codeql/util: ${workspace}
+warnOnImplicitThis: true
--- a/ql/ql/test/callgraph/Foo.qll
+++ b/ql/ql/test/callgraph/Foo.qll
@@ -7,7 +7,7 @@ query predicate test() { foo() }
 class Foo extends AstNode {
  predicate bar() { none() }

-  predicate baz() { bar() }
+  predicate baz() { this.bar() }
 }

 class Sub extends Foo {
--- a/ql/ql/test/callgraph/callgraph.expected
+++ b/ql/ql/test/callgraph/callgraph.expected
@@ -5,7 +5,7 @@ getTarget
 | Bar.qll:30:12:30:32 | MemberCall | Bar.qll:19:7:19:18 | ClassPredicate getParameter |
 | Baz.qll:8:18:8:44 | MemberCall | Baz.qll:4:10:4:24 | ClassPredicate getImportedPath |
 | Foo.qll:5:26:5:30 | PredicateCall | Foo.qll:3:11:3:13 | ClasslessPredicate foo |
-| Foo.qll:10:21:10:25 | PredicateCall | Foo.qll:8:13:8:15 | ClassPredicate bar |
+| Foo.qll:10:21:10:30 | MemberCall | Foo.qll:8:13:8:15 | ClassPredicate bar |
 | Foo.qll:14:34:14:44 | MemberCall | Foo.qll:10:13:10:15 | ClassPredicate baz |
 | Foo.qll:17:27:17:42 | MemberCall | Foo.qll:8:13:8:15 | ClassPredicate bar |
 | Foo.qll:29:5:29:16 | PredicateCall | Foo.qll:20:13:20:20 | ClasslessPredicate myThing2 |
--- a/ruby/extractor/src/autobuilder.rs
+++ b/ruby/extractor/src/autobuilder.rs
@@ -1,45 +1,22 @@
-use clap::Args;
 use std::env;
 use std::path::PathBuf;
-use std::process::Command;
+
+use clap::Args;
+
+use codeql_extractor::autobuilder;

 #[derive(Args)]
 // The autobuilder takes no command-line options, but this may change in the future.
 pub struct Options {}

 pub fn run(_: Options) -> std::io::Result<()> {
-    let dist = env::var("CODEQL_DIST").expect("CODEQL_DIST not set");
-    let db = env::var("CODEQL_EXTRACTOR_RUBY_WIP_DATABASE")
+    let database = env::var("CODEQL_EXTRACTOR_RUBY_WIP_DATABASE")
        .expect("CODEQL_EXTRACTOR_RUBY_WIP_DATABASE not set");
-    let codeql = if env::consts::OS == "windows" {
-        "codeql.exe"
-    } else {
-        "codeql"
-    };
-    let codeql: PathBuf = [&dist, codeql].iter().collect();
-    let mut cmd = Command::new(codeql);
-    cmd.arg("database")
-        .arg("index-files")
-        .arg("--include-extension=.rb")
-        .arg("--include-extension=.erb")
-        .arg("--include-extension=.gemspec")
-        .arg("--include=**/Gemfile")
-        .arg("--exclude=**/.git")
-        .arg("--size-limit=5m")
-        .arg("--language=ruby")
-        .arg("--working-dir=.")
-        .arg(db);

-    for line in env::var("LGTM_INDEX_FILTERS")
-        .unwrap_or_default()
-        .split('\n')
-    {
-        if let Some(stripped) = line.strip_prefix("include:") {
-            cmd.arg("--also-match=".to_owned() + stripped);
-        } else if let Some(stripped) = line.strip_prefix("exclude:") {
-            cmd.arg("--exclude=".to_owned() + stripped);
-        }
-    }
-    let exit = &cmd.spawn()?.wait()?;
-    std::process::exit(exit.code().unwrap_or(1))
+    autobuilder::Autobuilder::new("ruby", PathBuf::from(database))
+        .include_extensions(&[".rb", ".erb", ".gemspec"])
+        .include_globs(&["**/Gemfile"])
+        .exclude_globs(&["**/.git"])
+        .size_limit("5m")
+        .run()
 }
--- a/shared/tree-sitter-extractor/src/autobuilder.rs
+++ b/shared/tree-sitter-extractor/src/autobuilder.rs
@@ -0,0 +1,110 @@
+use std::env;
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Builder for a `codeql database index-files` invocation.
+///
+/// Collects the file-selection options for one language; `run` turns them into
+/// command-line arguments and spawns the CodeQL CLI.
+pub struct Autobuilder {
+    include_extensions: Vec<String>,
+    include_globs: Vec<String>,
+    exclude_globs: Vec<String>,
+    language: String,
+    database: PathBuf,
+    size_limit: Option<String>,
+}
+
+impl Autobuilder {
+    /// Creates an autobuilder for `language` that targets `database`, with no
+    /// include/exclude filters and no size limit configured.
+    pub fn new(language: &str, database: PathBuf) -> Self {
+        Self {
+            language: language.to_string(),
+            database,
+            include_extensions: vec![],
+            include_globs: vec![],
+            exclude_globs: vec![],
+            size_limit: None,
+        }
+    }
+
+    /// Sets the file extensions passed as `--include-extension=<ext>`,
+    /// replacing any previously configured list.
+    pub fn include_extensions(&mut self, exts: &[&str]) -> &mut Self {
+        self.include_extensions = exts.iter().map(|s| String::from(*s)).collect();
+        self
+    }
+
+    /// Sets the globs passed as `--include=<glob>`, replacing any previously
+    /// configured list.
+    pub fn include_globs(&mut self, globs: &[&str]) -> &mut Self {
+        self.include_globs = globs.iter().map(|s| String::from(*s)).collect();
+        self
+    }
+
+    /// Sets the globs passed as `--exclude=<glob>`, replacing any previously
+    /// configured list.
+    pub fn exclude_globs(&mut self, globs: &[&str]) -> &mut Self {
+        self.exclude_globs = globs.iter().map(|s| String::from(*s)).collect();
+        self
+    }
+
+    /// Sets the value passed as `--size-limit=<limit>`.
+    pub fn size_limit(&mut self, limit: &str) -> &mut Self {
+        self.size_limit = Some(limit.to_string());
+        self
+    }
+
+    /// Runs `codeql database index-files` with the configured options.
+    ///
+    /// Panics if `CODEQL_DIST` cannot be read from the environment. Returns
+    /// `Err` only when spawning or waiting on the CLI fails; otherwise this
+    /// process exits with the CLI's exit code (or 1 when no code is available),
+    /// so `Ok` is never actually returned.
+    pub fn run(&self) -> std::io::Result<()> {
+        let dist = env::var("CODEQL_DIST").expect("CODEQL_DIST not set");
+        let codeql = if env::consts::OS == "windows" {
+            "codeql.exe"
+        } else {
+            "codeql"
+        };
+        let codeql: PathBuf = [&dist, codeql].iter().collect();
+        let mut cmd = Command::new(codeql);
+        cmd.arg("database").arg("index-files");
+
+        for ext in &self.include_extensions {
+            cmd.arg(format!("--include-extension={}", ext));
+        }
+
+        for glob in &self.include_globs {
+            cmd.arg(format!("--include={}", glob));
+        }
+
+        for glob in &self.exclude_globs {
+            cmd.arg(format!("--exclude={}", glob));
+        }
+
+        if let Some(limit) = &self.size_limit {
+            cmd.arg(format!("--size-limit={}", limit));
+        }
+
+        cmd.arg(format!("--language={}", self.language));
+        cmd.arg("--working-dir=.");
+        cmd.arg(&self.database);
+
+        // Each `include:`/`exclude:` line of LGTM_INDEX_FILTERS becomes an extra
+        // CLI filter; lines with any other prefix are ignored.
+        for line in env::var("LGTM_INDEX_FILTERS")
+            .unwrap_or_default()
+            .split('\n')
+        {
+            if let Some(stripped) = line.strip_prefix("include:") {
+                cmd.arg("--also-match=".to_owned() + stripped);
+            } else if let Some(stripped) = line.strip_prefix("exclude:") {
+                cmd.arg("--exclude=".to_owned() + stripped);
+            }
+        }
+        let exit = cmd.spawn()?.wait()?;
+        std::process::exit(exit.code().unwrap_or(1))
+    }
+}
--- a/shared/tree-sitter-extractor/src/lib.rs
+++ b/shared/tree-sitter-extractor/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod autobuilder;
 pub mod diagnostics;
 pub mod extractor;
 pub mod file_paths;
--- a/swift/BUILD.bazel
+++ b/swift/BUILD.bazel
@@ -57,6 +57,12 @@ pkg_runfiles(
    prefix = "tools/" + codeql_platform,
 )

+pkg_runfiles(
+    name = "incompatible-os",
+    srcs = ["//swift/tools/autobuilder-diagnostics:incompatible-os"],
+    prefix = "tools/" + codeql_platform,
+)
+
 pkg_files(
    name = "swift-test-sdk-arch",
    srcs = ["//swift/third_party/swift-llvm-support:swift-test-sdk"],
@@ -70,7 +76,9 @@ pkg_filegroup(
        ":extractor",
        ":swift-test-sdk-arch",
    ] + select({
-        "@platforms//os:linux": [],
+        "@platforms//os:linux": [
+            ":incompatible-os",
+        ],
        "@platforms//os:macos": [
            ":xcode-autobuilder",
        ],
--- a/swift/integration-tests/diagnostics_test_utils.py
+++ b/swift/integration-tests/diagnostics_test_utils.py
@@ -40,6 +40,9 @@ def _load_concatenated_json(text):


 def _normalize_json(data):
+    # helpLinks are currently a set within the codeql cli: sort them for a stable order
+    for e in data:
+        e.get("helpLinks", []).sort()
    entries = [json.dumps(e, sort_keys=True, indent=2) for e in data]
    entries.sort()
    entries.append("")
--- a/swift/integration-tests/linux-only/autobuilder/unsupported-os/diagnostics.expected
+++ b/swift/integration-tests/linux-only/autobuilder/unsupported-os/diagnostics.expected
@@ -0,0 +1,19 @@
+{
+  "helpLinks": [
+    "https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idruns-on",
+    "https://docs.github.com/en/enterprise-server/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/customizing-code-scanning",
+    "https://docs.github.com/en/enterprise-server/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-the-codeql-workflow-for-compiled-languages#adding-build-steps-for-a-compiled-language"
+  ],
+  "plaintextMessage": "CodeQL Swift analysis is currently only officially supported on macOS.\n\nChange the action runner to a macOS one. Analysis on Linux might work, but requires setting up a custom build command.",
+  "severity": "error",
+  "source": {
+    "extractorName": "swift",
+    "id": "swift/autobuilder/incompatible-os",
+    "name": "Incompatible operating system for autobuild (expected macOS)"
+  },
+  "visibility": {
+    "cliSummaryTable": true,
+    "statusPage": true,
+    "telemetry": true
+  }
+}
--- a/swift/integration-tests/linux-only/autobuilder/unsupported-os/test.py
+++ b/swift/integration-tests/linux-only/autobuilder/unsupported-os/test.py
@@ -0,0 +1,5 @@
+from create_database_utils import *
+from diagnostics_test_utils import *
+
+run_codeql_database_create([], lang='swift', keep_trap=True, db=None, runFunction=runUnsuccessfully)
+check_diagnostics()
--- a/swift/tools/autobuild.sh
+++ b/swift/tools/autobuild.sh
@@ -3,6 +3,5 @@
 if [[ "$OSTYPE" == "darwin"* ]]; then
  exec "${CODEQL_EXTRACTOR_SWIFT_ROOT}/tools/${CODEQL_PLATFORM}/xcode-autobuilder"
 else
-  echo "Not implemented yet"
-  exit 1
+  exec "${CODEQL_EXTRACTOR_SWIFT_ROOT}/tools/${CODEQL_PLATFORM}/incompatible-os"
 fi
--- a/swift/tools/autobuilder-diagnostics/BUILD.bazel
+++ b/swift/tools/autobuilder-diagnostics/BUILD.bazel
@@ -0,0 +1,17 @@
+load("//swift:rules.bzl", "swift_cc_binary")
+load("//misc/bazel/cmake:cmake.bzl", "generate_cmake")
+
+swift_cc_binary(
+    name = "incompatible-os",
+    srcs = ["IncompatibleOs.cpp"],
+    visibility = ["//swift:__subpackages__"],
+    deps = [
+        "//swift/logging",
+    ],
+)
+
+generate_cmake(
+    name = "cmake",
+    targets = [":incompatible-os"],
+    visibility = ["//visibility:public"],
+)
--- a/swift/tools/autobuilder-diagnostics/IncompatibleOs.cpp
+++ b/swift/tools/autobuilder-diagnostics/IncompatibleOs.cpp
@@ -0,0 +1,33 @@
+// Unconditionally emits a diagnostic about running the autobuilder on an incompatible, non-macOS OS
+// and exits with an error code.
+//
+// This is implemented as a C++ binary instead of a hardcoded JSON file so we can leverage existing
+// diagnostic machinery for emitting correct timestamps, generating correct file names, etc.
+
+#include "swift/logging/SwiftLogging.h"
+
+const std::string_view codeql::programName = "autobuilder";
+
+constexpr codeql::SwiftDiagnostic incompatibleOs{
+    "incompatible-os", "Incompatible operating system for autobuild (expected macOS)",
+    "Change the action runner to a macOS one. Analysis on Linux might work, but requires setting "
+    "up a custom build command",
+    "https://docs.github.com/en/actions/using-workflows/"
+    "workflow-syntax-for-github-actions#jobsjob_idruns-on "
+    "https://docs.github.com/en/enterprise-server/code-security/code-scanning/"
+    "automatically-scanning-your-code-for-vulnerabilities-and-errors/customizing-code-scanning "
+    "https://docs.github.com/en/enterprise-server/code-security/code-scanning/"
+    "automatically-scanning-your-code-for-vulnerabilities-and-errors/"
+    "configuring-the-codeql-workflow-for-compiled-languages#adding-build-steps-for-a-compiled-"
+    "language"};
+
+static codeql::Logger& logger() {
+  static codeql::Logger ret{"main"};
+  return ret;
+}
+
+int main() {
+  DIAGNOSE_ERROR(incompatibleOs,
+                 "CodeQL Swift analysis is currently only officially supported on macOS");
+  return 1;
+}