Merge branch 'main' into public-iterated-dominance-frontier

2025-12-24 04:36:35 +01:00 · 2022-03-24 12:50:29 +00:00
parent c76a323246 61c944201f
commit 80630972b1
54 changed files with 1068 additions and 293 deletions
--- a/config/identical-files.json
+++ b/config/identical-files.json
@@ -519,8 +519,14 @@
    "javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
    "ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
  ],
+  "Concepts Python/Ruby/JS": [
+    "python/ql/lib/semmle/python/internal/ConceptsShared.qll",
+    "ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll",
+    "javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll"
+  ],
  "Hostname Regexp queries": [
    "javascript/ql/src/Security/CWE-020/HostnameRegexpShared.qll",
+    "python/ql/src/Security/CWE-020/HostnameRegexpShared.qll",
    "ruby/ql/src/queries/security/cwe-020/HostnameRegexpShared.qll"
  ],
  "ApiGraphModels": [
--- a/cpp/ql/lib/semmle/code/cpp/Location.qll
+++ b/cpp/ql/lib/semmle/code/cpp/Location.qll
@@ -73,8 +73,24 @@ class Location extends @location {

  /** Holds if `this` comes on a line strictly before `l`. */
  pragma[inline]
-  predicate isBefore(Location l) {
-    this.getFile() = l.getFile() and this.getEndLine() < l.getStartLine()
+  predicate isBefore(Location l) { this.isBefore(l, false) }
+
+  /**
+   * Holds if `this` comes strictly before `l`. The boolean `sameLine` is
+   * true if `l` is on the same line as `this`, but starts at a later column.
+   * Otherwise, `sameLine` is false.
+   */
+  pragma[inline]
+  predicate isBefore(Location l, boolean sameLine) {
+    this.getFile() = l.getFile() and
+    (
+      sameLine = false and
+      this.getEndLine() < l.getStartLine()
+      or
+      sameLine = true and
+      this.getEndLine() = l.getStartLine() and
+      this.getEndColumn() < l.getStartColumn()
+    )
  }

  /** Holds if location `l` is completely contained within this one. */
--- a/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/IRConstruction.qll
@@ -349,7 +349,7 @@ Instruction getInstructionBackEdgeSuccessor(Instruction instruction, EdgeKind ki

 /** Holds if `goto` jumps strictly forward in the program text. */
 private predicate isStrictlyForwardGoto(GotoStmt goto) {
-  goto.getLocation().isBefore(goto.getTarget().getLocation())
+  goto.getLocation().isBefore(goto.getTarget().getLocation(), _)
 }

 Locatable getInstructionAst(TStageInstruction instr) {
--- a/cpp/ql/lib/semmle/code/cpp/models/interfaces/FormattingFunction.qll
+++ b/cpp/ql/lib/semmle/code/cpp/models/interfaces/FormattingFunction.qll
@@ -92,6 +92,7 @@ abstract class FormattingFunction extends ArrayFunction, TaintFunction {
   * snapshots there may be multiple results where we can't tell which is correct for a
   * particular function.
   */
+  pragma[nomagic]
  Type getWideCharType() {
    result = getFormatCharType() and
    result.getSize() > 1
--- a/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.ql
+++ b/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Extraction errors
+ * @description List all extraction errors for files in the source code directory.
+ * @kind diagnostic
+ * @id cpp/diagnostics/extraction-errors
+ */
+
+import cpp
+import ExtractionErrors
+
+// NOTE:
+// This file looks like the other `diagnostics/extraction-errors` queries in other CodeQL supported
+// languages. However, since this diagnostic query is located in the `Internal` subdirectory it will not
+// appear in the Code Scanning suite. The related query `cpp/diagnostics/extraction-warnings` is,
+// however, included as a public diagnostics query.
+from ExtractionError error
+where
+  error instanceof ExtractionUnknownError or
+  exists(error.getFile().getRelativePath())
+select error, "Extraction failed in " + error.getFile() + " with error " + error.getErrorMessage(),
+  error.getSeverity()
--- a/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.qll
+++ b/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.qll
@@ -0,0 +1,137 @@
+/**
+ * Provides a common hierarchy of all types of errors that can occur during extraction.
+ */
+
+import cpp
+
+/*
+ * A note about how the C/C++ extractor emits diagnostics:
+ * When the extractor frontend encounters an error, it emits a diagnostic message,
+ * that includes a message, location and severity.
+ * However, that process is best-effort and may fail (e.g. due to lack of memory).
+ * Thus, if the extractor emitted at least one diagnostic of severity discretionary
+ * error (or higher), it *also* emits a simple "There was an error during this compilation"
+ * error diagnostic, without location information.
+ * In the common case, this means that a compilation during which one or more errors happened also gets
+ * the catch-all diagnostic.
+ * This diagnostic has the empty string as file path.
+ * We filter out these useless diagnostics if there is at least one error-level diagnostic
+ * for the affected compilation in the database.
+ * Otherwise, we show it to indicate that something went wrong and that we
+ * don't know what exactly happened.
+ */
+
+/**
+ * An error that, if present, leads to a file being marked as non-successfully extracted.
+ */
+class ReportableError extends Diagnostic {
+  ReportableError() {
+    (
+      this instanceof CompilerDiscretionaryError or
+      this instanceof CompilerError or
+      this instanceof CompilerCatastrophe
+    ) and
+    // Filter out the catch-all diagnostic (empty file path), see note above.
+    not this.getFile().getAbsolutePath() = ""
+  }
+}
+
+private newtype TExtractionError =
+  TReportableError(ReportableError err) or
+  TCompilationFailed(Compilation c, File f) {
+    f = c.getAFileCompiled() and not c.normalTermination()
+  } or
+  // Show the catch-all diagnostic (see note above) only if we haven't seen any other error-level diagnostic
+  // for that compilation
+  TUnknownError(CompilerError err) {
+    not exists(ReportableError e | e.getCompilation() = err.getCompilation())
+  }
+
+/**
+ * Superclass for the extraction error hierarchy.
+ */
+class ExtractionError extends TExtractionError {
+  /** Gets the string representation of the error. */
+  string toString() { none() }
+
+  /** Gets the error message for this error. */
+  string getErrorMessage() { none() }
+
+  /** Gets the file this error occurred in. */
+  File getFile() { none() }
+
+  /** Gets the location this error occurred in. */
+  Location getLocation() { none() }
+
+  /** Gets the SARIF severity of this error. */
+  int getSeverity() {
+    // Unfortunately, we can't distinguish between errors and fatal errors in SARIF,
+    // so all errors have severity 2.
+    result = 2
+  }
+}
+
+/**
+ * An unrecoverable extraction error, where extraction was unable to finish.
+ * This can be caused by a multitude of reasons, for example:
+ *  - hitting a frontend assertion
+ *  - crashing due to dereferencing an invalid pointer
+ *  - stack overflow
+ *  - out of memory
+ */
+class ExtractionUnrecoverableError extends ExtractionError, TCompilationFailed {
+  Compilation c;
+  File f;
+
+  ExtractionUnrecoverableError() { this = TCompilationFailed(c, f) }
+
+  override string toString() {
+    result = "Unrecoverable extraction error while compiling " + f.toString()
+  }
+
+  override string getErrorMessage() { result = "unrecoverable compilation failure." }
+
+  override File getFile() { result = f }
+
+  override Location getLocation() { result = f.getLocation() }
+}
+
+/**
+ * A recoverable extraction error.
+ * These are compiler errors from the frontend.
+ * Upon encountering one of these, we still continue extraction, but the
+ * database will be incomplete for that file.
+ */
+class ExtractionRecoverableError extends ExtractionError, TReportableError {
+  ReportableError err;
+
+  ExtractionRecoverableError() { this = TReportableError(err) }
+
+  override string toString() { result = "Recoverable extraction error: " + err }
+
+  override string getErrorMessage() { result = err.getFullMessage() }
+
+  override File getFile() { result = err.getFile() }
+
+  override Location getLocation() { result = err.getLocation() }
+}
+
+/**
+ * An unknown error happened during extraction.
+ * These are only displayed if we know that we encountered an error during extraction,
+ * but, for some reason, failed to emit a proper diagnostic with location information
+ * and error message.
+ */
+class ExtractionUnknownError extends ExtractionError, TUnknownError {
+  CompilerError err;
+
+  ExtractionUnknownError() { this = TUnknownError(err) }
+
+  override string toString() { result = "Unknown extraction error: " + err }
+
+  override string getErrorMessage() { result = err.getFullMessage() }
+
+  override File getFile() { result = err.getFile() }
+
+  override Location getLocation() { result = err.getLocation() }
+}
--- a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
+++ b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
@@ -19,9 +19,9 @@ import semmle.code.cpp.security.Security
 import semmle.code.cpp.valuenumbering.GlobalValueNumbering
 import semmle.code.cpp.ir.IR
 import semmle.code.cpp.ir.dataflow.TaintTracking
-import semmle.code.cpp.ir.dataflow.TaintTracking2
 import semmle.code.cpp.security.FlowSources
 import semmle.code.cpp.models.implementations.Strcat
+import DataFlow::PathGraph

 Expr sinkAsArgumentIndirection(DataFlow::Node sink) {
  result =
@@ -66,154 +66,70 @@ predicate interestingConcatenation(DataFlow::Node fst, DataFlow::Node snd) {
  )
 }

-class TaintToConcatenationConfiguration extends TaintTracking::Configuration {
-  TaintToConcatenationConfiguration() { this = "TaintToConcatenationConfiguration" }
+class ConcatState extends DataFlow::FlowState {
+  ConcatState() { this = "ConcatState" }
+}

-  override predicate isSource(DataFlow::Node source) { source instanceof FlowSource }
+class ExecState extends DataFlow::FlowState {
+  DataFlow::Node fst;
+  DataFlow::Node snd;

-  override predicate isSink(DataFlow::Node sink) { interestingConcatenation(sink, _) }
+  ExecState() {
+    this =
+      "ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and
+    interestingConcatenation(fst, snd)
+  }

-  override predicate isSanitizer(DataFlow::Node node) {
+  DataFlow::Node getFstNode() { result = fst }
+
+  DataFlow::Node getSndNode() { result = snd }
+}
+
+class ExecTaintConfiguration extends TaintTracking::Configuration {
+  ExecTaintConfiguration() { this = "ExecTaintConfiguration" }
+
+  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
+    source instanceof FlowSource and
+    state instanceof ConcatState
+  }
+
+  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
+    shellCommand(sinkAsArgumentIndirection(sink), _) and
+    state instanceof ExecState
+  }
+
+  override predicate isAdditionalTaintStep(
+    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
+    DataFlow::FlowState state2
+  ) {
+    state1 instanceof ConcatState and
+    state2.(ExecState).getFstNode() = node1 and
+    state2.(ExecState).getSndNode() = node2
+  }
+
+  override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
+    (
      node.asInstruction().getResultType() instanceof IntegralType
      or
      node.asInstruction().getResultType() instanceof FloatingPointType
-  }
+    ) and
+    state instanceof ConcatState
  }

-class ExecTaintConfiguration extends TaintTracking2::Configuration {
-  ExecTaintConfiguration() { this = "ExecTaintConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) {
-    exists(DataFlow::Node prevSink, TaintToConcatenationConfiguration conf |
-      conf.hasFlow(_, prevSink) and
-      interestingConcatenation(prevSink, source)
-    )
-  }
-
-  override predicate isSink(DataFlow::Node sink) {
-    shellCommand(sinkAsArgumentIndirection(sink), _)
-  }
-
-  override predicate isSanitizerOut(DataFlow::Node node) {
-    isSink(node) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
+  override predicate isSanitizerOut(DataFlow::Node node, DataFlow::FlowState state) {
+    isSink(node, state) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
  }
 }

-module StitchedPathGraph {
-  // There's a different PathNode class for each DataFlowImplN.qll, so we can't simply combine the
-  // PathGraph predicates directly. Instead, we use a newtype so there's a single type that
-  // contains both sets of PathNodes.
-  newtype TMergedPathNode =
-    TPathNode1(DataFlow::PathNode node) or
-    TPathNode2(DataFlow2::PathNode node)
-
-  // this wraps the toString and location predicates so we can use the merged node type in a
-  // selection
-  class MergedPathNode extends TMergedPathNode {
-    string toString() {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        result = n.toString()
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        result = n.toString()
-      )
-    }
-
-    DataFlow::Node getNode() {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        result = n.getNode()
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        result = n.getNode()
-      )
-    }
-
-    DataFlow::PathNode getPathNode1() { this = TPathNode1(result) }
-
-    DataFlow2::PathNode getPathNode2() { this = TPathNode2(result) }
-
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-      )
-    }
-  }
-
-  query predicate edges(MergedPathNode a, MergedPathNode b) {
-    exists(DataFlow::PathNode an, DataFlow::PathNode bn |
-      a = TPathNode1(an) and
-      b = TPathNode1(bn) and
-      DataFlow::PathGraph::edges(an, bn)
-    )
-    or
-    exists(DataFlow2::PathNode an, DataFlow2::PathNode bn |
-      a = TPathNode2(an) and
-      b = TPathNode2(bn) and
-      DataFlow2::PathGraph::edges(an, bn)
-    )
-    or
-    // This is where paths from the two configurations are connected. `interestingConcatenation`
-    // is the only thing in this module that's actually specific to the query - everything else is
-    // just using types and predicates from the DataFlow library.
-    interestingConcatenation(a.getNode(), b.getNode()) and
-    a instanceof TPathNode1 and
-    b instanceof TPathNode2
-  }
-
-  query predicate nodes(MergedPathNode mpn, string key, string val) {
-    // here we just need the union of the underlying `nodes` predicates
-    exists(DataFlow::PathNode n |
-      mpn = TPathNode1(n) and
-      DataFlow::PathGraph::nodes(n, key, val)
-    )
-    or
-    exists(DataFlow2::PathNode n |
-      mpn = TPathNode2(n) and
-      DataFlow2::PathGraph::nodes(n, key, val)
-    )
-  }
-
-  query predicate subpaths(
-    MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
-  ) {
-    // just forward subpaths from the underlying libraries. This might be slightly awkward when
-    // the concatenation is deep in a call chain.
-    DataFlow::PathGraph::subpaths(arg.getPathNode1(), par.getPathNode1(), ret.getPathNode1(),
-      out.getPathNode1())
-    or
-    DataFlow2::PathGraph::subpaths(arg.getPathNode2(), par.getPathNode2(), ret.getPathNode2(),
-      out.getPathNode2())
-  }
-}
-
-import StitchedPathGraph
-
 from
-  DataFlow::PathNode sourceNode, DataFlow::PathNode concatSink, DataFlow2::PathNode concatSource,
-  DataFlow2::PathNode sinkNode, string taintCause, string callChain,
-  TaintToConcatenationConfiguration conf1, ExecTaintConfiguration conf2
+  ExecTaintConfiguration conf, DataFlow::PathNode sourceNode, DataFlow::PathNode sinkNode,
+  string taintCause, string callChain, DataFlow::Node concatResult
 where
+  conf.hasFlowPath(sourceNode, sinkNode) and
  taintCause = sourceNode.getNode().(FlowSource).getSourceType() and
-  conf1.hasFlowPath(sourceNode, concatSink) and
-  interestingConcatenation(concatSink.getNode(), concatSource.getNode()) and // this loses call context
-  conf2.hasFlowPath(concatSource, sinkNode) and
-  shellCommand(sinkAsArgumentIndirection(sinkNode.getNode()), callChain)
-select sinkAsArgumentIndirection(sinkNode.getNode()), TPathNode1(sourceNode).(MergedPathNode),
-  TPathNode2(sinkNode).(MergedPathNode),
+  shellCommand(sinkAsArgumentIndirection(sinkNode.getNode()), callChain) and
+  concatResult = sinkNode.getState().(ExecState).getSndNode()
+select sinkAsArgumentIndirection(sinkNode.getNode()), sourceNode, sinkNode,
  "This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to "
-    + callChain, sourceNode, "user input (" + taintCause + ")", concatSource,
-  concatSource.toString()
+    + callChain, sourceNode, "user input (" + taintCause + ")", concatResult,
+  concatResult.toString()
--- a/cpp/ql/src/change-notes/2022-03-21-command-line-injection-with-flow-states.md
+++ b/cpp/ql/src/change-notes/2022-03-21-command-line-injection-with-flow-states.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* The `cpp/command-line-injection` query now takes into account calling contexts across string concatenations. This removes false positives due to mismatched calling contexts before and after string concatenations.
--- a/cpp/ql/src/experimental/Security/CWE/CWE-362/double-fetch.ql
+++ b/cpp/ql/src/experimental/Security/CWE/CWE-362/double-fetch.ql
@@ -0,0 +1,46 @@
+/**
+ * @name Linux kernel double-fetch vulnerability detection
+ * @description Double-fetch is a very common vulnerability pattern
+ *              in the Linux kernel; an attacker can exploit double-fetch
+ *              issues to obtain root privilege.
+ *              Double-fetch is caused by fetching data from user
+ *              mode by calling copy_from_user twice; CVE-2016-6480
+ *              is a good example of this pattern.
+ * @kind problem
+ * @id cpp/linux-kernel-double-fetch-vulnerability
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags security
+ *       external/cwe/cwe-362
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+class CopyFromUserFunctionCall extends FunctionCall {
+  CopyFromUserFunctionCall() {
+    this.getTarget().getName() = "copy_from_user" and
+    not this.getArgument(1) instanceof AddressOfExpr
+  }
+
+  // The root cause of a double-fetch issue is reading from
+  // the same user-mode memory twice, so it makes sense to
+  // compare only the user-mode pointer (argument 1).
+  predicate readFromSameUserModePointer(CopyFromUserFunctionCall another) {
+    globalValueNumber(this.getArgument(1)) = globalValueNumber(another.getArgument(1))
+  }
+}
+
+from CopyFromUserFunctionCall p1, CopyFromUserFunctionCall p2
+where
+  not p1 = p2 and
+  p1.readFromSameUserModePointer(p2) and
+  exists(IfStmt ifStmt |
+    p1.getBasicBlock().getAFalseSuccessor*() = ifStmt.getBasicBlock() and
+    ifStmt.getBasicBlock().getAFalseSuccessor*() = p2.getBasicBlock()
+  ) and
+  not exists(AssignPointerAddExpr assignPtrAdd |
+    globalValueNumber(p1.getArgument(1)) = globalValueNumber(assignPtrAdd.getLValue()) and
+    p1.getBasicBlock().getAFalseSuccessor*() = assignPtrAdd.getBasicBlock()
+  )
+select p2, "Double fetch vulnerability. First fetch was $@.", p1, p1.toString()
--- a/cpp/ql/test/library-tests/ir/ir/PrintAST.expected
+++ b/cpp/ql/test/library-tests/ir/ir/PrintAST.expected
@@ -13035,6 +13035,23 @@ ir.cpp:
 # 1689|   getEntryPoint(): [BlockStmt] { ... }
 # 1689|     getStmt(0): [EmptyStmt] ;
 # 1689|     getStmt(1): [ReturnStmt] return ...
+# 1693| [TopLevelFunction] int goto_on_same_line()
+# 1693|   <params>: 
+# 1693|   getEntryPoint(): [BlockStmt] { ... }
+# 1694|     getStmt(0): [DeclStmt] declaration
+# 1694|       getDeclarationEntry(0): [VariableDeclarationEntry] definition of x
+# 1694|           Type = [IntType] int
+# 1694|         getVariable().getInitializer(): [Initializer] initializer for x
+# 1694|           getExpr(): [Literal] 42
+# 1694|               Type = [IntType] int
+# 1694|               Value = [Literal] 42
+# 1694|               ValueCategory = prvalue
+# 1695|     getStmt(1): [GotoStmt] goto ...
+# 1695|     getStmt(2): [LabelStmt] label ...:
+# 1696|     getStmt(3): [ReturnStmt] return ...
+# 1696|       getExpr(): [VariableAccess] x
+# 1696|           Type = [IntType] int
+# 1696|           ValueCategory = prvalue(load)
 perf-regression.cpp:
 #    4| [CopyAssignmentOperator] Big& Big::operator=(Big const&)
 #    4|   <params>: 
--- a/cpp/ql/test/library-tests/ir/ir/ir.cpp
+++ b/cpp/ql/test/library-tests/ir/ir/ir.cpp
@@ -1690,4 +1690,10 @@ void captured_lambda(int x, int &y, int &&z)
    };
 }

+int goto_on_same_line() {
+  int x = 42;
+  goto next; next:
+  return x;
+}
+
 // semmle-extractor-options: -std=c++17 --clang
--- a/cpp/ql/test/library-tests/ir/ir/operand_locations.expected
+++ b/cpp/ql/test/library-tests/ir/ir/operand_locations.expected
@@ -7527,6 +7527,17 @@
 | ir.cpp:1689:50:1689:50 | Load | m1689_6 |
 | ir.cpp:1689:50:1689:50 | SideEffect | m1689_3 |
 | ir.cpp:1689:50:1689:50 | SideEffect | m1689_8 |
+| ir.cpp:1693:5:1693:21 | Address | &:r1693_5 |
+| ir.cpp:1693:5:1693:21 | ChiPartial | partial:m1693_3 |
+| ir.cpp:1693:5:1693:21 | ChiTotal | total:m1693_2 |
+| ir.cpp:1693:5:1693:21 | Load | m1696_4 |
+| ir.cpp:1693:5:1693:21 | SideEffect | m1693_3 |
+| ir.cpp:1694:7:1694:7 | Address | &:r1694_1 |
+| ir.cpp:1694:10:1694:12 | StoreValue | r1694_2 |
+| ir.cpp:1696:3:1696:11 | Address | &:r1696_1 |
+| ir.cpp:1696:10:1696:10 | Address | &:r1696_2 |
+| ir.cpp:1696:10:1696:10 | Load | m1694_3 |
+| ir.cpp:1696:10:1696:10 | StoreValue | r1696_3 |
 | perf-regression.cpp:6:3:6:5 | Address | &:r6_5 |
 | perf-regression.cpp:6:3:6:5 | Address | &:r6_5 |
 | perf-regression.cpp:6:3:6:5 | Address | &:r6_7 |
--- a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected
+++ b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected
@@ -8842,6 +8842,25 @@ ir.cpp:
 # 1689|     v1689_12(void)                             = AliasedUse                   : ~m?
 # 1689|     v1689_13(void)                             = ExitFunction                 : 

+# 1693| int goto_on_same_line()
+# 1693|   Block 0
+# 1693|     v1693_1(void)       = EnterFunction            : 
+# 1693|     mu1693_2(unknown)   = AliasedDefinition        : 
+# 1693|     mu1693_3(unknown)   = InitializeNonLocal       : 
+# 1694|     r1694_1(glval<int>) = VariableAddress[x]       : 
+# 1694|     r1694_2(int)        = Constant[42]             : 
+# 1694|     mu1694_3(int)       = Store[x]                 : &:r1694_1, r1694_2
+# 1695|     v1695_1(void)       = NoOp                     : 
+# 1695|     v1695_2(void)       = NoOp                     : 
+# 1696|     r1696_1(glval<int>) = VariableAddress[#return] : 
+# 1696|     r1696_2(glval<int>) = VariableAddress[x]       : 
+# 1696|     r1696_3(int)        = Load[x]                  : &:r1696_2, ~m?
+# 1696|     mu1696_4(int)       = Store[#return]           : &:r1696_1, r1696_3
+# 1693|     r1693_4(glval<int>) = VariableAddress[#return] : 
+# 1693|     v1693_5(void)       = ReturnValue              : &:r1693_4, ~m?
+# 1693|     v1693_6(void)       = AliasedUse               : ~m?
+# 1693|     v1693_7(void)       = ExitFunction             : 
+
 perf-regression.cpp:
 #    6| void Big::Big()
 #    6|   Block 0
--- a/cpp/ql/test/query-tests/Security/CWE/CWE-078/SAMATE/ExecTainted/ExecTainted.expected
+++ b/cpp/ql/test/query-tests/Security/CWE/CWE-078/SAMATE/ExecTainted/ExecTainted.expected
@@ -3,7 +3,6 @@ edges
 | tests.cpp:33:34:33:39 | call to getenv | tests.cpp:38:39:38:49 | environment indirection |
 | tests.cpp:38:25:38:36 | strncat output argument | tests.cpp:26:15:26:23 | ReturnValue |
 | tests.cpp:38:39:38:49 | environment indirection | tests.cpp:38:25:38:36 | strncat output argument |
-| tests.cpp:38:39:38:49 | environment indirection | tests.cpp:38:25:38:36 | strncat output argument |
 | tests.cpp:51:12:51:20 | call to badSource | tests.cpp:53:16:53:19 | data indirection |
 nodes
 | tests.cpp:26:15:26:23 | ReturnValue | semmle.label | ReturnValue |
--- a/cpp/ql/test/query-tests/Security/CWE/CWE-078/semmle/ExecTainted/ExecTainted.expected
+++ b/cpp/ql/test/query-tests/Security/CWE/CWE-078/semmle/ExecTainted/ExecTainted.expected
@@ -2,64 +2,55 @@ edges
 | test.cpp:16:20:16:23 | argv | test.cpp:22:45:22:52 | userName indirection |
 | test.cpp:22:13:22:20 | sprintf output argument | test.cpp:23:12:23:19 | command1 indirection |
 | test.cpp:22:45:22:52 | userName indirection | test.cpp:22:13:22:20 | sprintf output argument |
-| test.cpp:22:45:22:52 | userName indirection | test.cpp:22:13:22:20 | sprintf output argument |
 | test.cpp:47:21:47:26 | call to getenv | test.cpp:50:35:50:43 | envCflags indirection |
 | test.cpp:50:11:50:17 | sprintf output argument | test.cpp:51:10:51:16 | command indirection |
 | test.cpp:50:35:50:43 | envCflags indirection | test.cpp:50:11:50:17 | sprintf output argument |
-| test.cpp:50:35:50:43 | envCflags indirection | test.cpp:50:11:50:17 | sprintf output argument |
 | test.cpp:62:9:62:16 | fread output argument | test.cpp:64:20:64:27 | filename indirection |
 | test.cpp:64:11:64:17 | strncat output argument | test.cpp:65:10:65:16 | command indirection |
 | test.cpp:64:20:64:27 | filename indirection | test.cpp:64:11:64:17 | strncat output argument |
-| test.cpp:64:20:64:27 | filename indirection | test.cpp:64:11:64:17 | strncat output argument |
 | test.cpp:82:9:82:16 | fread output argument | test.cpp:84:20:84:27 | filename indirection |
 | test.cpp:84:11:84:17 | strncat output argument | test.cpp:85:32:85:38 | command indirection |
 | test.cpp:84:20:84:27 | filename indirection | test.cpp:84:11:84:17 | strncat output argument |
-| test.cpp:84:20:84:27 | filename indirection | test.cpp:84:11:84:17 | strncat output argument |
 | test.cpp:91:9:91:16 | fread output argument | test.cpp:93:17:93:24 | filename indirection |
 | test.cpp:93:11:93:14 | strncat output argument | test.cpp:94:45:94:48 | path indirection |
 | test.cpp:93:17:93:24 | filename indirection | test.cpp:93:11:93:14 | strncat output argument |
-| test.cpp:93:17:93:24 | filename indirection | test.cpp:93:11:93:14 | strncat output argument |
 | test.cpp:106:20:106:25 | call to getenv | test.cpp:107:33:107:36 | path indirection |
 | test.cpp:107:31:107:31 | call to operator+ | test.cpp:108:18:108:22 | call to c_str indirection |
 | test.cpp:107:33:107:36 | path indirection | test.cpp:107:31:107:31 | call to operator+ |
-| test.cpp:107:33:107:36 | path indirection | test.cpp:107:31:107:31 | call to operator+ |
 | test.cpp:113:20:113:25 | call to getenv | test.cpp:114:19:114:22 | path indirection |
 | test.cpp:114:17:114:17 | Call | test.cpp:114:25:114:29 | call to c_str indirection |
 | test.cpp:114:19:114:22 | path indirection | test.cpp:114:17:114:17 | Call |
-| test.cpp:114:19:114:22 | path indirection | test.cpp:114:17:114:17 | Call |
 | test.cpp:119:20:119:25 | call to getenv | test.cpp:120:19:120:22 | path indirection |
 | test.cpp:120:17:120:17 | Call | test.cpp:120:10:120:30 | call to data indirection |
 | test.cpp:120:19:120:22 | path indirection | test.cpp:120:17:120:17 | Call |
-| test.cpp:120:19:120:22 | path indirection | test.cpp:120:17:120:17 | Call |
 | test.cpp:140:9:140:11 | fread output argument | test.cpp:142:31:142:33 | str indirection |
 | test.cpp:142:11:142:17 | sprintf output argument | test.cpp:143:10:143:16 | command indirection |
 | test.cpp:142:31:142:33 | str indirection | test.cpp:142:11:142:17 | sprintf output argument |
-| test.cpp:142:31:142:33 | str indirection | test.cpp:142:11:142:17 | sprintf output argument |
 | test.cpp:174:9:174:16 | fread output argument | test.cpp:177:20:177:27 | filename indirection |
 | test.cpp:174:9:174:16 | fread output argument | test.cpp:178:22:178:26 | flags indirection |
 | test.cpp:174:9:174:16 | fread output argument | test.cpp:180:22:180:29 | filename indirection |
 | test.cpp:177:13:177:17 | strncat output argument | test.cpp:183:32:183:38 | command indirection |
 | test.cpp:177:20:177:27 | filename indirection | test.cpp:177:13:177:17 | strncat output argument |
-| test.cpp:177:20:177:27 | filename indirection | test.cpp:177:13:177:17 | strncat output argument |
 | test.cpp:178:13:178:19 | strncat output argument | test.cpp:183:32:183:38 | command indirection |
 | test.cpp:178:22:178:26 | flags indirection | test.cpp:178:13:178:19 | strncat output argument |
-| test.cpp:178:22:178:26 | flags indirection | test.cpp:178:13:178:19 | strncat output argument |
 | test.cpp:180:13:180:19 | strncat output argument | test.cpp:183:32:183:38 | command indirection |
 | test.cpp:180:22:180:29 | filename indirection | test.cpp:180:13:180:19 | strncat output argument |
-| test.cpp:180:22:180:29 | filename indirection | test.cpp:180:13:180:19 | strncat output argument |
 | test.cpp:186:47:186:54 | *filename | test.cpp:187:18:187:25 | filename indirection |
 | test.cpp:186:47:186:54 | *filename | test.cpp:188:20:188:24 | flags indirection |
 | test.cpp:186:47:186:54 | filename | test.cpp:187:18:187:25 | filename indirection |
 | test.cpp:186:47:186:54 | filename | test.cpp:188:20:188:24 | flags indirection |
 | test.cpp:187:11:187:15 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
 | test.cpp:187:11:187:15 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:187:11:187:15 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:187:11:187:15 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
 | test.cpp:187:18:187:25 | filename indirection | test.cpp:187:11:187:15 | strncat output argument |
 | test.cpp:187:18:187:25 | filename indirection | test.cpp:187:11:187:15 | strncat output argument |
 | test.cpp:188:11:188:17 | command [post update] | test.cpp:188:11:188:17 | command [post update] |
-| test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
-| test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
-| test.cpp:188:11:188:17 | command [post update] | test.cpp:205:10:205:16 | command [post update] |
-| test.cpp:188:11:188:17 | command [post update] | test.cpp:205:10:205:16 | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
+| test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
 | test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
 | test.cpp:188:11:188:17 | strncat output argument | test.cpp:188:11:188:17 | command [post update] |
 | test.cpp:188:20:188:24 | flags indirection | test.cpp:188:11:188:17 | strncat output argument |
@@ -67,9 +58,21 @@ edges
 | test.cpp:194:9:194:16 | fread output argument | test.cpp:196:26:196:33 | filename |
 | test.cpp:194:9:194:16 | fread output argument | test.cpp:196:26:196:33 | filename indirection |
 | test.cpp:196:10:196:16 | command [post update] | test.cpp:198:32:198:38 | command indirection |
+| test.cpp:196:10:196:16 | command [post update] | test.cpp:198:32:198:38 | command indirection |
 | test.cpp:196:26:196:33 | filename | test.cpp:186:47:186:54 | filename |
+| test.cpp:196:26:196:33 | filename | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename | test.cpp:196:10:196:16 | command [post update] |
 | test.cpp:196:26:196:33 | filename indirection | test.cpp:186:47:186:54 | *filename |
-| test.cpp:205:10:205:16 | command [post update] | test.cpp:207:32:207:38 | command indirection |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:218:9:218:16 | fread output argument | test.cpp:220:19:220:26 | filename indirection |
+| test.cpp:218:9:218:16 | fread output argument | test.cpp:220:19:220:26 | filename indirection |
+| test.cpp:220:10:220:16 | strncat output argument | test.cpp:222:32:222:38 | command indirection |
+| test.cpp:220:10:220:16 | strncat output argument | test.cpp:222:32:222:38 | command indirection |
+| test.cpp:220:19:220:26 | filename indirection | test.cpp:220:10:220:16 | strncat output argument |
+| test.cpp:220:19:220:26 | filename indirection | test.cpp:220:10:220:16 | strncat output argument |
+| test.cpp:220:19:220:26 | filename indirection | test.cpp:220:10:220:16 | strncat output argument |
+| test.cpp:220:19:220:26 | filename indirection | test.cpp:220:10:220:16 | strncat output argument |
 nodes
 | test.cpp:16:20:16:23 | argv | semmle.label | argv |
 | test.cpp:22:13:22:20 | sprintf output argument | semmle.label | sprintf output argument |
@@ -115,22 +118,48 @@ nodes
 | test.cpp:180:13:180:19 | strncat output argument | semmle.label | strncat output argument |
 | test.cpp:180:22:180:29 | filename indirection | semmle.label | filename indirection |
 | test.cpp:183:32:183:38 | command indirection | semmle.label | command indirection |
+| test.cpp:183:32:183:38 | command indirection | semmle.label | command indirection |
+| test.cpp:183:32:183:38 | command indirection | semmle.label | command indirection |
 | test.cpp:186:47:186:54 | *filename | semmle.label | *filename |
 | test.cpp:186:47:186:54 | filename | semmle.label | filename |
 | test.cpp:187:11:187:15 | strncat output argument | semmle.label | strncat output argument |
+| test.cpp:187:11:187:15 | strncat output argument | semmle.label | strncat output argument |
+| test.cpp:187:18:187:25 | filename indirection | semmle.label | filename indirection |
 | test.cpp:187:18:187:25 | filename indirection | semmle.label | filename indirection |
 | test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
 | test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | command [post update] | semmle.label | command [post update] |
+| test.cpp:188:11:188:17 | strncat output argument | semmle.label | strncat output argument |
 | test.cpp:188:11:188:17 | strncat output argument | semmle.label | strncat output argument |
 | test.cpp:188:20:188:24 | flags indirection | semmle.label | flags indirection |
+| test.cpp:188:20:188:24 | flags indirection | semmle.label | flags indirection |
 | test.cpp:194:9:194:16 | fread output argument | semmle.label | fread output argument |
 | test.cpp:196:10:196:16 | command [post update] | semmle.label | command [post update] |
+| test.cpp:196:10:196:16 | command [post update] | semmle.label | command [post update] |
 | test.cpp:196:26:196:33 | filename | semmle.label | filename |
 | test.cpp:196:26:196:33 | filename indirection | semmle.label | filename indirection |
 | test.cpp:198:32:198:38 | command indirection | semmle.label | command indirection |
-| test.cpp:205:10:205:16 | command [post update] | semmle.label | command [post update] |
-| test.cpp:207:32:207:38 | command indirection | semmle.label | command indirection |
+| test.cpp:198:32:198:38 | command indirection | semmle.label | command indirection |
+| test.cpp:218:9:218:16 | fread output argument | semmle.label | fread output argument |
+| test.cpp:220:10:220:16 | strncat output argument | semmle.label | strncat output argument |
+| test.cpp:220:10:220:16 | strncat output argument | semmle.label | strncat output argument |
+| test.cpp:220:19:220:26 | filename indirection | semmle.label | filename indirection |
+| test.cpp:220:19:220:26 | filename indirection | semmle.label | filename indirection |
+| test.cpp:222:32:222:38 | command indirection | semmle.label | command indirection |
 subpaths
+| test.cpp:196:26:196:33 | filename | test.cpp:186:47:186:54 | filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename | test.cpp:186:47:186:54 | filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename | test.cpp:186:47:186:54 | filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename | test.cpp:186:47:186:54 | filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
+| test.cpp:196:26:196:33 | filename indirection | test.cpp:186:47:186:54 | *filename | test.cpp:188:11:188:17 | command [post update] | test.cpp:196:10:196:16 | command [post update] |
 #select
 | test.cpp:23:12:23:19 | command1 | test.cpp:16:20:16:23 | argv | test.cpp:23:12:23:19 | command1 indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:16:20:16:23 | argv | user input (a command-line argument) | test.cpp:22:13:22:20 | sprintf output argument | sprintf output argument |
 | test.cpp:51:10:51:16 | command | test.cpp:47:21:47:26 | call to getenv | test.cpp:51:10:51:16 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:47:21:47:26 | call to getenv | user input (an environment variable) | test.cpp:50:11:50:17 | sprintf output argument | sprintf output argument |
@@ -146,5 +175,5 @@ subpaths
 | test.cpp:183:32:183:38 | command | test.cpp:174:9:174:16 | fread output argument | test.cpp:183:32:183:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:174:9:174:16 | fread output argument | user input (String read by fread) | test.cpp:180:13:180:19 | strncat output argument | strncat output argument |
 | test.cpp:198:32:198:38 | command | test.cpp:194:9:194:16 | fread output argument | test.cpp:198:32:198:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:194:9:194:16 | fread output argument | user input (String read by fread) | test.cpp:187:11:187:15 | strncat output argument | strncat output argument |
 | test.cpp:198:32:198:38 | command | test.cpp:194:9:194:16 | fread output argument | test.cpp:198:32:198:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:194:9:194:16 | fread output argument | user input (String read by fread) | test.cpp:188:11:188:17 | strncat output argument | strncat output argument |
-| test.cpp:207:32:207:38 | command | test.cpp:194:9:194:16 | fread output argument | test.cpp:207:32:207:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:194:9:194:16 | fread output argument | user input (String read by fread) | test.cpp:187:11:187:15 | strncat output argument | strncat output argument |
-| test.cpp:207:32:207:38 | command | test.cpp:194:9:194:16 | fread output argument | test.cpp:207:32:207:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:194:9:194:16 | fread output argument | user input (String read by fread) | test.cpp:188:11:188:17 | strncat output argument | strncat output argument |
+| test.cpp:222:32:222:38 | command | test.cpp:218:9:218:16 | fread output argument | test.cpp:222:32:222:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:218:9:218:16 | fread output argument | user input (String read by fread) | test.cpp:220:10:220:16 | strncat output argument | strncat output argument |
+| test.cpp:222:32:222:38 | command | test.cpp:218:9:218:16 | fread output argument | test.cpp:222:32:222:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:218:9:218:16 | fread output argument | user input (String read by fread) | test.cpp:220:10:220:16 | strncat output argument | strncat output argument |
--- a/cpp/ql/test/query-tests/Security/CWE/CWE-078/semmle/ExecTainted/test.cpp
+++ b/cpp/ql/test/query-tests/Security/CWE/CWE-078/semmle/ExecTainted/test.cpp
@@ -199,7 +199,7 @@ void test17(FILE *f) {
 }

 void test18() {
-  // GOOD [FALSE POSITIVE]
+  // GOOD
  char command[1000] = "ls ", flags[1000] = "-l", filename[1000] = ".";

  concat(command, flags, filename);
@@ -207,4 +207,19 @@ void test18() {
  execl("/bin/sh", "sh", "-c", command);
 }

+#define CONCAT(COMMAND, FILENAME)   \
+  strncat(COMMAND, FILENAME, 1000); \
+  strncat(COMMAND, " ", 1000);      \
+  strncat(COMMAND, FILENAME, 1000);
+
+void test19(FILE *f) {
+  // BAD: the user string is injected directly into a command
+  char command[1000] = "mv ", filename[1000];
+  fread(filename, 1, 1000, f);
+
+  CONCAT(command, filename)
+
+  execl("/bin/sh", "sh", "-c", command);
+}
+
 // open question: do we want to report certain sources even when they're the start of the string?
--- a/csharp/ql/lib/semmle/code/csharp/commons/Collections.qll
+++ b/csharp/ql/lib/semmle/code/csharp/commons/Collections.qll
@@ -1,6 +1,7 @@
 /** Provides classes for collections. */

 import csharp
+import semmle.code.csharp.frameworks.system.Collections

 private string modifyMethodName() {
  result =
@@ -66,6 +67,12 @@ class CollectionType extends RefType {
  }
 }

+/** Holds if `t` is a collection type. */
+predicate isCollectionType(ValueOrRefType t) {
+  t.getABaseType*() instanceof SystemCollectionsIEnumerableInterface and
+  not t instanceof StringType
+}
+
 /** An object creation that creates an empty collection. */
 class EmptyCollectionCreation extends ObjectCreation {
  EmptyCollectionCreation() {
--- a/csharp/ql/src/utils/model-generator/ModelGeneratorUtilsSpecific.qll
+++ b/csharp/ql/src/utils/model-generator/ModelGeneratorUtilsSpecific.qll
@@ -1,6 +1,7 @@
 import csharp
 import semmle.code.csharp.dataflow.internal.DataFlowPrivate
 private import semmle.code.csharp.commons.Util
+private import semmle.code.csharp.commons.Collections
 private import semmle.code.csharp.dataflow.internal.DataFlowImplCommon
 private import semmle.code.csharp.dataflow.internal.DataFlowDispatch

@@ -28,14 +29,8 @@ predicate asPartialModel = Csv::asPartialModel/1;
 */
 predicate isRelevantType(Type t) { not t instanceof Enum }

-private predicate isPrimitiveTypeUsedForBulkData(Type t) {
-  t.getName().regexpMatch("byte|char|Byte|Char")
-}
-
 private string parameterAccess(Parameter p) {
-  if
-    p.getType() instanceof ArrayType and
-    not isPrimitiveTypeUsedForBulkData(p.getType().(ArrayType).getElementType())
+  if isCollectionType(p.getType())
  then result = "Argument[" + p.getPosition() + "].Element"
  else result = "Argument[" + p.getPosition() + "]"
 }
--- a/csharp/ql/test/utils/model-generator/CaptureSummaryModels.expected
+++ b/csharp/ql/test/utils/model-generator/CaptureSummaryModels.expected
@@ -1,6 +1,5 @@
-| Summaries;BasicFlow;false;AssignFieldToArray;(System.Object[]);Argument[Qualifier];Argument[0].Element;taint |
-| Summaries;BasicFlow;false;AssignToArray;(System.Int32,System.Int32[]);Argument[0];Argument[1].Element;taint |
-| Summaries;BasicFlow;false;ReturnArrayElement;(System.Int32[]);Argument[0].Element;ReturnValue;taint |
+| NoSummaries;PublicClassFlow;false;PublicReturn;(System.Int32);Argument[0];ReturnValue;taint |
+| Summaries;BaseClassFlow;true;ReturnParam;(System.Int32);Argument[0];ReturnValue;taint |
 | Summaries;BasicFlow;false;ReturnField;();Argument[Qualifier];ReturnValue;taint |
 | Summaries;BasicFlow;false;ReturnParam0;(System.String,System.Object);Argument[0];ReturnValue;taint |
 | Summaries;BasicFlow;false;ReturnParam1;(System.String,System.Object);Argument[1];ReturnValue;taint |
@@ -9,3 +8,23 @@
 | Summaries;BasicFlow;false;ReturnSubstring;(System.String);Argument[0];ReturnValue;taint |
 | Summaries;BasicFlow;false;ReturnThis;(System.Object);Argument[Qualifier];ReturnValue;value |
 | Summaries;BasicFlow;false;SetField;(System.String);Argument[0];Argument[Qualifier];taint |
+| Summaries;CollectionFlow;false;AddFieldToList;(System.Collections.Generic.List<System.String>);Argument[Qualifier];Argument[0].Element;taint |
+| Summaries;CollectionFlow;false;AddToList;(System.Collections.Generic.List<System.Object>,System.Object);Argument[1];Argument[0].Element;taint |
+| Summaries;CollectionFlow;false;AssignFieldToArray;(System.Object[]);Argument[Qualifier];Argument[0].Element;taint |
+| Summaries;CollectionFlow;false;AssignToArray;(System.Int32,System.Int32[]);Argument[0];Argument[1].Element;taint |
+| Summaries;CollectionFlow;false;ReturnArrayElement;(System.Int32[]);Argument[0].Element;ReturnValue;taint |
+| Summaries;CollectionFlow;false;ReturnFieldInAList;();Argument[Qualifier];ReturnValue;taint |
+| Summaries;CollectionFlow;false;ReturnListElement;(System.Collections.Generic.List<System.Object>);Argument[0].Element;ReturnValue;taint |
+| Summaries;DerivedClass1Flow;false;ReturnParam1;(System.Int32,System.Int32);Argument[1];ReturnValue;taint |
+| Summaries;DerivedClass2Flow;false;ReturnParam0;(System.Int32,System.Int32);Argument[0];ReturnValue;taint |
+| Summaries;DerivedClass2Flow;false;ReturnParam;(System.Int32);Argument[0];ReturnValue;taint |
+| Summaries;GenericFlow<>;false;AddFieldToGenericList;(System.Collections.Generic.List<T>);Argument[Qualifier];Argument[0].Element;taint |
+| Summaries;GenericFlow<>;false;AddToGenericList<>;(System.Collections.Generic.List<S>,S);Argument[1];Argument[0].Element;taint |
+| Summaries;GenericFlow<>;false;ReturnFieldInGenericList;();Argument[Qualifier];ReturnValue;taint |
+| Summaries;GenericFlow<>;false;ReturnGenericElement<>;(System.Collections.Generic.List<S>);Argument[0].Element;ReturnValue;taint |
+| Summaries;GenericFlow<>;false;ReturnGenericField;();Argument[Qualifier];ReturnValue;taint |
+| Summaries;GenericFlow<>;false;ReturnGenericParam<>;(S);Argument[0];ReturnValue;taint |
+| Summaries;GenericFlow<>;false;SetGenericField;(T);Argument[0];Argument[Qualifier];taint |
+| Summaries;IEnumerableFlow;false;ReturnFieldInIEnumerable;();Argument[Qualifier];ReturnValue;taint |
+| Summaries;IEnumerableFlow;false;ReturnIEnumerable;(System.Collections.Generic.IEnumerable<System.String>);Argument[0].Element;ReturnValue;taint |
+| Summaries;IEnumerableFlow;false;ReturnIEnumerableElement;(System.Collections.Generic.IEnumerable<System.Object>);Argument[0].Element;ReturnValue;taint |
--- a/csharp/ql/test/utils/model-generator/NoSummaries.cs
+++ b/csharp/ql/test/utils/model-generator/NoSummaries.cs
@@ -0,0 +1,45 @@
+using System;
+
+namespace NoSummaries;
+
+// Single class with a method that produces a flow summary.
+// Just to prove that, if a method like this is correctly exposed, a flow summary will be captured.
+public class PublicClassFlow
+{
+    public int PublicReturn(int input)
+    {
+        return input;
+    }
+}
+
+public sealed class PublicClassNoFlow
+{
+    private int PrivateReturn(int input)
+    {
+        return input;
+    }
+
+    internal int InternalReturn(int input)
+    {
+        return input;
+    }
+
+    private class PrivateClassNoFlow
+    {
+        public int ReturnParam(int input)
+        {
+            return input;
+        }
+    }
+
+    private class PrivateClassNestedPublicClassNoFlow
+    {
+        public class NestedPublicClassFlow
+        {
+            public int ReturnParam(int input)
+            {
+                return input;
+            }
+        }
+    }
+}
--- a/csharp/ql/test/utils/model-generator/Summaries.cs
+++ b/csharp/ql/test/utils/model-generator/Summaries.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Linq;
+using System.Collections.Generic;

 namespace Summaries;

@@ -31,6 +33,21 @@ public class BasicFlow
        return s.Substring(0, 1);
    }

+    public void SetField(string s)
+    {
+        tainted = s;
+    }
+
+    public string ReturnField()
+    {
+        return tainted;
+    }
+}
+
+public class CollectionFlow
+{
+    private string tainted;
+
    public int ReturnArrayElement(int[] input)
    {
        return input[0];
@@ -41,18 +58,117 @@ public class BasicFlow
        target[0] = data;
    }

-    public void SetField(string s)
-    {
-        tainted = s;
-    }
-
-    public string ReturnField()
-    {
-        return tainted;
-    }
-
    public void AssignFieldToArray(object[] target)
    {
        target[0] = tainted;
    }
+
+    public object ReturnListElement(List<object> input)
+    {
+        return input[0];
+    }
+
+    public void AddToList(List<object> input, object data)
+    {
+        input.Add(data);
+    }
+
+    public void AddFieldToList(List<string> input)
+    {
+        input.Add(tainted);
+    }
+
+    public List<string> ReturnFieldInAList()
+    {
+        return new List<string> { tainted };
+    }
+}
+
+public class IEnumerableFlow
+{
+    private string tainted;
+
+    public IEnumerable<string> ReturnIEnumerable(IEnumerable<string> input)
+    {
+        return input;
+    }
+
+    public object ReturnIEnumerableElement(IEnumerable<object> input)
+    {
+        return input.First();
+    }
+
+    public IEnumerable<string> ReturnFieldInIEnumerable()
+    {
+        return new List<string> { tainted };
+    }
+}
+
+public class GenericFlow<T>
+{
+    private T tainted;
+
+    public void SetGenericField(T t)
+    {
+        tainted = t;
+    }
+
+    public T ReturnGenericField()
+    {
+        return tainted;
+    }
+
+    public void AddFieldToGenericList(List<T> input)
+    {
+        input.Add(tainted);
+    }
+
+    public List<T> ReturnFieldInGenericList()
+    {
+        return new List<T> { tainted };
+    }
+
+    public S ReturnGenericParam<S>(S input)
+    {
+        return input;
+    }
+
+    public S ReturnGenericElement<S>(List<S> input)
+    {
+        return input[0];
+    }
+
+    public void AddToGenericList<S>(List<S> input, S data)
+    {
+        input.Add(data);
+    }
+}
+
+public abstract class BaseClassFlow
+{
+    public virtual int ReturnParam(int input)
+    {
+        return input;
+    }
+}
+
+public class DerivedClass1Flow : BaseClassFlow
+{
+    public int ReturnParam1(int input0, int input1)
+    {
+        return input1;
+    }
+}
+
+public class DerivedClass2Flow : BaseClassFlow
+{
+    public override int ReturnParam(int input)
+    {
+        return input;
+    }
+
+    public int ReturnParam0(int input0, int input1)
+    {
+        return input0;
+    }
 }
--- a/csharp/ql/test/utils/model-generator/options
+++ b/csharp/ql/test/utils/model-generator/options
@@ -0,0 +1 @@
+semmle-extractor-options: /r:System.Linq.dll 
--- a/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst
+++ b/docs/codeql/codeql-language-guides/modeling-data-flow-in-go-libraries.rst
@@ -7,8 +7,8 @@ When analyzing a Go program, CodeQL does not examine the source code for
 external packages. To track the flow of untrusted data through a library, you
 can create a model of the library.

-You can find existing models in the ``ql/src/semmle/go/frameworks/`` folder of the
-`CodeQL for Go repository <https://github.com/github/codeql-go/tree/main/ql/src/semmle/go/frameworks>`__.
+You can find existing models in the ``ql/lib/semmle/go/frameworks/`` folder of the
+`CodeQL for Go repository <https://github.com/github/codeql-go/tree/main/ql/lib/semmle/go/frameworks>`__.
 To add a new model, you should make a new file in that folder, named after the library.

 Sources
@@ -102,8 +102,8 @@ Data-flow sinks are specified by queries rather than by library models.
 However, you can use library models to indicate when functions belong to
 special categories. Queries can then use these categories when specifying
 sinks. Classes representing these special categories are contained in
-``ql/src/semmle/go/Concepts.qll`` in the `CodeQL for Go repository
-<https://github.com/github/codeql-go/blob/main/ql/src/semmle/go/Concepts.qll>`__.
+``ql/lib/semmle/go/Concepts.qll`` in the `CodeQL for Go repository
+<https://github.com/github/codeql-go/blob/main/ql/lib/semmle/go/Concepts.qll>`__.
 ``Concepts.qll`` includes classes for logger mechanisms,
 HTTP response writers, HTTP redirects, and marshaling and unmarshaling
 functions.
--- a/javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/javascript-experimental-atm-lib
-version: 0.1.1
+version: 0.2.1
 extractor: javascript
 library: true
 groups:
--- a/javascript/ql/experimental/adaptivethreatmodeling/model/qlpack.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/model/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/javascript-experimental-atm-model
-version: 0.0.6
+version: 0.1.1
 groups:
    - javascript
    - experimental
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/codeql-pack.lock.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/codeql-pack.lock.yml
@@ -1,6 +1,6 @@
 ---
 dependencies:
  codeql/javascript-experimental-atm-model:
-    version: 0.0.6
+    version: 0.1.0
 compiled: false
 lockVersion: 1.0.0
--- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml
@@ -6,4 +6,4 @@ groups:
    - experimental
 dependencies:
    codeql/javascript-experimental-atm-lib: "*"
-    codeql/javascript-experimental-atm-model: "0.0.6"
+    codeql/javascript-experimental-atm-model: "0.1.0"
--- a/javascript/ql/experimental/adaptivethreatmodeling/src/codeql-pack.lock.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/src/codeql-pack.lock.yml
@@ -1,6 +1,6 @@
 ---
 dependencies:
  codeql/javascript-experimental-atm-model:
-    version: 0.0.6
+    version: 0.1.0
 compiled: false
 lockVersion: 1.0.0
--- a/javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml
@@ -1,6 +1,6 @@
 name: codeql/javascript-experimental-atm-queries
 language: javascript
-version: 0.1.1
+version: 0.2.1
 suites: codeql-suites
 defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls
 groups:
@@ -8,4 +8,4 @@ groups:
    - experimental
 dependencies:
    codeql/javascript-experimental-atm-lib: "*"
-    codeql/javascript-experimental-atm-model: "0.0.6"
+    codeql/javascript-experimental-atm-model: "0.1.0"
--- a/javascript/ql/experimental/adaptivethreatmodeling/test/codeql-pack.lock.yml
+++ b/javascript/ql/experimental/adaptivethreatmodeling/test/codeql-pack.lock.yml
@@ -1,6 +1,6 @@
 ---
 dependencies:
  codeql/javascript-experimental-atm-model:
-    version: 0.0.6
+    version: 0.1.0
 compiled: false
 lockVersion: 1.0.0
--- a/javascript/ql/lib/change-notes/2022-03-23-api-graph-receiver-label.md
+++ b/javascript/ql/lib/change-notes/2022-03-23-api-graph-receiver-label.md
@@ -0,0 +1,7 @@
+---
+category: fix
+---
+* The following predicates on `API::Node` have been changed so as not to include the receiver. The receiver should now only be accessed via `getReceiver()`.
+  - `getParameter(int i)` previously included the receiver when `i = -1`
+  - `getAParameter()` previously included the receiver
+  - `getLastParameter()` previously included the receiver for calls with no arguments
--- a/javascript/ql/lib/semmle/javascript/ApiGraphs.qll
+++ b/javascript/ql/lib/semmle/javascript/ApiGraphs.qll
@@ -187,8 +187,7 @@ module API {
    }

    /**
-     * Gets a node representing a parameter or the receiver of the function represented by this
-     * node.
+     * Gets a node representing a parameter of the function represented by this node.
     *
     * This predicate may result in a mix of parameters from different call sites in cases where
     * there are multiple invocations of this API component.
@@ -198,8 +197,6 @@ module API {
    Node getAParameter() {
      Stages::ApiStage::ref() and
      result = this.getParameter(_)
-      or
-      result = this.getReceiver()
    }

    /**
@@ -561,9 +558,10 @@ module API {
          rhs = f.getExceptionalReturn()
        )
        or
-        exists(int i |
-          lbl = Label::parameter(i) and
-          argumentPassing(base, i, rhs)
+        exists(int i | argumentPassing(base, i, rhs) |
+          lbl = Label::parameter(i)
+          or
+          i = -1 and lbl = Label::receiver()
        )
        or
        exists(DataFlow::SourceNode src, DataFlow::PropWrite pw |
@@ -1096,8 +1094,8 @@ module API {
     */
    LabelParameter parameter(int i) { result.getIndex() = i }

-    /** Gets the `parameter` edge label for the receiver. */
-    LabelParameter receiver() { result = parameter(-1) }
+    /** Gets the edge label for the receiver. */
+    LabelReceiver receiver() { any() }

    /** Gets the `return` edge label. */
    LabelReturn return() { any() }
@@ -1132,12 +1130,13 @@ module API {
        MkLabelUnknownMember() or
        MkLabelParameter(int i) {
          i =
-            [-1 .. max(int args |
+            [0 .. max(int args |
                args = any(InvokeExpr invk).getNumArgument() or
                args = any(Function f).getNumParameter()
              )] or
          i = [0 .. 10]
        } or
+        MkLabelReceiver() or
        MkLabelReturn() or
        MkLabelPromised() or
        MkLabelPromisedError() or
@@ -1225,6 +1224,11 @@ module API {
        /** Gets the index of the parameter for this label. */
        int getIndex() { result = i }
      }
+
+      /** A label for the receiver of a call, that is, the value passed as `this`. */
+      class LabelReceiver extends ApiLabel, MkLabelReceiver {
+        override string toString() { result = "receiver" }
+      }
    }
  }
 }
--- a/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll
+++ b/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll
@@ -0,0 +1,6 @@
+/**
+ * This file contains imports required for the JavaScript version of `ConceptsShared.qll`.
+ * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages.
+ */
+
+import semmle.javascript.dataflow.DataFlow::DataFlow as DataFlow
--- a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
+++ b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll
@@ -0,0 +1,13 @@
+/**
+ * Provides Concepts which are shared across languages.
+ *
+ * Each language has a language specific `Concepts.qll` file that can import the
+ * shared concepts from this file. A language can either re-export the concept directly,
+ * or can add additional member-predicates that are needed for that language.
+ *
+ * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from
+ * each language, but we will maintain a discipline of moving those concepts to
+ * `ConceptsShared.qll` ASAP.
+ */
+
+private import ConceptsImports
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/ExternalAPIUsedWithUntrustedDataCustomizations.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/ExternalAPIUsedWithUntrustedDataCustomizations.qll
@@ -219,7 +219,6 @@ module ExternalApiUsedWithUntrustedData {
        or
        exists(string callbackName, int index |
          node = getNamedParameter(base.getParameter(index).getMember(callbackName), paramName) and
-          index != -1 and // ignore receiver
          result =
            basename + ".[callback " + index + " '" + callbackName + "'].[param '" + paramName +
              "']"
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomCustomizations.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomCustomizations.qll
@@ -32,34 +32,56 @@ module XssThroughDom {
   */
  string unsafeDomPropertyName() { result = ["innerText", "textContent", "value", "name", "src"] }

+  /** A read of a DOM property seen as a source for cross-site scripting vulnerabilities through the DOM. */
+  abstract class DomPropertySource extends Source {
    /**
-   * A source for text from the DOM from a JQuery method call.
+     * Gets the name of the DOM property that the source originated from.
     */
-  class JQueryTextSource extends Source, JQuery::MethodCall {
-    JQueryTextSource() {
-      (
-        this.getMethodName() = ["text", "val"] and this.getNumArgument() = 0
-        or
-        exists(string methodName, string value |
-          this.getMethodName() = methodName and
-          this.getNumArgument() = 1 and
-          forex(InferredType t | t = this.getArgument(0).analyze().getAType() | t = TTString()) and
-          this.getArgument(0).mayHaveStringValue(value)
-        |
-          methodName = "attr" and value = unsafeAttributeName()
-          or
-          methodName = "prop" and value = unsafeDomPropertyName()
-        )
-      ) and
-      // looks like a $("<p>" + ... ) source, which is benign for this query.
-      not exists(DataFlow::Node prefix |
-        DomBasedXss::isPrefixOfJQueryHtmlString(this.getReceiver()
+    abstract string getPropertyName();
+  }
+
+  /** Gets a jQuery method call where the receiver looks like `$("<p>" + ... )`, which is benign for this query. */
+  private JQuery::MethodCall benignJQueryMethod() {
+    exists(DataFlow::Node prefix |
+      DomBasedXss::isPrefixOfJQueryHtmlString(result
+            .getReceiver()
            .(DataFlow::CallNode)
            .getAnArgument(), prefix)
    |
      prefix.getStringValue().regexpMatch("\\s*<.*")
    )
  }
+
+  /** A source for text from the DOM from a JQuery method call. */
+  class JQueryTextSource extends Source instanceof JQuery::MethodCall {
+    JQueryTextSource() {
+      this.getMethodName() = ["text", "val"] and
+      this.getNumArgument() = 0 and
+      not this = benignJQueryMethod()
+    }
+  }
+
+  /**
+   * A source for text from a DOM property read by jQuery.
+   */
+  class JQueryDOMPropertySource extends DomPropertySource instanceof JQuery::MethodCall {
+    string prop;
+
+    JQueryDOMPropertySource() {
+      exists(string methodName |
+        this.getMethodName() = methodName and
+        this.getNumArgument() = 1 and
+        forex(InferredType t | t = this.getArgument(0).analyze().getAType() | t = TTString()) and
+        this.getArgument(0).mayHaveStringValue(prop)
+      |
+        methodName = "attr" and prop = unsafeAttributeName()
+        or
+        methodName = "prop" and prop = unsafeDomPropertyName()
+      ) and
+      not this = benignJQueryMethod()
+    }
+
+    override string getPropertyName() { result = prop }
  }

  /**
@@ -88,19 +110,25 @@ module XssThroughDom {
  /**
   * A source for text from the DOM from a DOM property read or call to `getAttribute()`.
   */
-  class DomTextSource extends Source {
+  class DomTextSource extends DomPropertySource {
+    string prop;
+
    DomTextSource() {
      exists(DataFlow::PropRead read | read = this |
        read.getBase().getALocalSource() = DOM::domValueRef() and
-        read.mayHavePropertyName(unsafeDomPropertyName())
+        prop = unsafeDomPropertyName() and
+        read.mayHavePropertyName(prop)
      )
      or
      exists(DataFlow::MethodCallNode mcn | mcn = this |
        mcn.getReceiver().getALocalSource() = DOM::domValueRef() and
        mcn.getMethodName() = "getAttribute" and
-        mcn.getArgument(0).mayHaveStringValue(unsafeAttributeName())
+        prop = unsafeAttributeName() and
+        mcn.getArgument(0).mayHaveStringValue(prop)
      )
    }
+
+    override string getPropertyName() { result = prop }
  }

  /** DEPRECATED: Alias for DomTextSource */
--- a/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomQuery.qll
+++ b/javascript/ql/lib/semmle/javascript/security/dataflow/XssThroughDomQuery.qll
@@ -35,4 +35,13 @@ class Configuration extends TaintTracking::Configuration {
  override predicate isSanitizerEdge(DataFlow::Node pred, DataFlow::Node succ) {
    DomBasedXss::isOptionallySanitizedEdge(pred, succ)
  }
+
+  override predicate hasFlowPath(DataFlow::SourcePathNode src, DataFlow::SinkPathNode sink) {
+    super.hasFlowPath(src, sink) and
+    // Filter out flow from a read of the `src` property that ends in a URL sink.
+    not (
+      sink.getNode() instanceof DomBasedXss::WriteURLSink and
+      src.getNode().(DomPropertySource).getPropertyName() = "src"
+    )
+  }
 }
--- a/javascript/ql/test/ApiGraphs/bound-args/index.js
+++ b/javascript/ql/test/ApiGraphs/bound-args/index.js
@@ -1,7 +1,7 @@
 import bar from 'foo';

 let boundbar = bar.bind(
-    "receiver", // def (parameter -1 (member default (member exports (module foo))))
+    "receiver", // def (receiver (member default (member exports (module foo))))
    "firstarg"  // def (parameter 0 (member default (member exports (module foo))))
 );
 boundbar(
@@ -9,7 +9,7 @@ boundbar(
 )

 let boundbar2 = boundbar.bind(
-    "ignored", // !def (parameter -1 (member default (member exports (module foo))))
+    "ignored", // !def (receiver (member default (member exports (module foo))))
    "othersecondarg" // def (parameter 1 (member default (member exports (module foo))))
 )
 boundbar2(
--- a/javascript/ql/test/ApiGraphs/partial-invoke/index.js
+++ b/javascript/ql/test/ApiGraphs/partial-invoke/index.js
@@ -2,7 +2,7 @@ const cp = require('child_process');

 module.exports = function () {
    return cp.spawn.bind(
-        cp,   // def (parameter -1 (member spawn (member exports (module child_process))))
+        cp,   // def (receiver (member spawn (member exports (module child_process))))
        "cat" // def (parameter 0 (member spawn (member exports (module child_process))))
    );
 };
--- a/javascript/ql/test/library-tests/frameworks/Knex/test.expected
+++ b/javascript/ql/test/library-tests/frameworks/Knex/test.expected
@@ -42,11 +42,11 @@ knexObject
 | tst.js:17:1:17:23 | use (return (member avg (return (member exports (module knex))))) |
 | tst.js:17:1:19:4 | use (return (member from (return (member avg (return (member exports (module knex))))))) |
 | tst.js:17:1:19:24 | use (return (member as (return (member from (return (member avg (return (member exports (module knex))))))))) |
-| tst.js:17:30:17:29 | use (parameter -1 (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))) |
-| tst.js:18:5:18:38 | use (return (member sum (parameter -1 (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))) |
-| tst.js:18:5:18:49 | use (return (member from (return (member sum (parameter -1 (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))) |
-| tst.js:18:5:18:68 | use (return (member groupBy (return (member from (return (member sum (parameter -1 (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))))) |
-| tst.js:18:5:18:77 | use (return (member as (return (member groupBy (return (member from (return (member sum (parameter -1 (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))))))) |
+| tst.js:17:30:17:29 | use (receiver (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))) |
+| tst.js:18:5:18:38 | use (return (member sum (receiver (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))) |
+| tst.js:18:5:18:49 | use (return (member from (return (member sum (receiver (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))) |
+| tst.js:18:5:18:68 | use (return (member groupBy (return (member from (return (member sum (receiver (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))))) |
+| tst.js:18:5:18:77 | use (return (member as (return (member groupBy (return (member from (return (member sum (receiver (parameter 0 (member from (return (member avg (return (member exports (module knex)))))))))))))))) |
 | tst.js:21:1:21:38 | use (return (member column (return (member exports (module knex))))) |
 | tst.js:21:1:21:47 | use (return (member select (return (member column (return (member exports (module knex))))))) |
 | tst.js:21:1:21:61 | use (return (member from (return (member select (return (member column (return (member exports (module knex))))))))) |
@@ -70,14 +70,14 @@ knexObject
 | tst.js:42:1:42:13 | use (return (return (member exports (module knex)))) |
 | tst.js:42:1:45:3 | use (return (member where (return (return (member exports (module knex)))))) |
 | tst.js:42:1:48:4 | use (return (member andWhere (return (member where (return (return (member exports (module knex)))))))) |
-| tst.js:46:13:46:12 | use (parameter -1 (parameter 0 (member andWhere (return (member where (return (return (member exports (module knex))))))))) |
-| tst.js:47:5:47:29 | use (return (member where (parameter -1 (parameter 0 (member andWhere (return (member where (return (return (member exports (module knex))))))))))) |
+| tst.js:46:13:46:12 | use (receiver (parameter 0 (member andWhere (return (member where (return (return (member exports (module knex))))))))) |
+| tst.js:47:5:47:29 | use (return (member where (receiver (parameter 0 (member andWhere (return (member where (return (return (member exports (module knex))))))))))) |
 | tst.js:50:1:50:13 | use (return (return (member exports (module knex)))) |
 | tst.js:50:1:52:2 | use (return (member where (return (return (member exports (module knex)))))) |
 | tst.js:50:1:52:28 | use (return (member orWhere (return (member where (return (return (member exports (module knex)))))))) |
-| tst.js:50:21:50:20 | use (parameter -1 (parameter 0 (member where (return (return (member exports (module knex))))))) |
-| tst.js:51:3:51:21 | use (return (member where (parameter -1 (parameter 0 (member where (return (return (member exports (module knex))))))))) |
-| tst.js:51:3:51:44 | use (return (member orWhere (return (member where (parameter -1 (parameter 0 (member where (return (return (member exports (module knex))))))))))) |
+| tst.js:50:21:50:20 | use (receiver (parameter 0 (member where (return (return (member exports (module knex))))))) |
+| tst.js:51:3:51:21 | use (return (member where (receiver (parameter 0 (member where (return (return (member exports (module knex))))))))) |
+| tst.js:51:3:51:44 | use (return (member orWhere (return (member where (receiver (parameter 0 (member where (return (return (member exports (module knex))))))))))) |
 | tst.js:54:1:54:13 | use (return (return (member exports (module knex)))) |
 | tst.js:54:1:54:56 | use (return (member where (return (return (member exports (module knex)))))) |
 | tst.js:56:1:56:13 | use (return (return (member exports (module knex)))) |
@@ -100,9 +100,9 @@ knexObject
 | tst.js:70:1:70:13 | use (return (return (member exports (module knex)))) |
 | tst.js:70:1:72:2 | use (return (member whereNot (return (return (member exports (module knex)))))) |
 | tst.js:70:1:72:31 | use (return (member orWhereNot (return (member whereNot (return (return (member exports (module knex)))))))) |
-| tst.js:70:24:70:23 | use (parameter -1 (parameter 0 (member whereNot (return (return (member exports (module knex))))))) |
-| tst.js:71:3:71:21 | use (return (member where (parameter -1 (parameter 0 (member whereNot (return (return (member exports (module knex))))))))) |
-| tst.js:71:3:71:47 | use (return (member orWhereNot (return (member where (parameter -1 (parameter 0 (member whereNot (return (return (member exports (module knex))))))))))) |
+| tst.js:70:24:70:23 | use (receiver (parameter 0 (member whereNot (return (return (member exports (module knex))))))) |
+| tst.js:71:3:71:21 | use (return (member where (receiver (parameter 0 (member whereNot (return (return (member exports (module knex))))))))) |
+| tst.js:71:3:71:47 | use (return (member orWhereNot (return (member where (receiver (parameter 0 (member whereNot (return (return (member exports (module knex))))))))))) |
 | tst.js:74:19:74:31 | use (return (return (member exports (module knex)))) |
 | tst.js:74:19:75:30 | use (return (member whereNot (return (return (member exports (module knex)))))) |
 | tst.js:74:19:76:31 | use (return (member andWhere (return (member whereNot (return (return (member exports (module knex)))))))) |
@@ -128,10 +128,10 @@ knexObject
 | tst.js:97:1:97:40 | use (return (member whereNotNull (return (return (member exports (module knex)))))) |
 | tst.js:99:1:99:13 | use (return (return (member exports (module knex)))) |
 | tst.js:99:1:101:2 | use (return (member whereExists (return (return (member exports (module knex)))))) |
-| tst.js:99:27:99:26 | use (parameter -1 (parameter 0 (member whereExists (return (return (member exports (module knex))))))) |
-| tst.js:100:3:100:18 | use (return (member select (parameter -1 (parameter 0 (member whereExists (return (return (member exports (module knex))))))))) |
-| tst.js:100:3:100:35 | use (return (member from (return (member select (parameter -1 (parameter 0 (member whereExists (return (return (member exports (module knex))))))))))) |
-| tst.js:100:3:100:78 | use (return (member whereRaw (return (member from (return (member select (parameter -1 (parameter 0 (member whereExists (return (return (member exports (module knex))))))))))))) |
+| tst.js:99:27:99:26 | use (receiver (parameter 0 (member whereExists (return (return (member exports (module knex))))))) |
+| tst.js:100:3:100:18 | use (return (member select (receiver (parameter 0 (member whereExists (return (return (member exports (module knex))))))))) |
+| tst.js:100:3:100:35 | use (return (member from (return (member select (receiver (parameter 0 (member whereExists (return (return (member exports (module knex))))))))))) |
+| tst.js:100:3:100:78 | use (return (member whereRaw (return (member from (return (member select (receiver (parameter 0 (member whereExists (return (return (member exports (module knex))))))))))))) |
 | tst.js:103:1:103:13 | use (return (return (member exports (module knex)))) |
 | tst.js:103:1:103:103 | use (return (member whereExists (return (return (member exports (module knex)))))) |
 | tst.js:103:27:103:42 | use (return (member select (return (member exports (module knex))))) |
@@ -139,10 +139,10 @@ knexObject
 | tst.js:103:27:103:102 | use (return (member whereRaw (return (member from (return (member select (return (member exports (module knex))))))))) |
 | tst.js:105:1:105:13 | use (return (return (member exports (module knex)))) |
 | tst.js:105:1:107:2 | use (return (member whereNotExists (return (return (member exports (module knex)))))) |
-| tst.js:105:30:105:29 | use (parameter -1 (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))) |
-| tst.js:106:3:106:18 | use (return (member select (parameter -1 (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))) |
-| tst.js:106:3:106:35 | use (return (member from (return (member select (parameter -1 (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))))) |
-| tst.js:106:3:106:78 | use (return (member whereRaw (return (member from (return (member select (parameter -1 (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))))))) |
+| tst.js:105:30:105:29 | use (receiver (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))) |
+| tst.js:106:3:106:18 | use (return (member select (receiver (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))) |
+| tst.js:106:3:106:35 | use (return (member from (return (member select (receiver (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))))) |
+| tst.js:106:3:106:78 | use (return (member whereRaw (return (member from (return (member select (receiver (parameter 0 (member whereNotExists (return (return (member exports (module knex))))))))))))) |
 | tst.js:109:1:109:13 | use (return (return (member exports (module knex)))) |
 | tst.js:109:1:109:45 | use (return (member whereBetween (return (return (member exports (module knex)))))) |
 | tst.js:111:1:111:13 | use (return (return (member exports (module knex)))) |
--- a/javascript/ql/test/query-tests/Security/CWE-079/XssThroughDom/XssThroughDom.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-079/XssThroughDom/XssThroughDom.expected
@@ -122,6 +122,13 @@ nodes
 | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" |
 | xss-through-dom.js:109:45:109:55 | this.el.src |
 | xss-through-dom.js:109:45:109:55 | this.el.src |
+| xss-through-dom.js:114:11:114:52 | src |
+| xss-through-dom.js:114:17:114:52 | documen ... k").src |
+| xss-through-dom.js:114:17:114:52 | documen ... k").src |
+| xss-through-dom.js:115:16:115:18 | src |
+| xss-through-dom.js:115:16:115:18 | src |
+| xss-through-dom.js:117:26:117:28 | src |
+| xss-through-dom.js:117:26:117:28 | src |
 edges
 | forms.js:8:23:8:28 | values | forms.js:9:31:9:36 | values |
 | forms.js:8:23:8:28 | values | forms.js:9:31:9:36 | values |
@@ -194,6 +201,12 @@ edges
 | xss-through-dom.js:109:45:109:55 | this.el.src | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" |
 | xss-through-dom.js:109:45:109:55 | this.el.src | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" |
 | xss-through-dom.js:109:45:109:55 | this.el.src | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" |
+| xss-through-dom.js:114:11:114:52 | src | xss-through-dom.js:115:16:115:18 | src |
+| xss-through-dom.js:114:11:114:52 | src | xss-through-dom.js:115:16:115:18 | src |
+| xss-through-dom.js:114:11:114:52 | src | xss-through-dom.js:117:26:117:28 | src |
+| xss-through-dom.js:114:11:114:52 | src | xss-through-dom.js:117:26:117:28 | src |
+| xss-through-dom.js:114:17:114:52 | documen ... k").src | xss-through-dom.js:114:11:114:52 | src |
+| xss-through-dom.js:114:17:114:52 | documen ... k").src | xss-through-dom.js:114:11:114:52 | src |
 #select
 | forms.js:9:31:9:40 | values.foo | forms.js:8:23:8:28 | values | forms.js:9:31:9:40 | values.foo | $@ is reinterpreted as HTML without escaping meta-characters. | forms.js:8:23:8:28 | values | DOM text |
 | forms.js:12:31:12:40 | values.bar | forms.js:11:24:11:29 | values | forms.js:12:31:12:40 | values.bar | $@ is reinterpreted as HTML without escaping meta-characters. | forms.js:11:24:11:29 | values | DOM text |
@@ -228,3 +241,4 @@ edges
 | xss-through-dom.js:93:16:93:46 | $("#foo ... ].value | xss-through-dom.js:93:16:93:46 | $("#foo ... ].value | xss-through-dom.js:93:16:93:46 | $("#foo ... ].value | $@ is reinterpreted as HTML without escaping meta-characters. | xss-through-dom.js:93:16:93:46 | $("#foo ... ].value | DOM text |
 | xss-through-dom.js:96:17:96:47 | $("#foo ... ].value | xss-through-dom.js:96:17:96:47 | $("#foo ... ].value | xss-through-dom.js:96:17:96:47 | $("#foo ... ].value | $@ is reinterpreted as HTML without escaping meta-characters. | xss-through-dom.js:96:17:96:47 | $("#foo ... ].value | DOM text |
 | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" | xss-through-dom.js:109:45:109:55 | this.el.src | xss-through-dom.js:109:31:109:70 | "<a src ... oo</a>" | $@ is reinterpreted as HTML without escaping meta-characters. | xss-through-dom.js:109:45:109:55 | this.el.src | DOM text |
+| xss-through-dom.js:115:16:115:18 | src | xss-through-dom.js:114:17:114:52 | documen ... k").src | xss-through-dom.js:115:16:115:18 | src | $@ is reinterpreted as HTML without escaping meta-characters. | xss-through-dom.js:114:17:114:52 | documen ... k").src | DOM text |
--- a/javascript/ql/test/query-tests/Security/CWE-079/XssThroughDom/xss-through-dom.js
+++ b/javascript/ql/test/query-tests/Security/CWE-079/XssThroughDom/xss-through-dom.js
@@ -109,3 +109,10 @@ class Sub extends Super {
 		$("#id").get(0).innerHTML = "<a src=\"" + this.el.src + "\">foo</a>"; // NOT OK. Attack: `<mytag id="id" src="x:&quot;&gt;&lt;img src=1 onerror=&quot;alert(1)&quot;&gt;" />`
 	}
 }
+
+(function () {
+    const src = document.getElementById("#link").src;
+	$("#id").html(src); // NOT OK.
+
+    $("#id").attr("src", src); // OK
+})();
--- a/misc/suite-helpers/code-scanning-selectors.yml
+++ b/misc/suite-helpers/code-scanning-selectors.yml
@@ -27,3 +27,4 @@
    query path:
      - /^experimental\/.*/
      - Metrics/Summaries/FrameworkCoverage.ql
+      - /Diagnostics/Internal/.*/
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -445,6 +445,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
  override string getPrimaryQLClass() { result = "RegExpAlt" }
 }

+class RegExpCharEscape = RegExpEscape;
+
 /**
 * An escaped regular expression term, that is, a regular expression
 * term starting with a backslash, which is not a backreference.
@@ -751,6 +753,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
   */
  int getNumber() { result = re.getGroupNumber(start, end) }

+  /** Holds if this is a capture group. */
+  predicate isCapture() { exists(this.getNumber()) }
+
  /** Holds if this is a named capture group. */
  predicate isNamed() { exists(this.getName()) }

--- a/python/ql/lib/semmle/python/dataflow/new/Regexp.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/Regexp.qll
@@ -0,0 +1,41 @@
+/**
+ * Provides classes for working with regular expressions.
+ */
+
+private import semmle.python.RegexTreeView
+private import semmle.python.regex
+private import semmle.python.dataflow.new.DataFlow
+
+/**
+ * Provides utility predicates related to regular expressions.
+ */
+module RegExpPatterns {
+  /**
+   * Gets a pattern that matches common top-level domain names in lower case.
+   */
+  string getACommonTld() {
+    // according to ranking by http://google.com/search?q=site:.<<TLD>>
+    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
+  }
+}
+
+/**
+ * A node whose value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+class RegExpPatternSource extends DataFlow::CfgNode {
+  private Regex astNode;
+
+  RegExpPatternSource() { astNode = this.asExpr() }
+
+  /**
+   * Gets a node where the pattern of this node is parsed as a part of
+   * a regular expression.
+   */
+  DataFlow::Node getAParse() { result = this }
+
+  /**
+   * Gets the root term of the regular expression parsed from this pattern.
+   */
+  RegExpTerm getRegExpTerm() { result.getRegex() = astNode }
+}
--- a/python/ql/lib/semmle/python/internal/ConceptsImports.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsImports.qll
@@ -0,0 +1,6 @@
+/**
+ * This file contains imports required for the Python version of `ConceptsShared.qll`.
+ * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages.
+ */
+
+import semmle.python.dataflow.new.DataFlow
--- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
@@ -0,0 +1,13 @@
+/**
+ * Provides Concepts which are shared across languages.
+ *
+ * Each language has a language specific `Concepts.qll` file that can import the
+ * shared concepts from this file. A language can either re-export the concept directly,
+ * or can add additional member-predicates that are needed for that language.
+ *
+ * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from
+ * each language, but we will maintain a discipline of moving those concepts to
+ * `ConceptsShared.qll` ASAP.
+ */
+
+private import ConceptsImports
--- a/python/ql/src/Security/CWE-020/HostnameRegexpShared.qll
+++ b/python/ql/src/Security/CWE-020/HostnameRegexpShared.qll
@@ -0,0 +1,202 @@
+/**
+ * Provides predicates for reasoning about regular expressions
+ * that match URLs and hostname patterns.
+ */
+
+private import HostnameRegexpSpecific
+
+/**
+ * Holds if the given constant is unlikely to occur in the origin part of a URL.
+ */
+predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
+  // Look for any of these cases:
+  // - A character that can't occur in the origin
+  // - Two dashes in a row
+  // - A colon that is not part of port or scheme separator
+  // - A slash that is not part of scheme separator
+  term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
+}
+
+/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
+predicate isDotConstant(RegExpTerm term) {
+  term.(RegExpCharEscape).getValue() = "."
+  or
+  exists(RegExpCharacterClass cls |
+    term = cls and
+    not cls.isInverted() and
+    cls.getNumChild() = 1 and
+    cls.getAChild().(RegExpConstant).getValue() = "."
+  )
+}
+
+/** Holds if `term` is a wildcard `.` or an actual `.` character. */
+predicate isDotLike(RegExpTerm term) {
+  term instanceof RegExpDot
+  or
+  isDotConstant(term)
+}
+
+/** Holds if `term` will only ever be matched against the beginning of the input. */
+predicate matchesBeginningOfString(RegExpTerm term) {
+  term.isRootTerm()
+  or
+  exists(RegExpTerm parent | matchesBeginningOfString(parent) |
+    term = parent.(RegExpSequence).getChild(0)
+    or
+    parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
+    term = parent.(RegExpSequence).getChild(1)
+    or
+    term = parent.(RegExpAlt).getAChild()
+    or
+    term = parent.(RegExpGroup).getAChild()
+  )
+}
+
+/**
+ * Holds if the given sequence contains top-level domain preceded by a dot, such as `.com`,
+ * excluding cases where this is at the very beginning of the regexp.
+ *
+ * `i` is bound to the index of the last child in the top-level domain part.
+ */
+predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
+  seq.getChild(i)
+      .(RegExpConstant)
+      .getValue()
+      .regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
+  isDotLike(seq.getChild(i - 1)) and
+  not (i = 1 and matchesBeginningOfString(seq))
+}
+
+/**
+ * Holds if the given regular expression term contains top-level domain preceded by a dot,
+ * such as `.com`.
+ */
+predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
+
+/**
+ * Holds if `term` will always match a hostname, that is, all disjunctions contain
+ * a hostname pattern that isn't inside a quantifier.
+ */
+predicate alwaysMatchesHostname(RegExpTerm term) {
+  hasTopLevelDomainEnding(term, _)
+  or
+  // `localhost` is considered a hostname pattern, but has no TLD
+  term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
+  or
+  not term instanceof RegExpAlt and
+  not term instanceof RegExpQuantifier and
+  alwaysMatchesHostname(term.getAChild())
+  or
+  alwaysMatchesHostnameAlt(term)
+}
+
+/** Holds if every child of `alt` contains a hostname pattern. */
+predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
+  alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
+}
+
+/**
+ * Holds if the first `i` children of `alt` contains a hostname pattern.
+ *
+ * This is used instead of `forall` to avoid materializing the set of alternatives
+ * that don't contains hostnames, which is much larger.
+ */
+predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
+  alwaysMatchesHostname(alt.getChild(0)) and i = 0
+  or
+  alwaysMatchesHostnameAlt(alt, i - 1) and
+  alwaysMatchesHostname(alt.getChild(i))
+}
+
+/**
+ * Holds if `term` occurs inside a quantifier or alternative (and thus
+ * can not be expected to correspond to a unique match), or as part of
+ * a lookaround assertion (which are rarely used for capture groups).
+ */
+predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
+  exists(RegExpParent parent | parent = term.getParent() |
+    parent instanceof RegExpAlt
+    or
+    parent instanceof RegExpQuantifier
+    or
+    parent instanceof RegExpSubPattern
+    or
+    isInsideChoiceOrSubPattern(parent)
+  )
+}
+
+/**
+ * Holds if `group` is likely to be used as a capture group.
+ */
+predicate isLikelyCaptureGroup(RegExpGroup group) {
+  group.isCapture() and
+  not isInsideChoiceOrSubPattern(group)
+}
+
+/**
+ * Holds if `seq` contains two consecutive dots `..` or escaped dots.
+ *
+ * At least one of these dots is not intended to be a subdomain separator,
+ * so we avoid flagging the pattern in this case.
+ */
+predicate hasConsecutiveDots(RegExpSequence seq) {
+  exists(int i |
+    isDotLike(seq.getChild(i)) and
+    isDotLike(seq.getChild(i + 1))
+  )
+}
+
+predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
+  seq = regexp.getAChild*() and
+  exists(RegExpDot unescapedDot, int i, string hostname |
+    hasTopLevelDomainEnding(seq, i) and
+    not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
+    not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
+    unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
+    unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
+    not hasConsecutiveDots(unescapedDot.getParent()) and
+    hostname =
+      seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
+        seq.getChild(i).getRawValue()
+  |
+    if unescapedDot.getParent() instanceof RegExpQuantifier
+    then
+      // `.*\.example.com` can match `evil.com/?x=.example.com`
+      //
+      // This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
+      // We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
+      // Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
+      // and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
+      seq.getChild(0) instanceof RegExpCaret and
+      not seq.getAChild() instanceof RegExpDollar and
+      seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
+      msg =
+        "has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+          + "' which may cause '" + hostname +
+          "' to be matched anywhere in the URL, outside the hostname."
+    else
+      msg =
+        "has an unescaped '.' before '" + hostname +
+          "', so it might match more hosts than expected."
+  )
+}
+
+predicate incompleteHostnameRegExp(
+  RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
+) {
+  exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
+    regexp = re.getRegExpTerm() and
+    isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
+    (
+      if re.getAParse() != re
+      then (
+        kind = "string, which is used as a regular expression $@," and
+        aux = re.getAParse()
+      ) else (
+        kind = "regular expression" and aux = re
+      )
+    )
+  |
+    message = "This " + kind + " " + msg and label = "here"
+  )
+}
--- a/python/ql/src/Security/CWE-020/HostnameRegexpSpecific.qll
+++ b/python/ql/src/Security/CWE-020/HostnameRegexpSpecific.qll
@@ -0,0 +1,3 @@
+import semmle.python.security.performance.RegExpTreeView
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.Regexp
--- a/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
+++ b/python/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
@@ -8,35 +8,9 @@
 * @id py/incomplete-hostname-regexp
 * @tags correctness
 *       security
- *       external/cwe/cwe-20
+ *       external/cwe/cwe-020
 */

-import python
-import semmle.python.regex
+import HostnameRegexpShared

-private string commonTopLevelDomainRegex() { result = "com|org|edu|gov|uk|net|io" }
-
-/**
- * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
- * and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
- */
-bindingset[pattern]
-predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
-  hostPart =
-    pattern
-        .regexpCapture("(?i).*" +
-            // an unescaped single `.`
-            "(?<!\\\\)[.]" +
-            // immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
-            "([():|?a-z0-9-]+(\\\\)?[.](" + commonTopLevelDomainRegex() + "))" + ".*", 1)
-}
-
-from Regex r, string pattern, string hostPart
-where
-  r.getText() = pattern and
-  isIncompleteHostNameRegExpPattern(pattern, hostPart) and
-  // ignore patterns with capture groups after the TLD
-  not pattern.regexpMatch("(?i).*[.](" + commonTopLevelDomainRegex() + ").*[(][?]:.*[)].*")
-select r,
-  "This regular expression has an unescaped '.' before '" + hostPart +
-    "', so it might match more hosts than expected."
+query predicate problems = incompleteHostnameRegExp/4;
--- a/python/ql/test/3/library-tests/with/test.expected
+++ b/python/ql/test/3/library-tests/with/test.expected
@@ -11,6 +11,4 @@
 | test.py:4:5:4:17 | CtxManager3() |
 | test.py:4:5:4:29 | With |
 | test.py:4:22:4:29 | example3 |
-| test.py:4:31:4:30 |  |
-| test.py:4:31:4:30 | With |
 | test.py:6:5:6:8 | Pass |
--- a/python/ql/test/query-tests/Security/CWE-020-IncompleteHostnameRegExp/IncompleteHostnameRegExp.expected
+++ b/python/ql/test/query-tests/Security/CWE-020-IncompleteHostnameRegExp/IncompleteHostnameRegExp.expected
@@ -1 +1 @@
-| hosttest.py:6:27:6:51 | Str | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
+| hosttest.py:6:31:6:53 | (www\|beta).example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | hosttest.py:6:27:6:51 | ControlFlowNode for Str | here |
--- a/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll
+++ b/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll
@@ -0,0 +1,6 @@
+/**
+ * This file contains imports required for the Ruby version of `ConceptsShared.qll`.
+ * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages.
+ */
+
+import codeql.ruby.DataFlow
--- a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
+++ b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll
@@ -0,0 +1,13 @@
+/**
+ * Provides Concepts which are shared across languages.
+ *
+ * Each language has a language specific `Concepts.qll` file that can import the
+ * shared concepts from this file. A language can either re-export the concept directly,
+ * or can add additional member-predicates that are needed for that language.
+ *
+ * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from
+ * each language, but we will maintain a discipline of moving those concepts to
+ * `ConceptsShared.qll` ASAP.
+ */
+
+private import ConceptsImports
				`@@ -0,0 +1 @@`
				`semmle-extractor-options: /r:System.Linq.dll`