Merge branch 'main' into main

This commit is contained in:
Mathew Payne
2023-04-17 15:10:32 +01:00
committed by GitHub
33 changed files with 806 additions and 176 deletions

View File

@@ -16,160 +16,133 @@
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
*
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
* of `call`, and `vn` is the value number of `instr.`
*/
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
output = call.getOutputArgument(index).getFullyConverted() and
instr.getConvertedResultExpression() = output and
vn.getAnInstruction() = instr
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
pragma[nomagic]
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}
/**
* Holds if `instr` is control-flow reachable in 0 or more steps from
* a call to a `scanf`-like function.
* Holds if `n` represents an uninitialized stack-allocated variable, or a
* newly (and presumed uninitialized) heap allocation.
*/
predicate isUninitialized(Node n) {
exists(n.asUninitialized()) or
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic]
predicate fwdFlow0(Instruction instr) {
isSource(_, _, instr, _, _)
or
exists(Instruction prev |
fwdFlow0(prev) and
prev.getASuccessor() = instr
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isUninitialized(n)
or
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}
/**
* Holds if `instr` is part of the IR translation of `access` that
* is not an expression being deallocated, and `instr` has value
* number `vn`.
*/
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
instr.getAst() = access and
not any(DeallocationExpr dealloc).getFreedExpr() = access and
vn.getAnInstruction() = instr
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
}
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at
* index `index` and `n` is the dataflow node that represents the data after
* it has been written to by `call`.
*/
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
isRelevantScanfCall(call, index, output) and
output = call.getOutputArgument(index) and
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
*/
predicate isSink(Node n, Expr e) {
n.asExpr() = e and
not any(DeallocationExpr dealloc).getFreedExpr() = e
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
*/
pragma[nomagic]
predicate revFlow0(Instruction instr) {
fwdFlow0(instr) and
predicate revFlow(Node n) {
fwdFlow(n) and
(
isSink(instr, _, _)
isSink(n, _)
or
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*/
pragma[nomagic]
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
revFlow0(instr) and
(
isSource(_, _, instr, vn, _)
or
exists(Instruction prev |
fwdFlow(prev, vn) and
prev.getASuccessor() = instr and
not isBarrier(instr, vn)
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
* control-flow reachable from an argument to a `scanf`-like function call.
*/
pragma[nomagic]
predicate revFlow(Instruction instr, ValueNumber vn) {
fwdFlow(instr, pragma[only_bind_out](vn)) and
(
isSink(instr, _, vn)
or
exists(Instruction succ | revFlow(succ, vn) |
instr.getASuccessor() = succ and
not isBarrier(succ, vn)
)
)
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}
/**
* A type that bundles together a reachable instruction with the appropriate
* value number (i.e., the value number that's transferred from the source
* to the sink).
*/
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
class Node extends MkNode {
ValueNumber vn;
Instruction instr;
Node() { this = MkNode(instr, vn) }
final string toString() { result = instr.toString() }
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
final Location getLocation() { result = instr.getLocation() }
}
/**
* Holds if `instr` is an instruction with value number `vn` that is
* used in a store operation, or is overwritten by another call to
* a `scanf`-like function.
*/
private predicate isBarrier(Instruction instr, ValueNumber vn) {
// We only need to compute barriers for instructions that we
// managed to hit during the initial flow stage.
revFlow0(pragma[only_bind_into](instr)) and
valueNumber(instr) = vn and
exists(Expr e | instr.getAst() = e |
instr = any(StoreInstruction s).getDestinationAddress()
or
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
)
}
/** Holds if `n1` steps to `n2` in a single step. */
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* an instruction that is part of the translation of `access` which is a transitive
* control-flow successor of `call`.
*
* Furthermore, `source` and `sink` have identical global value numbers.
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
) {
exists(ValueNumber vn |
isSource(call, index, source, vn, _) and
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
isSink(sink, access, vn)
)
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
}
/**
@@ -177,7 +150,7 @@ predicate hasFlow(
* success in writing the output argument at index `index`.
*/
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
isSource(call, index, _, _, _) and
isSource(call, index, _, _) and
result =
index + 1 -
count(ScanfFormatLiteral f, int n |
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
exists(int index |
hasFlow(_, call, index, _, e) and
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
)
}
from ScanfFunctionCall call, Access access, int minGuard
where hasNonGuardedAccess(call, access, minGuard)
select access,
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()

View File

@@ -1,9 +1,8 @@
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
@@ -12,13 +11,9 @@
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

View File

@@ -48,7 +48,7 @@ int main()
int i = 0;
scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
use(i); // GOOD. Design choice: already initialized variables are fine.
}
{
@@ -261,7 +261,7 @@ int main()
i = 0;
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD
}
{
@@ -269,7 +269,7 @@ int main()
set_by_ref(i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -277,7 +277,7 @@ int main()
set_by_ptr(&i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -299,7 +299,7 @@ int main()
int *ptr_i = &i;
scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
}
{
@@ -307,7 +307,7 @@ int main()
int *ptr_i = &i;
scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
}
{
@@ -427,5 +427,5 @@ void scan_and_write() {
void scan_and_static_variable() {
static int i;
scanf("%d", &i);
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
use(i); // GOOD: static variables are always 0-initialized
}

View File

@@ -157,10 +157,12 @@ private class FunctionalExtendCallShallow extends ExtendCall {
}
/**
* A taint propagating data flow edge from the objects flowing into an extend call to its return value
* A value-preserving data flow edge from the objects flowing into an extend call to its return value
* and to the source of the destination object.
*
* Since all object properties are preserved, we model this as a value-preserving step.
*/
private class ExtendCallTaintStep extends TaintTracking::SharedTaintStep {
private class ExtendCallStep extends PreCallGraphStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(ExtendCall extend |
pred = extend.getASourceOperand() and succ = extend.getDestinationOperand().getALocalSource()

View File

@@ -806,6 +806,10 @@ private predicate basicFlowStepNoBarrier(
callStep(pred, succ) and
summary = PathSummary::call()
or
// Implied receiver flow
CallGraph::impliedReceiverStep(pred, succ) and
summary = PathSummary::call()
or
// Flow out of function
returnStep(pred, succ) and
summary = PathSummary::return()

View File

@@ -241,22 +241,26 @@ module CallGraph {
)
}
private predicate shouldTrackObjectWithMethods(DataFlow::SourceNode node) {
private DataFlow::FunctionNode getAMethodOnPlainObject(DataFlow::SourceNode node) {
(
(
node instanceof DataFlow::ObjectLiteralNode
or
node instanceof DataFlow::FunctionNode
) and
node.getAPropertySource() instanceof DataFlow::FunctionNode
result = node.getAPropertySource()
or
exists(node.(DataFlow::ObjectLiteralNode).getPropertyGetter(_))
result = node.(DataFlow::ObjectLiteralNode).getPropertyGetter(_)
or
exists(node.(DataFlow::ObjectLiteralNode).getPropertySetter(_))
result = node.(DataFlow::ObjectLiteralNode).getPropertySetter(_)
) and
not node.getTopLevel().isExterns()
}
private predicate shouldTrackObjectWithMethods(DataFlow::SourceNode node) {
exists(getAMethodOnPlainObject(node))
}
/**
* Gets a step summary for tracking object literals.
*
@@ -273,4 +277,22 @@ module CallGraph {
or
StepSummary::step(getAnAllocationSiteRef(node), result, objectWithMethodsStep())
}
/**
* Holds if `pred` is assumed to flow to `succ` because a method is stored on an object that is assumed
* to be the receiver of calls to that method.
*
* For example, object literal below is assumed to flow to the receiver of the `foo` function:
* ```js
* let obj = {};
* obj.foo = function() {}
* ```
*/
cached
predicate impliedReceiverStep(DataFlow::SourceNode pred, DataFlow::SourceNode succ) {
exists(DataFlow::SourceNode host |
pred = getAnAllocationSiteRef(host) and
succ = getAMethodOnPlainObject(host).getReceiver()
)
}
}

View File

@@ -94,6 +94,10 @@ private module Cached {
DataFlow::localFieldStep(pred, succ) and
summary = LevelStep()
or
// Implied flow of host object into 'this' of a method
CallGraph::impliedReceiverStep(pred, succ) and
summary = CallStep()
or
exists(string prop |
basicStoreStep(pred, succ, prop) and
summary = StoreStep(prop)

View File

@@ -55,6 +55,22 @@ class Configuration extends TaintTracking::Configuration {
)
}
override predicate isSanitizerEdge(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel lbl
) {
// Suppress the value-preserving step src -> dst in `extend(dst, src)`. This is modeled as a value-preserving
// step because it preserves all properties, but the destination is not actually Object.prototype.
exists(ExtendCall call |
pred = call.getASourceOperand() and
(
succ = call.getDestinationOperand().getALocalSource()
or
succ = call
) and
lbl instanceof ObjectPrototype
)
}
override predicate isAdditionalFlowStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
) {

View File

@@ -31,6 +31,13 @@ module UnsafeJQueryPlugin {
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* The receiver of a function, seen as a sanitizer.
*
* Plugins often do `$(this)` to coerce an existing DOM element to a jQuery object.
*/
private class ThisSanitizer extends Sanitizer instanceof DataFlow::ThisNode { }
/**
* An argument that may act as an HTML fragment rather than a CSS selector, as a sink for remote unsafe jQuery plugins.
*/

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Improved the call graph to better handle the case where a function is stored on
a plain object and subsequently copied to a new host object via an `extend` call.

View File

@@ -36,6 +36,8 @@ nodes
| tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted |
@@ -83,6 +85,8 @@ edges
| tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) | tst-UntrustedDataToExternalAPI.js:24:20:24:42 | [JSON.p ... usted)] |
| tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) | tst-UntrustedDataToExternalAPI.js:24:20:24:42 | [JSON.p ... usted)] |
| tst-UntrustedDataToExternalAPI.js:24:32:24:40 | untrusted | tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:43:8:43:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
@@ -101,4 +105,5 @@ edges
| tst-UntrustedDataToExternalAPI.js:30:13:30:30 | getDeepUntrusted() | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:30:13:30:30 | getDeepUntrusted() | Call to external-lib() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted | Call to external-lib.get.[callback].[param 'res'].send() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted | Call to external-lib.get.[callback].[param 'req'].app.locals.something.foo() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} | Call to lodash.merge() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | Call to lodash.merge() [param 1] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |

View File

@@ -1119,6 +1119,10 @@ nodes
| tst.js:494:18:494:30 | location.hash |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash |
| tst.js:501:43:501:62 | window.location.hash |
| typeahead.js:20:13:20:45 | target |
| typeahead.js:20:22:20:45 | documen ... .search |
| typeahead.js:20:22:20:45 | documen ... .search |
@@ -2271,6 +2275,10 @@ edges
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| typeahead.js:20:13:20:45 | target | typeahead.js:21:12:21:17 | target |
| typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:20:13:20:45 | target |
| typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:20:13:20:45 | target |
@@ -2559,6 +2567,7 @@ edges
| tst.js:486:22:486:24 | url | tst.js:471:13:471:36 | documen ... .search | tst.js:486:22:486:24 | url | Cross-site scripting vulnerability due to $@. | tst.js:471:13:471:36 | documen ... .search | user-provided value |
| tst.js:491:23:491:45 | locatio ... bstr(1) | tst.js:491:23:491:35 | location.hash | tst.js:491:23:491:45 | locatio ... bstr(1) | Cross-site scripting vulnerability due to $@. | tst.js:491:23:491:35 | location.hash | user-provided value |
| tst.js:494:18:494:40 | locatio ... bstr(1) | tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) | Cross-site scripting vulnerability due to $@. | tst.js:494:18:494:30 | location.hash | user-provided value |
| tst.js:501:33:501:63 | decodeU ... n.hash) | tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) | Cross-site scripting vulnerability due to $@. | tst.js:501:43:501:62 | window.location.hash | user-provided value |
| typeahead.js:25:18:25:20 | val | typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:25:18:25:20 | val | Cross-site scripting vulnerability due to $@. | typeahead.js:20:22:20:45 | documen ... .search | user-provided value |
| v-html.vue:2:8:2:23 | v-html=tainted | v-html.vue:6:42:6:58 | document.location | v-html.vue:2:8:2:23 | v-html=tainted | Cross-site scripting vulnerability due to $@. | v-html.vue:6:42:6:58 | document.location | user-provided value |
| various-concat-obfuscations.js:4:4:4:31 | "<div>" ... </div>" | various-concat-obfuscations.js:2:16:2:39 | documen ... .search | various-concat-obfuscations.js:4:4:4:31 | "<div>" ... </div>" | Cross-site scripting vulnerability due to $@. | various-concat-obfuscations.js:2:16:2:39 | documen ... .search | user-provided value |

View File

@@ -1131,6 +1131,10 @@ nodes
| tst.js:494:18:494:30 | location.hash |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash |
| tst.js:501:43:501:62 | window.location.hash |
| typeahead.js:9:28:9:30 | loc |
| typeahead.js:9:28:9:30 | loc |
| typeahead.js:9:28:9:30 | loc |
@@ -2333,6 +2337,10 @@ edges
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |

View File

@@ -313,7 +313,7 @@ function basicExceptions() {
}
function handlebarsSafeString() {
return new Handlebars.SafeString(location); // NOT OK!
return new Handlebars.SafeString(location); // NOT OK!
}
function test2() {
@@ -355,15 +355,15 @@ function thisNodes() {
var target = document.location.search
this.html(target); // NOT OK. (this is a jQuery object)
this.innerHTML = target // OK. (this is a jQuery object)
this.each(function (i, e) {
this.innerHTML = target; // NOT OK. (this is a DOM-node);
this.html(target); // OK. (this is a DOM-node);
e.innerHTML = target; // NOT OK.
});
}
$.fn[pluginName] = myPlugin;
$.fn[pluginName] = myPlugin;
}
@@ -380,7 +380,7 @@ function test() {
function test() {
var target = document.location.search
$('myId').html(target); // NOT OK
$('myId').html(target.taint); // NOT OK
@@ -401,7 +401,7 @@ function test() {
if (random()) {return;}
$('myId').html(target.taint6); // OK
if (random()) {target.taint7 = "safe";}
$('myId').html(target.taint7); // NOT OK
@@ -493,3 +493,13 @@ function urlStuff() {
const myHistory = require('history').createBrowserHistory();
myHistory.push(location.hash.substr(1)); // NOT OK
}
function Foo() {
this.foo = document;
var obj = {
bar: function() {
this.foo.body.innerHTML = decodeURI(window.location.hash); // NOT OK
}
};
Object.assign(this, obj);
}

View File

@@ -188,8 +188,18 @@
}
// extending options
options = $.extend( {}, options );
var target = $( options.of ); // NOT OK
console.log(target);
};
$.fn.blockReceiver = function( options ) {
$.extend({
foo() {
$(this); // OK
}
},
options,
);
};
});

View File

@@ -103,11 +103,23 @@ app.get('/bar', (req, res) => {
let object = {};
object[taint][taint] = taint; // NOT OK
const bad = ["__proto__", "constructor"];
if (bad.includes(taint)) {
return;
}
object[taint][taint] = taint; // OK
});
});
app.get('/assign', (req, res) => {
let taint = String(req.query.data);
let plainObj = {};
let object = Object.assign({}, plainObj[taint]);
object[taint] = taint; // OK - 'object' is not Object.prototype itself (but possibly a copy)
let dest = {};
Object.assign(dest, plainObj[taint]);
dest[taint] = taint; // OK - 'dest' is not Object.prototype itself (but possibly a copy)
});

View File

@@ -5,6 +5,8 @@
#include <iostream>
#include <optional>
#include <vector>
#include <binlog/binlog.hpp>
#include <binlog/adapt_stdoptional.hpp>
#include "{{trap_library}}/TrapLabel.h"
#include "{{trap_library}}/TrapTagTraits.h"
@@ -80,3 +82,9 @@ struct detail::ToTrapClassFunctor<{{name}}Tag> {
};
{{/classes}}
}
{{#classes}}
{{#final}}
BINLOG_ADAPT_STRUCT(codeql::{{name}}, id{{> cpp_list_fields}});
{{/final}}
{{/classes}}

View File

@@ -0,0 +1 @@
{{#bases}}{{#ref}}{{> cpp_list_fields}}{{/ref}}{{/bases}}{{#fields}}, {{field_name}}{{/fields}}

View File

@@ -4,6 +4,7 @@
#include <iostream>
#include <string>
#include <binlog/binlog.hpp>
#include "{{trap_library_dir}}/TrapLabel.h"
#include "{{trap_library_dir}}/TrapTagTraits.h"
@@ -43,3 +44,7 @@ struct ToBindingTrapFunctor<{{type}}> {
{{/id}}
{{/traps}}
}
{{#traps}}
BINLOG_ADAPT_STRUCT(codeql::{{name}}Trap{{#fields}}, {{field_name}}{{/fields}});
{{/traps}}

View File

@@ -4,6 +4,8 @@
The Swift CodeQL package is an experimental and unsupported work in progress.
##
## Building the Swift extractor
First ensure you have Bazel installed, for example with
@@ -28,7 +30,9 @@ set up the search path
in [the per-user CodeQL configuration file](https://docs.github.com/en/code-security/codeql-cli/using-the-codeql-cli/specifying-command-options-in-a-codeql-configuration-file#using-a-codeql-configuration-file)
.
## Code generation
## Development
### Code generation
Run
@@ -41,7 +45,27 @@ to update generated files. This can be shortened to
You can also run `../misc/codegen/codegen.py`, as long as you are beneath the `swift` directory.
## IDE setup
### Logging configuration
A log file is produced for each run under `CODEQL_EXTRACTOR_SWIFT_LOG_DIR` (the usual DB log directory).
You can use the environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` to configure levels for
loggers and outputs. This must have the form of a comma separated `spec:min_level` list, where
`spec` is either a glob pattern (made up of alphanumeric, `/`, `*` and `.` characters) for
matching logger names or one of `out:bin`, `out:text` or `out:console`, and `min_level` is one
of `trace`, `debug`, `info`, `warning`, `error`, `critical` or `no_logs` to turn logs completely off.
Current output default levels are no binary logs, `info` logs or higher in the text file and `warning` logs or higher on
standard error. By default, all loggers are configured with the lowest logging level of all outputs (`info` by default).
Logger names are visible in the textual logs between `[...]`. Examples are `extractor/dispatcher`
or `extractor/<source filename>.trap`. An example of `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` usage is the following:
```bash
export CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS=out:console:trace,out:text:no_logs,*:warning,*.trap:trace
```
This will turn off generation of a text log file, redirecting all logs to standard error, but will make all loggers only
write warnings or above, except for trap emission logs which will output all logs.
### CLion and the native bazel plugin
@@ -84,3 +108,7 @@ In particular for breakpoints to work you might need to setup the following remo
|-------------|--------------------------------------|
| `swift` | `/absolute/path/to/codeql/swift` |
| `bazel-out` | `/absolute/path/to/codeql/bazel-out` |
### Thread safety
The extractor is single-threaded, and there was no effort to make anything in it thread-safe.

View File

@@ -8,6 +8,7 @@ swift_cc_library(
deps = [
"//swift/extractor/config",
"//swift/extractor/infra/file",
"//swift/extractor/infra/log",
"//swift/extractor/trap",
"//swift/third_party/swift-llvm-support",
],

View File

@@ -13,6 +13,7 @@
#include "swift/extractor/infra/SwiftLocationExtractor.h"
#include "swift/extractor/infra/SwiftBodyEmissionStrategy.h"
#include "swift/extractor/config/SwiftExtractorState.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
namespace codeql {
@@ -151,7 +152,9 @@ class SwiftDispatcher {
return *l;
}
waitingForNewLabel = e;
// TODO: add tracing logs for visited stuff, maybe within the translators?
visit(e, std::forward<Args>(args)...);
Log::flush();
// TODO when everything is moved to structured C++ classes, this should be moved to createEntry
if (auto l = store.get(e)) {
if constexpr (IsLocatable<E>) {
@@ -329,6 +332,7 @@ class SwiftDispatcher {
SwiftBodyEmissionStrategy& bodyEmissionStrategy;
Store::Handle waitingForNewLabel{std::monostate{}};
std::unordered_set<swift::ModuleDecl*> encounteredModules;
Logger logger{"dispatcher"};
};
} // namespace codeql

View File

@@ -36,6 +36,8 @@ class TargetFile {
return *this;
}
const std::filesystem::path& target() const { return targetPath; }
private:
TargetFile(const std::filesystem::path& target,
const std::filesystem::path& targetDir,

View File

@@ -0,0 +1,7 @@
# Logging infrastructure for the Swift extractor, built on top of the vendored
# binlog library (all .cpp/.h files in this directory).
cc_library(
    name = "log",
    srcs = glob(["*.cpp"]),
    hdrs = glob(["*.h"]),
    visibility = ["//visibility:public"],
    deps = ["@binlog"],
)

View File

@@ -0,0 +1,163 @@
#include "swift/extractor/infra/log/SwiftLogging.h"
#include <filesystem>
#include <stdlib.h>
#include <optional>
#define LEVEL_REGEX_PATTERN "trace|debug|info|warning|error|critical|no_logs"
BINLOG_ADAPT_ENUM(codeql::Log::Level, trace, debug, info, warning, error, critical, no_logs)
namespace codeql {
namespace {
using LevelRule = std::pair<std::regex, Log::Level>;
using LevelRules = std::vector<LevelRule>;
// Returns the level of the most recently added rule whose regex matches `name`,
// or `dflt` when no rule matches. Later rules take precedence, hence the
// backward scan.
Log::Level getLevelFor(std::string_view name, const LevelRules& rules, Log::Level dflt) {
  for (auto i = rules.size(); i > 0; --i) {
    const auto& [pattern, level] = rules[i - 1];
    if (std::regex_match(name.begin(), name.end(), pattern)) {
      return level;
    }
  }
  return dflt;
}
// Returns the value of environment variable `var`, or `dflt` when it is unset.
const char* getEnvOr(const char* var, const char* dflt) {
  const char* value = getenv(var);
  return value != nullptr ? value : dflt;
}
// Converts a regex sub-match over a char buffer into a non-owning string view.
std::string_view matchToView(std::csub_match m) {
  auto length = static_cast<size_t>(m.length());
  return std::string_view{m.first, length};
}
// Maps a severity name to the corresponding level. Anything unrecognized
// (which the callers' regex restricts to "no_logs") disables logging.
Log::Level stringToLevel(std::string_view v) {
  static constexpr std::pair<std::string_view, Log::Level> names[] = {
      {"trace", Log::Level::trace},   {"debug", Log::Level::debug},
      {"info", Log::Level::info},     {"warning", Log::Level::warning},
      {"error", Log::Level::error},   {"critical", Log::Level::critical},
  };
  for (const auto& [name, level] : names) {
    if (v == name) {
      return level;
    }
  }
  return Log::Level::no_logs;
}
// Parses a regex sub-match into a severity level. Callers only pass sub-matches
// of LEVEL_REGEX_PATTERN, so the text is always a valid level name.
Log::Level matchToLevel(std::csub_match m) {
  return stringToLevel(matchToView(m));
}
} // namespace
// Parses the level-configuration environment variable `envVar` (if set) as a
// comma-separated list of `spec:level` entries, where `spec` is either a glob
// pattern matched against logger names or an `out:<bin|text|console>` output
// selector. Glob rules are compiled to regexes and appended to `sourceRules`;
// output rules set the corresponding output's level directly. Returns
// human-readable problems for entries that did not parse, so the caller can
// log them once logging is actually up.
std::vector<std::string> Log::collectSeverityRulesAndReturnProblems(const char* envVar) {
  std::vector<std::string> problems;
  if (auto levels = getEnvOr(envVar, nullptr)) {
    // expect comma-separated <glob pattern>:<log severity>
    std::regex comma{","};
    std::regex levelAssignment{R"((?:([*./\w]+)|(?:out:(bin|text|console))):()" LEVEL_REGEX_PATTERN
                               ")"};
    // split the variable on commas; -1 selects the text between the matches
    std::cregex_token_iterator begin{levels, levels + strlen(levels), comma, -1};
    std::cregex_token_iterator end{};
    for (auto it = begin; it != end; ++it) {
      std::cmatch match;
      if (std::regex_match(it->first, it->second, match, levelAssignment)) {
        auto level = matchToLevel(match[3]);
        if (match[1].matched) {
          // group 1 matched: the rule targets loggers by glob pattern
          auto pattern = match[1].str();
          // replace all "*" with ".*" and all "." with "\.", turning the glob pattern into a regex
          std::string::size_type pos = 0;
          while ((pos = pattern.find_first_of("*.", pos)) != std::string::npos) {
            pattern.insert(pos, (pattern[pos] == '*') ? "." : "\\");
            pos += 2;  // skip past the two characters just produced
          }
          sourceRules.emplace_back(pattern, level);
        } else {
          // group 2 matched: the rule targets one of the three outputs
          auto out = matchToView(match[2]);
          if (out == "bin") {
            binary.level = level;
          } else if (out == "text") {
            text.level = level;
          } else if (out == "console") {
            console.level = level;
          }
        }
      } else {
        problems.emplace_back("Malformed log level rule: " + it->str());
      }
    }
  }
  return problems;
}
// One-time logging setup, run from the `Log` constructor (i.e. on first use of
// the singleton). Reads rule/level configuration from the environment, opens
// the text and/or binary log files under CODEQL_EXTRACTOR_SWIFT_LOG_DIR, and
// finally reports any problems through the freshly configured logging itself.
void Log::configure() {
  // as we are configuring logging right now, we collect problems and log them at the end
  auto problems = collectSeverityRulesAndReturnProblems("CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS");
  if (text || binary) {
    // both file outputs share the path <log dir>/<logRootName>/<timestamp>, differing
    // only in extension
    std::filesystem::path logFile = getEnvOr("CODEQL_EXTRACTOR_SWIFT_LOG_DIR", ".");
    logFile /= logRootName;
    logFile /= std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
    std::error_code ec;
    std::filesystem::create_directories(logFile.parent_path(), ec);
    if (!ec) {
      if (text) {
        logFile.replace_extension(".log");
        textFile.open(logFile);
        if (!textFile) {
          // disable this output rather than failing the extractor
          problems.emplace_back("Unable to open text log file " + logFile.string());
          text.level = Level::no_logs;
        }
      }
      if (binary) {
        logFile.replace_extension(".blog");
        binary.output.open(logFile, std::fstream::out | std::fstream::binary);
        if (!binary.output) {
          // disable this output rather than failing the extractor
          problems.emplace_back("Unable to open binary log file " + logFile.string());
          binary.level = Level::no_logs;
        }
      }
    } else {
      // without the directory neither file output can work
      problems.emplace_back("Unable to create log directory " + logFile.parent_path().string() +
                            ": " + ec.message());
      binary.level = Level::no_logs;
      text.level = Level::no_logs;
    }
  }
  for (const auto& problem : problems) {
    LOG_ERROR("{}", problem);
  }
  LOG_INFO("Logging configured (binary: {}, text: {}, console: {})", binary.level, text.level,
           console.level);
  flushImpl();
}
// Drains buffered log events from the binlog session into the outputs: the
// session calls back into `Log::write`, which fans out to each enabled output.
void Log::flushImpl() {
  session.consume(*this);
}
// Builds the configuration for a logger named `name`: its fully qualified name
// is `<logRootName>/<name>`, and its level comes from the last matching source
// rule, defaulting to the lowest level of the three outputs.
Log::LoggerConfiguration Log::getLoggerConfigurationImpl(std::string_view name) {
  LoggerConfiguration ret{session, std::string{logRootName}};
  ret.fullyQualifiedName += '/';
  ret.fullyQualifiedName += name;
  // default: let through anything that at least one output would accept
  ret.level = std::min({binary.level, text.level, console.level});
  ret.level = getLevelFor(ret.fullyQualifiedName, sourceRules, ret.level);
  // avoid Logger constructor loop
  if (name != "logging") {
    LOG_DEBUG("Configuring logger {} with level {}", ret.fullyQualifiedName, ret.level);
  }
  return ret;
}
// Sink for `binlog::Session::consume`: forwards a chunk of serialized events to
// every output that is not disabled (each output applies its own level filter).
Log& Log::write(const char* buffer, std::streamsize size) {
  if (console) {
    console.write(buffer, size);
  }
  if (text) {
    text.write(buffer, size);
  }
  if (binary) {
    binary.write(buffer, size);
  }
  return *this;
}
// Logger used by the logging machinery itself, created lazily on first use.
// Its name, "logging", is special-cased in getLoggerConfigurationImpl to avoid
// recursing during construction.
Logger& Log::logger() {
  static Logger ret{getLoggerConfigurationImpl("logging")};
  return ret;
}
} // namespace codeql

View File

@@ -0,0 +1,189 @@
#pragma once
#include <fstream>
#include <iostream>
#include <regex>
#include <vector>
#include <binlog/binlog.hpp>
#include <binlog/TextOutputStream.hpp>
#include <binlog/EventFilter.hpp>
#include <binlog/adapt_stdfilesystem.hpp>
#include <binlog/adapt_stdoptional.hpp>
#include <binlog/adapt_stdvariant.hpp>
// Logging macros. These will call `logger()` to get a Logger instance, picking up any `logger`
// defined in the current scope. Domain-specific loggers can be added or used by either:
// * providing a class field called `logger` (as `Logger::operator()()` returns itself)
// * declaring a local `logger` variable (to be used for one-time execution like code in `main`)
// * declaring a `Logger& logger()` function returning a reference to a static local variable
// * passing a logger around using a `Logger& logger` function parameter
// They are created with a name that appears in the logs and can be used to filter debug levels (see
// `Logger`).
#define LOG_CRITICAL(...) LOG_WITH_LEVEL(codeql::Log::Level::critical, __VA_ARGS__)
#define LOG_ERROR(...) LOG_WITH_LEVEL(codeql::Log::Level::error, __VA_ARGS__)
#define LOG_WARNING(...) LOG_WITH_LEVEL(codeql::Log::Level::warning, __VA_ARGS__)
#define LOG_INFO(...) LOG_WITH_LEVEL(codeql::Log::Level::info, __VA_ARGS__)
#define LOG_DEBUG(...) LOG_WITH_LEVEL(codeql::Log::Level::debug, __VA_ARGS__)
#define LOG_TRACE(...) LOG_WITH_LEVEL(codeql::Log::Level::trace, __VA_ARGS__)
// only do the actual logging if the picked up `Logger` instance is configured to handle the
// provided log level. `LEVEL` must be a compile-time constant. `logger()` is evaluated once
#define LOG_WITH_LEVEL(LEVEL, ...) \
do { \
constexpr codeql::Log::Level _level = LEVEL; \
codeql::Logger& _logger = logger(); \
if (_level >= _logger.level()) { \
BINLOG_CREATE_SOURCE_AND_EVENT(_logger.writer(), _level, /* category */, binlog::clockNow(), \
__VA_ARGS__); \
} \
} while (false)
// avoid calling into binlog's original macros
#undef BINLOG_CRITICAL
#undef BINLOG_CRITICAL_W
#undef BINLOG_CRITICAL_C
#undef BINLOG_CRITICAL_WC
#undef BINLOG_ERROR
#undef BINLOG_ERROR_W
#undef BINLOG_ERROR_C
#undef BINLOG_ERROR_WC
#undef BINLOG_WARNING
#undef BINLOG_WARNING_W
#undef BINLOG_WARNING_C
#undef BINLOG_WARNING_WC
#undef BINLOG_INFO
#undef BINLOG_INFO_W
#undef BINLOG_INFO_C
#undef BINLOG_INFO_WC
#undef BINLOG_DEBUG
#undef BINLOG_DEBUG_W
#undef BINLOG_DEBUG_C
#undef BINLOG_DEBUG_WC
#undef BINLOG_TRACE
#undef BINLOG_TRACE_W
#undef BINLOG_TRACE_C
#undef BINLOG_TRACE_WC
namespace codeql {
// tools should define this to tweak the root name of all loggers
extern const std::string_view logRootName;
// This class is responsible for the global log state (outputs, log level rules, flushing)
// State is stored in the singleton `Log::instance()`.
// Before using logging, `Log::configure("<name>")` should be used (e.g.
// `Log::configure("extractor")`). Then, `Log::flush()` should be regularly called.
// Logging is configured upon first usage. This consists of
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_DIR` to choose where to dump the log
// file(s). Log files will go to a subdirectory thereof named after `logRootName`
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` to configure levels for
// loggers and outputs. This must have the form of a comma separated `spec:level` list, where
// `spec` is either a glob pattern (made up of alphanumeric, `/`, `*` and `.` characters) for
// matching logger names or one of `out:bin`, `out:text` or `out:console`.
// Output default levels can be seen in the corresponding initializers below. By default, all
// loggers are configured with the lowest output level
class Log {
 public:
  // severity levels are binlog's, listed in increasing order ending in `no_logs`
  using Level = binlog::Severity;

  // Internal data required to build `Logger` instances
  struct LoggerConfiguration {
    binlog::Session& session;
    std::string fullyQualifiedName;
    Level level;
  };

  // Flush logs to the designated outputs
  static void flush() { instance().flushImpl(); }

  // create `Logger` configuration, used internally by `Logger`'s constructor
  static LoggerConfiguration getLoggerConfiguration(std::string_view name) {
    return instance().getLoggerConfigurationImpl(name);
  }

 private:
  // binlog text output pattern (see the binlog documentation for the placeholders)
  static constexpr const char* format = "%u %S [%n] %m (%G:%L)\n";

  // configuration happens when the singleton is first constructed
  Log() { configure(); }

  static Log& instance() {
    static Log ret;
    return ret;
  }

  // logger for the logging machinery itself (its "logging" name is special-cased
  // in getLoggerConfigurationImpl to avoid construction recursion)
  class Logger& logger();
  void configure();
  void flushImpl();
  LoggerConfiguration getLoggerConfigurationImpl(std::string_view name);

  // make `session.consume(*this)` work, which requires access to `write`
  friend binlog::Session;
  Log& write(const char* buffer, std::streamsize size);

  // Output filtered according to a configured log level
  template <typename Output>
  struct FilteredOutput {
    binlog::Severity level;
    Output output;
    // drop events below the configured level before they reach the output
    binlog::EventFilter filter{
        [this](const binlog::EventSource& src) { return src.severity >= level; }};
    template <typename... Args>
    FilteredOutput(Level level, Args&&... args)
        : level{level}, output{std::forward<Args>(args)...} {}
    FilteredOutput& write(const char* buffer, std::streamsize size) {
      filter.writeAllowed(buffer, size, output);
      return *this;
    }
    // if configured as `no_logs`, the output is effectively disabled
    explicit operator bool() const { return level < Level::no_logs; }
  };

  using LevelRule = std::pair<std::regex, Level>;
  using LevelRules = std::vector<LevelRule>;

  binlog::Session session;
  std::ofstream textFile;
  // default output levels: binary off, text file at info, stderr console at warning
  FilteredOutput<std::ofstream> binary{Level::no_logs};
  FilteredOutput<binlog::TextOutputStream> text{Level::info, textFile, format};
  FilteredOutput<binlog::TextOutputStream> console{Level::warning, std::cerr, format};
  // logger-name rules collected from the environment; later rules win
  LevelRules sourceRules;

  std::vector<std::string> collectSeverityRulesAndReturnProblems(const char* envVar);
};
// This class represent a named domain-specific logger, responsible for pushing logs using the
// underlying `binlog::SessionWriter` class. This has a configured log level, so that logs on this
// `Logger` with a level lower than the configured one are no-ops. The level is configured based
// on rules matching `<logRootName>/<name>` in `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` (see above).
// `<name>` is provided in the constructor. If no rule matches the name, the log level defaults to
// the minimum level of all outputs.
class Logger {
 public:
  // configured logger based on name, as explained above
  explicit Logger(std::string_view name) : Logger(Log::getLoggerConfiguration(name)) {}

  // used internally, public to be accessible to Log for its own logger
  explicit Logger(Log::LoggerConfiguration&& configuration)
      : w{configuration.session, queueSize, /* id */ 0,
          std::move(configuration.fullyQualifiedName)},
        level_{configuration.level} {}

  // underlying binlog writer, used by the logging macros to emit events
  binlog::SessionWriter& writer() { return w; }

  // minimum severity this logger emits; the macros skip lower-level calls entirely
  Log::Level level() const { return level_; }

  // make defining a `Logger logger` field be equivalent to providing a `Logger& logger()` function
  // in order to be picked up by logging macros
  Logger& operator()() { return *this; }

 private:
  static constexpr size_t queueSize = 1 << 20;  // default taken from binlog
  binlog::SessionWriter w;
  Log::Level level_;
};
} // namespace codeql

View File

@@ -16,9 +16,12 @@
#include "swift/extractor/invocation/SwiftInvocationExtractor.h"
#include "swift/extractor/trap/TrapDomain.h"
#include "swift/extractor/infra/file/Path.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
using namespace std::string_literals;
const std::string_view codeql::logRootName = "extractor";
// must be called before processFrontendOptions modifies output paths
static void lockOutputSwiftModuleTraps(codeql::SwiftExtractorState& state,
const swift::FrontendOptions& options) {
@@ -179,7 +182,28 @@ codeql::SwiftExtractorConfiguration configure(int argc, char** argv) {
return configuration;
}
int main(int argc, char** argv) {
// TODO: use `absl::StrJoin` or `boost::algorithm::join`
// Joins the command-line arguments (excluding argv[0]) into a single
// space-separated string, for logging purposes.
static auto argDump(int argc, char** argv) {
  std::string ret;
  for (auto arg = argv + 1; arg < argv + argc; ++arg) {
    ret += *arg;
    ret += ' ';
  }
  // drop the trailing separator; guard the empty case (argc <= 1), where
  // pop_back() on an empty string would be undefined behavior
  if (!ret.empty()) {
    ret.pop_back();
  }
  return ret;
}
// TODO: use `absl::StrJoin` or `boost::algorithm::join`
// Renders the process environment (a null-terminated array of "KEY=value"
// strings) with one entry per line, each followed by a newline.
static auto envDump(char** envp) {
  std::string ret;
  for (; *envp != nullptr; ++envp) {
    ret.append(*envp);
    ret.push_back('\n');
  }
  return ret;
}
int main(int argc, char** argv, char** envp) {
checkWhetherToRunUnderTool(argc, argv);
if (argc == 1) {
@@ -193,6 +217,11 @@ int main(int argc, char** argv) {
initializeSwiftModules();
const auto configuration = configure(argc, argv);
{
codeql::Logger logger{"main"};
LOG_INFO("calling extractor with arguments \"{}\"", argDump(argc, argv));
LOG_DEBUG("environment:\n{}\n", envDump(envp));
}
auto openInterception = codeql::setupFileInterception(configuration);
@@ -204,5 +233,7 @@ int main(int argc, char** argv) {
observer.markSuccessfullyExtractedFiles();
}
codeql::Log::flush();
return frontend_rc;
}

View File

@@ -49,5 +49,6 @@ swift_cc_library(
visibility = ["//visibility:public"],
deps = [
"//swift/extractor/infra/file",
"//swift/extractor/infra/log",
],
)

View File

@@ -5,18 +5,23 @@
#include "swift/extractor/trap/TrapLabel.h"
#include "swift/extractor/infra/file/TargetFile.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
namespace codeql {
// Abstracts a given trap output file, with its own universe of trap labels
class TrapDomain {
TargetFile out;
Logger logger{getLoggerName()};
public:
explicit TrapDomain(TargetFile&& out) : out{std::move(out)} {}
explicit TrapDomain(TargetFile&& out) : out{std::move(out)} {
LOG_DEBUG("writing trap file with target {}", this->out.target());
}
template <typename Entry>
void emit(const Entry& e) {
LOG_TRACE("{}", e);
out << e << '\n';
}
@@ -48,6 +53,7 @@ class TrapDomain {
Args&&... args) {
auto ret = allocateLabel<Tag>();
assignKey(ret, std::forward<Args>(args)...);
LOG_TRACE("^^^ .implementation {}", implementationId);
out << " .implementation " << trapQuoted(implementationId) << '\n';
return ret;
}
@@ -62,6 +68,7 @@ class TrapDomain {
template <typename Tag>
void assignStar(TrapLabel<Tag> label) {
LOG_TRACE("{}=*", label);
out << label << "=*";
}
@@ -69,6 +76,7 @@ class TrapDomain {
void assignKey(TrapLabel<Tag> label, const std::string& key) {
// prefix the key with the id to guarantee the same key is not used wrongly with different tags
auto prefixed = std::string(Tag::prefix) + '_' + key;
LOG_TRACE("{}=@{}", label, prefixed);
out << label << "=@" << trapQuoted(prefixed);
}
@@ -78,6 +86,17 @@ class TrapDomain {
(oss << ... << keyParts);
assignKey(label, oss.str());
}
// Derives this trap domain's logger name from its target file path, so that
// per-file trap log levels can be configured via glob rules.
std::string getLoggerName() {
  // packaged swift modules are typically structured as
  // `Module.swiftmodule/<arch_triple>.swiftmodule`, so the parent is more informative
  // We use `Module.swiftmodule/.trap` then
  if (auto parent = out.target().parent_path(); parent.extension() == ".swiftmodule") {
    return parent.filename() / ".trap";
  } else {
    return out.target().filename();
  }
}
};
} // namespace codeql

View File

@@ -5,6 +5,9 @@
#include <iostream>
#include <string>
#include <vector>
#include <binlog/binlog.hpp>
#include <cmath>
#include <charconv>
namespace codeql {
@@ -18,6 +21,7 @@ class UntypedTrapLabel {
friend class std::hash<UntypedTrapLabel>;
template <typename Tag>
friend class TrapLabel;
BINLOG_ADAPT_STRUCT_FRIEND;
static constexpr uint64_t undefined = 0xffffffffffffffff;
@@ -38,7 +42,22 @@ class UntypedTrapLabel {
return out;
}
std::string str() const {
std::string ret(strSize(), '\0');
ret[0] = '#';
std::to_chars(ret.data() + 1, ret.data() + ret.size(), id_, 16);
return ret;
}
friend bool operator==(UntypedTrapLabel lhs, UntypedTrapLabel rhs) { return lhs.id_ == rhs.id_; }
private:
size_t strSize() const {
if (id_ == undefined) return 17; // #ffffffffffffffff
if (id_ == 0) return 2; // #0
// TODO: use absl::bit_width or C+20 std::bit_width instead of this ugly formula
return /* # */ 1 + /* hex digits */ static_cast<size_t>(ceil(log2(id_ + 1) / 4));
}
};
template <typename TagParam>
@@ -100,3 +119,33 @@ struct hash<codeql::UntypedTrapLabel> {
}
};
} // namespace std
namespace mserialize {
// log labels using their string representation, using binlog/mserialize internal plumbing
template <>
struct CustomTag<codeql::UntypedTrapLabel, void> : detail::BuiltinTag<std::string> {
using T = codeql::UntypedTrapLabel;
};
template <typename Tag>
struct CustomTag<codeql::TrapLabel<Tag>, void> : detail::BuiltinTag<std::string> {
using T = codeql::TrapLabel<Tag>;
};
template <>
struct CustomSerializer<codeql::UntypedTrapLabel, void> {
template <typename OutputStream>
static void serialize(codeql::UntypedTrapLabel label, OutputStream& out) {
mserialize::serialize(label.str(), out);
}
static size_t serialized_size(codeql::UntypedTrapLabel label) {
return sizeof(std::uint32_t) + label.strSize();
}
};
template <typename Tag>
struct CustomSerializer<codeql::TrapLabel<Tag>, void> : CustomSerializer<codeql::UntypedTrapLabel> {
};
} // namespace mserialize

0
swift/third_party/binlog/BUILD.bazel vendored Normal file
View File

View File

@@ -0,0 +1,19 @@
# Vendored build of the binlog logging library (header-heavy, with its inline
# sources compiled in).
cc_library(
    name = "binlog",
    hdrs = glob(["include/**/*.hpp"]),
    srcs = glob(["include/**/*.cpp"]),
    includes = ["include"],
    visibility = ["//visibility:public"],
)

# command-line tool shipped with binlog (see the upstream project for usage)
cc_binary(
    name = "bread",
    srcs = ["bin/bread.cpp", "bin/printers.hpp", "bin/printers.cpp", "bin/getopt.hpp"],
    deps = [":binlog"],
)

# command-line tool shipped with binlog (see the upstream project for usage)
cc_binary(
    name = "brecovery",
    srcs = ["bin/brecovery.cpp", "bin/getopt.hpp"],
    deps = [":binlog"],
)

View File

@@ -12,16 +12,29 @@ _swift_arch_map = {
"macOS-X64": "darwin_x86_64",
}
def _get_label(repository_name, package, target):
return "@%s//swift/third_party/%s:%s" % (repository_name, package, target)
def _get_label(workspace_name, package, target):
return "@%s//swift/third_party/%s:%s" % (workspace_name, package, target)
def _get_build(repository_name, package):
return _get_label(repository_name, package, "BUILD.%s.bazel" % package)
def _get_build(workspace_name, package):
return _get_label(workspace_name, package, "BUILD.%s.bazel" % package)
def _get_patch(repository_name, package, patch):
return _get_label(repository_name, package, "patches/%s.patch" % patch)
def _get_patch(workspace_name, package, patch):
return _get_label(workspace_name, package, "patches/%s.patch" % patch)
def load_dependencies(repository_name):
def _github_archive(*, name, workspace_name, repository, commit, sha256 = None, patches = None):
    """Declares an `http_archive` fetching `repository` ("<org>/<repo>") at `commit` from GitHub.

    The build file is resolved to `swift/third_party/<name>/BUILD.<name>.bazel` in
    `workspace_name`, and each entry of `patches` to
    `swift/third_party/<name>/patches/<patch>.patch`, applied with `-p1`.
    """
    github_name = repository[repository.index("/") + 1:]  # strip the "<org>/" prefix
    patches = [_get_patch(workspace_name, name, p) for p in patches or []]
    http_archive(
        name = name,
        url = "https://github.com/%s/archive/%s.zip" % (repository, commit),
        strip_prefix = "%s-%s" % (github_name, commit),
        build_file = _get_build(workspace_name, name),
        sha256 = sha256,
        patch_args = ["-p1"],
        patches = patches,
    )
def load_dependencies(workspace_name):
for repo_arch, arch in _swift_arch_map.items():
sha256 = _swift_sha_map[repo_arch]
@@ -31,16 +44,24 @@ def load_dependencies(repository_name):
_swift_prebuilt_version,
repo_arch,
),
build_file = _get_build(repository_name, "swift-llvm-support"),
build_file = _get_build(workspace_name, "swift-llvm-support"),
sha256 = sha256,
patch_args = ["-p1"],
patches = [],
)
http_archive(
_github_archive(
name = "picosha2",
url = "https://github.com/okdshin/PicoSHA2/archive/27fcf6979298949e8a462e16d09a0351c18fcaf2.zip",
strip_prefix = "PicoSHA2-27fcf6979298949e8a462e16d09a0351c18fcaf2",
build_file = _get_build(repository_name, "picosha2"),
workspace_name = workspace_name,
repository = "okdshin/PicoSHA2",
commit = "27fcf6979298949e8a462e16d09a0351c18fcaf2",
sha256 = "d6647ca45a8b7bdaf027ecb68d041b22a899a0218b7206dee755c558a2725abb",
)
_github_archive(
name = "binlog",
workspace_name = workspace_name,
repository = "morganstanley/binlog",
commit = "3fef8846f5ef98e64211e7982c2ead67e0b185a6",
sha256 = "f5c61d90a6eff341bf91771f2f465be391fd85397023e1b391c17214f9cbd045",
)