Merge branch 'main' of github.com:github/codeql into MagicMethods

2025-12-20 10:46:30 +01:00 · 2020-08-26 17:42:44 +02:00
parent de1c75c279 b1946c60dd
commit 47e35c530d
350 changed files with 20369 additions and 3049 deletions
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll
@@ -1066,7 +1066,7 @@ private module LocalFlowBigStep {
   * Holds if `node` can be the first node in a maximal subsequence of local
   * flow steps in a dataflow path.
   */
-  private predicate localFlowEntry(Node node, Configuration config) {
+  predicate localFlowEntry(Node node, Configuration config) {
    nodeCand2(node, config) and
    (
      config.isSource(node) or
@@ -1650,53 +1650,53 @@ private class AccessPathOption extends TAccessPathOption {
 * Holds if `node` is reachable with access path `ap` from a source in
 * the configuration `config`.
 *
- * The Boolean `fromArg` records whether the node is reached through an
+ * The call context `cc` records whether the node is reached through an
 * argument in a call, and if so, `argAp` records the access path of that
 * argument.
 */
 private predicate flowFwd(
-  Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
-  flowFwd0(node, fromArg, argAp, apf, ap, config) and
+  flowFwd0(node, cc, argAp, apf, ap, config) and
  flowCand(node, _, _, apf, config)
 }

 private predicate flowFwd0(
-  Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
  flowCand(node, _, _, _, config) and
  config.isSource(node) and
-  fromArg = false and
+  cc instanceof CallContextAny and
  argAp = TAccessPathNone() and
  ap = TNil(getNodeType(node)) and
  apf = ap.(AccessPathNil).getFront()
  or
  flowCand(node, _, _, _, unbind(config)) and
  (
-    exists(Node mid |
-      flowFwd(mid, fromArg, argAp, apf, ap, config) and
-      localFlowBigStep(mid, node, true, _, config, _)
+    exists(Node mid, LocalCallContext localCC |
+      flowFwdLocalEntry(mid, cc, argAp, apf, ap, localCC, config) and
+      localFlowBigStep(mid, node, true, _, config, localCC)
    )
    or
-    exists(Node mid, AccessPathNil nil |
-      flowFwd(mid, fromArg, argAp, _, nil, config) and
-      localFlowBigStep(mid, node, false, apf, config, _) and
+    exists(Node mid, AccessPathNil nil, LocalCallContext localCC |
+      flowFwdLocalEntry(mid, cc, argAp, _, nil, localCC, config) and
+      localFlowBigStep(mid, node, false, apf, config, localCC) and
      apf = ap.(AccessPathNil).getFront()
    )
    or
    exists(Node mid |
      flowFwd(mid, _, _, apf, ap, config) and
      jumpStep(mid, node, config) and
-      fromArg = false and
+      cc instanceof CallContextAny and
      argAp = TAccessPathNone()
    )
    or
    exists(Node mid, AccessPathNil nil |
      flowFwd(mid, _, _, _, nil, config) and
      additionalJumpStep(mid, node, config) and
-      fromArg = false and
+      cc instanceof CallContextAny and
      argAp = TAccessPathNone() and
      ap = TNil(getNodeType(node)) and
      apf = ap.(AccessPathNil).getFront()
@@ -1704,40 +1704,51 @@ private predicate flowFwd0(
  )
  or
  // store
-  exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, fromArg, argAp, config))
+  exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, cc, argAp, config))
  or
  // read
  exists(TypedContent tc |
-    flowFwdRead(node, _, push(tc, ap), apf, fromArg, argAp, config) and
+    flowFwdRead(node, _, push(tc, ap), apf, cc, argAp, config) and
    flowFwdConsCand(tc, apf, ap, config)
  )
  or
  // flow into a callable
-  flowFwdIn(_, node, _, _, apf, ap, config) and
-  fromArg = true and
+  flowFwdIn(_, node, _, cc, _, apf, ap, config) and
  if flowCand(node, true, _, apf, config)
  then argAp = TAccessPathSome(ap)
  else argAp = TAccessPathNone()
  or
  // flow out of a callable
  exists(DataFlowCall call |
-    flowFwdOut(call, node, fromArg, argAp, apf, ap, config) and
-    fromArg = false
+    exists(DataFlowCallable c |
+      flowFwdOut(call, node, any(CallContextNoCall innercc), c, argAp, apf, ap, config) and
+      if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+    )
    or
    exists(AccessPath argAp0 |
      flowFwdOutFromArg(call, node, argAp0, apf, ap, config) and
-      flowFwdIsEntered(call, fromArg, argAp, argAp0, config)
+      flowFwdIsEntered(call, cc, argAp, argAp0, config)
    )
  )
 }

+pragma[nomagic]
+private predicate flowFwdLocalEntry(
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  LocalCallContext localCC, Configuration config
+) {
+  flowFwd(node, cc, argAp, apf, ap, config) and
+  localFlowEntry(node, config) and
+  localCC = getLocalCallContext(cc, node.getEnclosingCallable())
+}
+
 pragma[nomagic]
 private predicate flowFwdStore(
-  Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, boolean fromArg,
+  Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, CallContext cc,
  AccessPathOption argAp, Configuration config
 ) {
  exists(Node mid, AccessPathFront apf0 |
-    flowFwd(mid, fromArg, argAp, apf0, ap0, config) and
+    flowFwd(mid, cc, argAp, apf0, ap0, config) and
    flowFwdStore0(mid, tc, node, apf0, apf, config)
  )
 }
@@ -1764,20 +1775,20 @@ private predicate flowFwdStore0(

 pragma[nomagic]
 private predicate flowFwdRead0(
-  Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2,
-  boolean fromArg, AccessPathOption argAp, Configuration config
+  Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2, CallContext cc,
+  AccessPathOption argAp, Configuration config
 ) {
-  flowFwd(node1, fromArg, argAp, apf0, ap0, config) and
+  flowFwd(node1, cc, argAp, apf0, ap0, config) and
  readCandFwd(node1, tc, apf0, node2, config)
 }

 pragma[nomagic]
 private predicate flowFwdRead(
-  Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, boolean fromArg,
+  Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, CallContext cc,
  AccessPathOption argAp, Configuration config
 ) {
  exists(Node mid, TypedContent tc |
-    flowFwdRead0(mid, tc, apf0, ap0, node, fromArg, argAp, config) and
+    flowFwdRead0(mid, tc, apf0, ap0, node, cc, argAp, config) and
    flowCand(node, _, _, apf, unbind(config)) and
    flowCandConsCand(tc, apf, unbind(config))
  )
@@ -1795,13 +1806,16 @@ private predicate flowFwdConsCand(

 pragma[nomagic]
 private predicate flowFwdIn(
-  DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathOption argAp, AccessPathFront apf,
-  AccessPath ap, Configuration config
+  DataFlowCall call, ParameterNode p, CallContext outercc, CallContext innercc,
+  AccessPathOption argAp, AccessPathFront apf, AccessPath ap, Configuration config
 ) {
-  exists(ArgumentNode arg, boolean allowsFieldFlow |
-    flowFwd(arg, fromArg, argAp, apf, ap, config) and
+  exists(ArgumentNode arg, boolean allowsFieldFlow, DataFlowCallable c |
+    flowFwd(arg, outercc, argAp, apf, ap, config) and
    flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) and
-    flowCand(p, _, _, _, unbind(config))
+    c = p.getEnclosingCallable() and
+    c = resolveCall(call, outercc) and
+    flowCand(p, _, _, _, unbind(config)) and
+    if recordDataFlowCallSite(call, c) then innercc = TSpecificCall(call) else innercc = TSomeCall()
  |
    ap instanceof AccessPathNil or allowsFieldFlow = true
  )
@@ -1809,13 +1823,19 @@ private predicate flowFwdIn(

 pragma[nomagic]
 private predicate flowFwdOut(
-  DataFlowCall call, Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf,
-  AccessPath ap, Configuration config
+  DataFlowCall call, Node node, CallContext innercc, DataFlowCallable innerc,
+  AccessPathOption argAp, AccessPathFront apf, AccessPath ap, Configuration config
 ) {
  exists(ReturnNodeExt ret, boolean allowsFieldFlow |
-    flowFwd(ret, fromArg, argAp, apf, ap, config) and
+    flowFwd(ret, innercc, argAp, apf, ap, config) and
    flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) and
-    flowCand(node, _, _, _, unbind(config))
+    innerc = ret.getEnclosingCallable() and
+    flowCand(node, _, _, _, unbind(config)) and
+    (
+      resolveReturn(innercc, innerc, call)
+      or
+      innercc.(CallContextCall).matchesCall(call)
+    )
  |
    ap instanceof AccessPathNil or allowsFieldFlow = true
  )
@@ -1826,7 +1846,7 @@ private predicate flowFwdOutFromArg(
  DataFlowCall call, Node node, AccessPath argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
-  flowFwdOut(call, node, true, TAccessPathSome(argAp), apf, ap, config)
+  flowFwdOut(call, node, any(CallContextCall ccc), _, TAccessPathSome(argAp), apf, ap, config)
 }

 /**
@@ -1834,10 +1854,10 @@ private predicate flowFwdOutFromArg(
 */
 pragma[nomagic]
 private predicate flowFwdIsEntered(
-  DataFlowCall call, boolean fromArg, AccessPathOption argAp, AccessPath ap, Configuration config
+  DataFlowCall call, CallContext cc, AccessPathOption argAp, AccessPath ap, Configuration config
 ) {
  exists(ParameterNode p, AccessPathFront apf |
-    flowFwdIn(call, p, fromArg, argAp, apf, ap, config) and
+    flowFwdIn(call, p, cc, _, argAp, apf, ap, config) and
    flowCand(p, true, TAccessPathFrontSome(_), apf, config)
  )
 }
@@ -1920,7 +1940,7 @@ private predicate flow0(
  // flow out of a callable
  flowOut(_, node, _, _, ap, config) and
  toReturn = true and
-  if flowFwd(node, true, TAccessPathSome(_), _, ap, config)
+  if flowFwd(node, any(CallContextCall ccc), TAccessPathSome(_), _, ap, config)
  then returnAp = TAccessPathSome(ap)
  else returnAp = TAccessPathNone()
 }
@@ -2006,9 +2026,10 @@ private predicate flowIsReturned(
  DataFlowCall call, boolean toReturn, AccessPathOption returnAp, AccessPath ap,
  Configuration config
 ) {
-  exists(ReturnNodeExt ret |
+  exists(ReturnNodeExt ret, CallContextCall ccc |
    flowOut(call, ret, toReturn, returnAp, ap, config) and
-    flowFwd(ret, true, TAccessPathSome(_), _, ap, config)
+    flowFwd(ret, ccc, TAccessPathSome(_), _, ap, config) and
+    ccc.matchesCall(call)
  )
 }

@@ -2031,7 +2052,7 @@ private newtype TSummaryCtx =
    exists(ReturnNodeExt ret, Configuration config, AccessPath ap0 |
      parameterFlow(p, ap, ret.getEnclosingCallable(), config) and
      flow(ret, true, TAccessPathSome(_), ap0, config) and
-      flowFwd(ret, true, TAccessPathSome(ap), _, ap0, config)
+      flowFwd(ret, any(CallContextCall ccc), TAccessPathSome(ap), _, ap0, config)
    )
  }

@@ -2352,7 +2373,7 @@ private predicate pathOutOfCallable0(
 ) {
  pos = getReturnPosition(mid.getNode()) and
  innercc = mid.getCallContext() and
-  not innercc instanceof CallContextCall and
+  innercc instanceof CallContextNoCall and
  ap = mid.getAp() and
  config = mid.getConfiguration()
 }
@@ -2867,7 +2888,7 @@ private module FlowExploration {
  ) {
    pos = getReturnPosition(mid.getNode()) and
    innercc = mid.getCallContext() and
-    not innercc instanceof CallContextCall and
+    innercc instanceof CallContextNoCall and
    ap = mid.getAp() and
    config = mid.getConfiguration()
  }
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll
@@ -1066,7 +1066,7 @@ private module LocalFlowBigStep {
   * Holds if `node` can be the first node in a maximal subsequence of local
   * flow steps in a dataflow path.
   */
-  private predicate localFlowEntry(Node node, Configuration config) {
+  predicate localFlowEntry(Node node, Configuration config) {
    nodeCand2(node, config) and
    (
      config.isSource(node) or
@@ -1650,53 +1650,53 @@ private class AccessPathOption extends TAccessPathOption {
 * Holds if `node` is reachable with access path `ap` from a source in
 * the configuration `config`.
 *
- * The Boolean `fromArg` records whether the node is reached through an
+ * The call context `cc` records whether the node is reached through an
 * argument in a call, and if so, `argAp` records the access path of that
 * argument.
 */
 private predicate flowFwd(
-  Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
-  flowFwd0(node, fromArg, argAp, apf, ap, config) and
+  flowFwd0(node, cc, argAp, apf, ap, config) and
  flowCand(node, _, _, apf, config)
 }

 private predicate flowFwd0(
-  Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
  flowCand(node, _, _, _, config) and
  config.isSource(node) and
-  fromArg = false and
+  cc instanceof CallContextAny and
  argAp = TAccessPathNone() and
  ap = TNil(getNodeType(node)) and
  apf = ap.(AccessPathNil).getFront()
  or
  flowCand(node, _, _, _, unbind(config)) and
  (
-    exists(Node mid |
-      flowFwd(mid, fromArg, argAp, apf, ap, config) and
-      localFlowBigStep(mid, node, true, _, config, _)
+    exists(Node mid, LocalCallContext localCC |
+      flowFwdLocalEntry(mid, cc, argAp, apf, ap, localCC, config) and
+      localFlowBigStep(mid, node, true, _, config, localCC)
    )
    or
-    exists(Node mid, AccessPathNil nil |
-      flowFwd(mid, fromArg, argAp, _, nil, config) and
-      localFlowBigStep(mid, node, false, apf, config, _) and
+    exists(Node mid, AccessPathNil nil, LocalCallContext localCC |
+      flowFwdLocalEntry(mid, cc, argAp, _, nil, localCC, config) and
+      localFlowBigStep(mid, node, false, apf, config, localCC) and
      apf = ap.(AccessPathNil).getFront()
    )
    or
    exists(Node mid |
      flowFwd(mid, _, _, apf, ap, config) and
      jumpStep(mid, node, config) and
-      fromArg = false and
+      cc instanceof CallContextAny and
      argAp = TAccessPathNone()
    )
    or
    exists(Node mid, AccessPathNil nil |
      flowFwd(mid, _, _, _, nil, config) and
      additionalJumpStep(mid, node, config) and
-      fromArg = false and
+      cc instanceof CallContextAny and
      argAp = TAccessPathNone() and
      ap = TNil(getNodeType(node)) and
      apf = ap.(AccessPathNil).getFront()
@@ -1704,40 +1704,51 @@ private predicate flowFwd0(
  )
  or
  // store
-  exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, fromArg, argAp, config))
+  exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, cc, argAp, config))
  or
  // read
  exists(TypedContent tc |
-    flowFwdRead(node, _, push(tc, ap), apf, fromArg, argAp, config) and
+    flowFwdRead(node, _, push(tc, ap), apf, cc, argAp, config) and
    flowFwdConsCand(tc, apf, ap, config)
  )
  or
  // flow into a callable
-  flowFwdIn(_, node, _, _, apf, ap, config) and
-  fromArg = true and
+  flowFwdIn(_, node, _, cc, _, apf, ap, config) and
  if flowCand(node, true, _, apf, config)
  then argAp = TAccessPathSome(ap)
  else argAp = TAccessPathNone()
  or
  // flow out of a callable
  exists(DataFlowCall call |
-    flowFwdOut(call, node, fromArg, argAp, apf, ap, config) and
-    fromArg = false
+    exists(DataFlowCallable c |
+      flowFwdOut(call, node, any(CallContextNoCall innercc), c, argAp, apf, ap, config) and
+      if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+    )
    or
    exists(AccessPath argAp0 |
      flowFwdOutFromArg(call, node, argAp0, apf, ap, config) and
-      flowFwdIsEntered(call, fromArg, argAp, argAp0, config)
+      flowFwdIsEntered(call, cc, argAp, argAp0, config)
    )
  )
 }

+pragma[nomagic]
+private predicate flowFwdLocalEntry(
+  Node node, CallContext cc, AccessPathOption argAp, AccessPathFront apf, AccessPath ap,
+  LocalCallContext localCC, Configuration config
+) {
+  flowFwd(node, cc, argAp, apf, ap, config) and
+  localFlowEntry(node, config) and
+  localCC = getLocalCallContext(cc, node.getEnclosingCallable())
+}
+
 pragma[nomagic]
 private predicate flowFwdStore(
-  Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, boolean fromArg,
+  Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, CallContext cc,
  AccessPathOption argAp, Configuration config
 ) {
  exists(Node mid, AccessPathFront apf0 |
-    flowFwd(mid, fromArg, argAp, apf0, ap0, config) and
+    flowFwd(mid, cc, argAp, apf0, ap0, config) and
    flowFwdStore0(mid, tc, node, apf0, apf, config)
  )
 }
@@ -1764,20 +1775,20 @@ private predicate flowFwdStore0(

 pragma[nomagic]
 private predicate flowFwdRead0(
-  Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2,
-  boolean fromArg, AccessPathOption argAp, Configuration config
+  Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2, CallContext cc,
+  AccessPathOption argAp, Configuration config
 ) {
-  flowFwd(node1, fromArg, argAp, apf0, ap0, config) and
+  flowFwd(node1, cc, argAp, apf0, ap0, config) and
  readCandFwd(node1, tc, apf0, node2, config)
 }

 pragma[nomagic]
 private predicate flowFwdRead(
-  Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, boolean fromArg,
+  Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, CallContext cc,
  AccessPathOption argAp, Configuration config
 ) {
  exists(Node mid, TypedContent tc |
-    flowFwdRead0(mid, tc, apf0, ap0, node, fromArg, argAp, config) and
+    flowFwdRead0(mid, tc, apf0, ap0, node, cc, argAp, config) and
    flowCand(node, _, _, apf, unbind(config)) and
    flowCandConsCand(tc, apf, unbind(config))
  )
@@ -1795,13 +1806,16 @@ private predicate flowFwdConsCand(

 pragma[nomagic]
 private predicate flowFwdIn(
-  DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathOption argAp, AccessPathFront apf,
-  AccessPath ap, Configuration config
+  DataFlowCall call, ParameterNode p, CallContext outercc, CallContext innercc,
+  AccessPathOption argAp, AccessPathFront apf, AccessPath ap, Configuration config
 ) {
-  exists(ArgumentNode arg, boolean allowsFieldFlow |
-    flowFwd(arg, fromArg, argAp, apf, ap, config) and
+  exists(ArgumentNode arg, boolean allowsFieldFlow, DataFlowCallable c |
+    flowFwd(arg, outercc, argAp, apf, ap, config) and
    flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) and
-    flowCand(p, _, _, _, unbind(config))
+    c = p.getEnclosingCallable() and
+    c = resolveCall(call, outercc) and
+    flowCand(p, _, _, _, unbind(config)) and
+    if recordDataFlowCallSite(call, c) then innercc = TSpecificCall(call) else innercc = TSomeCall()
  |
    ap instanceof AccessPathNil or allowsFieldFlow = true
  )
@@ -1809,13 +1823,19 @@ private predicate flowFwdIn(

 pragma[nomagic]
 private predicate flowFwdOut(
-  DataFlowCall call, Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf,
-  AccessPath ap, Configuration config
+  DataFlowCall call, Node node, CallContext innercc, DataFlowCallable innerc,
+  AccessPathOption argAp, AccessPathFront apf, AccessPath ap, Configuration config
 ) {
  exists(ReturnNodeExt ret, boolean allowsFieldFlow |
-    flowFwd(ret, fromArg, argAp, apf, ap, config) and
+    flowFwd(ret, innercc, argAp, apf, ap, config) and
    flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) and
-    flowCand(node, _, _, _, unbind(config))
+    innerc = ret.getEnclosingCallable() and
+    flowCand(node, _, _, _, unbind(config)) and
+    (
+      resolveReturn(innercc, innerc, call)
+      or
+      innercc.(CallContextCall).matchesCall(call)
+    )
  |
    ap instanceof AccessPathNil or allowsFieldFlow = true
  )
@@ -1826,7 +1846,7 @@ private predicate flowFwdOutFromArg(
  DataFlowCall call, Node node, AccessPath argAp, AccessPathFront apf, AccessPath ap,
  Configuration config
 ) {
-  flowFwdOut(call, node, true, TAccessPathSome(argAp), apf, ap, config)
+  flowFwdOut(call, node, any(CallContextCall ccc), _, TAccessPathSome(argAp), apf, ap, config)
 }

 /**
@@ -1834,10 +1854,10 @@ private predicate flowFwdOutFromArg(
 */
 pragma[nomagic]
 private predicate flowFwdIsEntered(
-  DataFlowCall call, boolean fromArg, AccessPathOption argAp, AccessPath ap, Configuration config
+  DataFlowCall call, CallContext cc, AccessPathOption argAp, AccessPath ap, Configuration config
 ) {
  exists(ParameterNode p, AccessPathFront apf |
-    flowFwdIn(call, p, fromArg, argAp, apf, ap, config) and
+    flowFwdIn(call, p, cc, _, argAp, apf, ap, config) and
    flowCand(p, true, TAccessPathFrontSome(_), apf, config)
  )
 }
@@ -1920,7 +1940,7 @@ private predicate flow0(
  // flow out of a callable
  flowOut(_, node, _, _, ap, config) and
  toReturn = true and
-  if flowFwd(node, true, TAccessPathSome(_), _, ap, config)
+  if flowFwd(node, any(CallContextCall ccc), TAccessPathSome(_), _, ap, config)
  then returnAp = TAccessPathSome(ap)
  else returnAp = TAccessPathNone()
 }
@@ -2006,9 +2026,10 @@ private predicate flowIsReturned(
  DataFlowCall call, boolean toReturn, AccessPathOption returnAp, AccessPath ap,
  Configuration config
 ) {
-  exists(ReturnNodeExt ret |
+  exists(ReturnNodeExt ret, CallContextCall ccc |
    flowOut(call, ret, toReturn, returnAp, ap, config) and
-    flowFwd(ret, true, TAccessPathSome(_), _, ap, config)
+    flowFwd(ret, ccc, TAccessPathSome(_), _, ap, config) and
+    ccc.matchesCall(call)
  )
 }

@@ -2031,7 +2052,7 @@ private newtype TSummaryCtx =
    exists(ReturnNodeExt ret, Configuration config, AccessPath ap0 |
      parameterFlow(p, ap, ret.getEnclosingCallable(), config) and
      flow(ret, true, TAccessPathSome(_), ap0, config) and
-      flowFwd(ret, true, TAccessPathSome(ap), _, ap0, config)
+      flowFwd(ret, any(CallContextCall ccc), TAccessPathSome(ap), _, ap0, config)
    )
  }

@@ -2352,7 +2373,7 @@ private predicate pathOutOfCallable0(
 ) {
  pos = getReturnPosition(mid.getNode()) and
  innercc = mid.getCallContext() and
-  not innercc instanceof CallContextCall and
+  innercc instanceof CallContextNoCall and
  ap = mid.getAp() and
  config = mid.getConfiguration()
 }
@@ -2867,7 +2888,7 @@ private module FlowExploration {
  ) {
    pos = getReturnPosition(mid.getNode()) and
    innercc = mid.getCallContext() and
-    not innercc instanceof CallContextCall and
+    innercc instanceof CallContextNoCall and
    ap = mid.getAp() and
    config = mid.getConfiguration()
  }
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll
@@ -512,13 +512,19 @@ abstract class CallContext extends TCallContext {
  abstract predicate relevantFor(DataFlowCallable callable);
 }

-class CallContextAny extends CallContext, TAnyCallContext {
+abstract class CallContextNoCall extends CallContext { }
+
+class CallContextAny extends CallContextNoCall, TAnyCallContext {
  override string toString() { result = "CcAny" }

  override predicate relevantFor(DataFlowCallable callable) { any() }
 }

-abstract class CallContextCall extends CallContext { }
+abstract class CallContextCall extends CallContext {
+  /** Holds if this call context may be `call`. */
+  bindingset[call]
+  abstract predicate matchesCall(DataFlowCall call);
+}

 class CallContextSpecificCall extends CallContextCall, TSpecificCall {
  override string toString() {
@@ -529,6 +535,8 @@ class CallContextSpecificCall extends CallContextCall, TSpecificCall {
    recordDataFlowCallSite(getCall(), callable)
  }

+  override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
+
  DataFlowCall getCall() { this = TSpecificCall(result) }
 }

@@ -538,9 +546,11 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
  override predicate relevantFor(DataFlowCallable callable) {
    exists(ParameterNode p | p.getEnclosingCallable() = callable)
  }
+
+  override predicate matchesCall(DataFlowCall call) { any() }
 }

-class CallContextReturn extends CallContext, TReturn {
+class CallContextReturn extends CallContextNoCall, TReturn {
  override string toString() {
    exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")")
  }
--- a/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll
@@ -328,15 +328,193 @@ predicate jumpStep(Node pred, Node succ) {
 // Field flow
 //--------
 /**
- * Holds if data can flow from `node1` to `node2` via an assignment to
+ * Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
 * content `c`.
 */
-predicate storeStep(Node node1, Content c, Node node2) { none() }
+predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
+  listStoreStep(nodeFrom, c, nodeTo)
+  or
+  setStoreStep(nodeFrom, c, nodeTo)
+  or
+  tupleStoreStep(nodeFrom, c, nodeTo)
+  or
+  dictStoreStep(nodeFrom, c, nodeTo)
+  or
+  comprehensionStoreStep(nodeFrom, c, nodeTo)
+}
+
+/** Data flows from an element of a list to the list. */
+predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
+  // List
+  //   `[..., 42, ...]`
+  //   nodeFrom is `42`, cfg node
+  //   nodeTo is the list, `[..., 42, ...]`, cfg node
+  //   c denotes element of list
+  nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode()
+}
+
+/** Data flows from an element of a set to the set. */
+predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) {
+  // Set
+  //   `{..., 42, ...}`
+  //   nodeFrom is `42`, cfg node
+  //   nodeTo is the set, `{..., 42, ...}`, cfg node
+  //   c denotes element of set
+  nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode()
+}
+
+/** Data flows from an element of a tuple to the tuple at a specific index. */
+predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo) {
+  // Tuple
+  //   `(..., 42, ...)`
+  //   nodeFrom is `42`, cfg node
+  //   nodeTo is the tuple, `(..., 42, ...)`, cfg node
+  //   c denotes element of tuple and index of nodeFrom
+  exists(int n |
+    nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and
+    c.getIndex() = n
+  )
+}
+
+/** Data flows from an element of a dictionary to the dictionary at a specific key. */
+predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
+  // Dictionary
+  //   `{..., "key": 42, ...}`
+  //   nodeFrom is `42`, cfg node
+  //   nodeTo is the dict, `{..., "key": 42, ...}`, cfg node
+  //   c denotes element of dictionary and the key `"key"` (only string constant keys match)
+  exists(KeyValuePair item |
+    item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
+    nodeFrom.getNode().getNode() = item.getValue() and
+    c.getKey() = item.getKey().(StrConst).getS()
+  )
+}
+
+/** Data flows from an element expression in a comprehension to the comprehension. */
+predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
+  // Comprehension
+  //   `[x+1 for x in l]`
+  //   nodeFrom is `x+1`, cfg node
+  //   nodeTo is `[x+1 for x in l]`, cfg node
+  //   c denotes element of list or set, or element of dictionary at any key
+  //
+  // List
+  nodeTo.getNode().getNode().(ListComp).getElt() = nodeFrom.getNode().getNode() and
+  c instanceof ListElementContent
+  or
+  // Set
+  nodeTo.getNode().getNode().(SetComp).getElt() = nodeFrom.getNode().getNode() and
+  c instanceof SetElementContent
+  or
+  // Dictionary
+  nodeTo.getNode().getNode().(DictComp).getElt() = nodeFrom.getNode().getNode() and
+  c instanceof DictionaryElementAnyContent
+}

 /**
- * Holds if data can flow from `node1` to `node2` via a read of content `c`.
+ * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
 */
-predicate readStep(Node node1, Content c, Node node2) { none() }
+predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
+  subscriptReadStep(nodeFrom, c, nodeTo)
+  or
+  popReadStep(nodeFrom, c, nodeTo)
+  or
+  comprehensionReadStep(nodeFrom, c, nodeTo)
+}
+
+/** Data flows from a sequence to a subscript of the sequence. */
+predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
+  // Subscript
+  //   `l[3]`
+  //   nodeFrom is `l`, cfg node
+  //   nodeTo is `l[3]`, cfg node
+  //   c is compatible with 3: any list, set or dictionary element, or the tuple element at index 3
+  nodeFrom.getNode() = nodeTo.getNode().(SubscriptNode).getObject() and
+  (
+    c instanceof ListElementContent
+    or
+    c instanceof SetElementContent
+    or
+    c instanceof DictionaryElementAnyContent
+    or
+    c.(TupleElementContent).getIndex() =
+      nodeTo.getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
+    or
+    c.(DictionaryElementContent).getKey() =
+      nodeTo.getNode().(SubscriptNode).getIndex().getNode().(StrConst).getS()
+  )
+}
+
+/** Data flows from a sequence to a call to `pop` on the sequence. */
+predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
+  // set.pop or list.pop
+  //   `s.pop()`
+  //   nodeFrom is `s`, cfg node
+  //   nodeTo is `s.pop()`, cfg node
+  //   c denotes element of list or set
+  exists(CallNode call, AttrNode a |
+    call.getFunction() = a and
+    a.getName() = "pop" and // Should match appropriate call since we tracked a sequence here.
+    not exists(call.getAnArg()) and
+    nodeFrom.getNode() = a.getObject() and
+    nodeTo.getNode() = call and
+    (
+      c instanceof ListElementContent
+      or
+      c instanceof SetElementContent
+    )
+  )
+  or
+  // dict.pop
+  //   `d.pop("key")`
+  //   nodeFrom is `d`, cfg node
+  //   nodeTo is `d.pop("key")`, cfg node
+  //   c denotes the key `"key"`
+  exists(CallNode call, AttrNode a |
+    call.getFunction() = a and
+    a.getName() = "pop" and // Should match appropriate call since we tracked a dictionary here.
+    nodeFrom.getNode() = a.getObject() and
+    nodeTo.getNode() = call and
+    c.(DictionaryElementContent).getKey() = call.getArg(0).getNode().(StrConst).getS()
+  )
+}
+
+/** Data flows from an iterated sequence to the variable iterating over the sequence. */
+predicate comprehensionReadStep(CfgNode nodeFrom, Content c, EssaNode nodeTo) {
+  // Comprehension
+  //   `[x+1 for x in l]`
+  //   nodeFrom is `l`, cfg node
+  //   nodeTo is `x`, essa var
+  //   c denotes element of list or set
+  exists(For f, Comp comp |
+    f = getCompFor(comp) and
+    nodeFrom.getNode().getNode() = getCompIter(comp) and
+    nodeTo.getVar().getDefinition().(AssignmentDefinition).getDefiningNode().getNode() =
+      f.getTarget() and
+    (
+      c instanceof ListElementContent
+      or
+      c instanceof SetElementContent
+    )
+  )
+}
+
+/** Gets a `For` contained in `c` whose scope is the function of `c`. This seems to compensate for extractor shortcomings. */
+For getCompFor(Comp c) {
+  c.contains(result) and
+  c.getFunction() = result.getScope()
+}
+
+/** Gets a node in `c`, in the scope of `c`, that is neither the function of `c` nor nested in another node in `c`. This seems to compensate for extractor shortcomings. */
+AstNode getCompIter(Comp c) {
+  c.contains(result) and
+  c.getScope() = result.getScope() and
+  not result = c.getFunction() and
+  not exists(AstNode between |
+    c.contains(between) and
+    between.contains(result)
+  )
+}

 /**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
--- a/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll
@@ -33,8 +33,15 @@ class Node extends TNode {
  /** Gets the scope of this node. */
  Scope getScope() { none() }

+  private DataFlowCallable getCallableScope(Scope s) {
+    result.getScope() = s
+    or
+    not exists(DataFlowCallable c | c.getScope() = s) and
+    result = getCallableScope(s.getEnclosingScope())
+  }
+
  /** Gets the enclosing callable of this node. */
-  DataFlowCallable getEnclosingCallable() { result.getScope() = this.getScope() }
+  DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) }

  /** Gets the location of this node */
  Location getLocation() { none() }
@@ -138,6 +145,62 @@ class BarrierGuard extends Expr {
 /**
 * A reference contained in an object. This is either a field or a property.
 */
-class Content extends string {
-  Content() { this = "Content" }
+newtype TContent =
+  /** An element of a list. */
+  TListElementContent() or
+  /** An element of a set. */
+  TSetElementContent() or
+  /** An element of a tuple at a specific index (one at which some `TupleNode` has an element). */
+  TTupleElementContent(int index) { exists(any(TupleNode tn).getElement(index)) } or
+  /** An element of a dictionary under a specific key (a string-constant `KeyValuePair` key or the `getArg()` of a `Keyword`). */
+  TDictionaryElementContent(string key) {
+    key = any(KeyValuePair kvp).getKey().(StrConst).getS()
+    or
+    key = any(Keyword kw).getArg()
+  } or
+  /** An element of a dictionary at any key. */
+  TDictionaryElementAnyContent()
+
+class Content extends TContent {
+  /** Gets a textual representation of this element. */
+  string toString() { result = "Content" }
+}
+
+class ListElementContent extends TListElementContent, Content {
+  /** Gets a textual representation of this element. */
+  override string toString() { result = "List element" }
+}
+
+class SetElementContent extends TSetElementContent, Content {
+  /** Gets a textual representation of this element. */
+  override string toString() { result = "Set element" }
+}
+
+class TupleElementContent extends TTupleElementContent, Content {
+  int index;
+
+  TupleElementContent() { this = TTupleElementContent(index) }
+
+  /** Gets the index of this tuple element. */
+  int getIndex() { result = index }
+
+  /** Gets a textual representation of this element. */
+  override string toString() { result = "Tuple element at index " + index.toString() }
+}
+
+class DictionaryElementContent extends TDictionaryElementContent, Content {
+  string key;
+
+  DictionaryElementContent() { this = TDictionaryElementContent(key) }
+
+  /** Gets the key of this dictionary element. */
+  string getKey() { result = key }
+
+  /** Gets a textual representation of this element. */
+  override string toString() { result = "Dictionary element at key " + key }
+}
+
+class DictionaryElementAnyContent extends TDictionaryElementAnyContent, Content {
+  /** Gets a textual representation of this element. */
+  override string toString() { result = "Any dictionary element" }
 }
--- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll
+++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll
@@ -1,7 +1,7 @@
 private import python
-private import TaintTrackingPublic
 private import experimental.dataflow.DataFlow
 private import experimental.dataflow.internal.DataFlowPrivate
+private import experimental.dataflow.internal.TaintTrackingPublic

 /**
 * Holds if `node` should be a barrier in all global taint flow configurations
@@ -10,12 +10,116 @@ private import experimental.dataflow.internal.DataFlowPrivate
 predicate defaultTaintBarrier(DataFlow::Node node) { none() }

 /**
- * Holds if the additional step from `pred` to `succ` should be included in all
+ * Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
 * global taint flow configurations.
 */
-predicate defaultAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
-  none()
-  // localAdditionalTaintStep(pred, succ)
-  // or
-  // succ = pred.(DataFlow::NonLocalJumpNode).getAJumpSuccessor(false)
+predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  localAdditionalTaintStep(nodeFrom, nodeTo)
+  or
+  any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
+}
+
+/**
+ * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
+ * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
+ * different objects.
+ */
+predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  concatStep(nodeFrom, nodeTo) // e.g. `a + b`
+  or
+  subscriptStep(nodeFrom, nodeTo) // e.g. `a[i]`
+  or
+  stringManipulation(nodeFrom, nodeTo) // e.g. `str(a)`, `a.upper()`, `a % b`, f-strings
+}
+
+/**
+ * Holds if taint can flow from `nodeFrom`, an operand of a `+` expression, to that expression `nodeTo`.
+ *
+ * Note that since we cannot easily distinguish interesting types (like string, list, tuple),
+ * we consider any `+` operation to propagate taint. After consulting with the JS team, this
+ * doesn't sound like it is a big problem in practice.
+ */
+predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
+  exists(BinaryExprNode add | add = nodeTo.getNode() |
+    add.getOp() instanceof Add and add.getAnOperand() = nodeFrom.getNode()
+  )
+}
+
+/**
+ * Holds if taint can flow from the subscripted object `nodeFrom` to the subscript expression `nodeTo`.
+ */
+predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
+  nodeTo.getNode().(SubscriptNode).getObject() = nodeFrom.getNode()
+}
+
+/**
+ * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to string
+ * manipulation.
+ *
+ * Note that since we cannot easily distinguish when something is a string, this can
+ * also make taint flow on `<non string>.replace(foo, bar)`.
+ */
+predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
+  // transforming something tainted into a string will make the string tainted
+  exists(CallNode call | call = nodeTo.getNode() |
+    call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
+    (
+      nodeFrom.getNode() = call.getArg(0)
+      or
+      nodeFrom.getNode() = call.getArgByName("object")
+    )
+  )
+  or
+  // String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
+  exists(CallNode call, string method_name, ControlFlowNode object |
+    call = nodeTo.getNode() and
+    object = call.getFunction().(AttrNode).getObject(method_name)
+  |
+    nodeFrom.getNode() = object and // taint flows from the receiver to the result of the call
+    method_name in ["capitalize", "casefold", "center", "expandtabs", "format", "format_map",
+          "join", "ljust", "lstrip", "lower", "replace", "rjust", "rstrip", "strip", "swapcase",
+          "title", "upper", "zfill", "encode", "decode"]
+    or
+    method_name = "replace" and // taint flows from the second positional argument
+    nodeFrom.getNode() = call.getArg(1)
+    or
+    method_name = "format" and // taint flows from any argument
+    nodeFrom.getNode() = call.getAnArg()
+    or
+    // str -> List[str]
+    // TODO: check if these should be handled differently in regards to content
+    nodeFrom.getNode() = object and
+    method_name in ["partition", "rpartition", "rsplit", "split", "splitlines"]
+    or
+    // List[str] -> str
+    // TODO: check if these should be handled differently in regards to content
+    method_name = "join" and
+    nodeFrom.getNode() = call.getArg(0)
+    or
+    // Mapping[str, Any] -> str
+    method_name = "format_map" and
+    nodeFrom.getNode() = call.getArg(0)
+  )
+  or
+  // % formatting -- taint flows from either operand
+  exists(BinaryExprNode fmt | fmt = nodeTo.getNode() |
+    fmt.getOp() instanceof Mod and
+    (
+      fmt.getLeft() = nodeFrom.getNode()
+      or
+      fmt.getRight() = nodeFrom.getNode()
+    )
+  )
+  or
+  // string multiplication -- `"foo" * 10` (only the left operand is considered)
+  exists(BinaryExprNode mult | mult = nodeTo.getNode() |
+    mult.getOp() instanceof Mult and
+    mult.getLeft() = nodeFrom.getNode()
+  )
+  or
+  // f-strings -- taint flows from any value of the f-string to the f-string itself
+  nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
+  // TODO: Handle encode/decode from base64/quopri
+  // TODO: Handle os.path.join
+  // TODO: Handle functions in https://docs.python.org/3/library/binascii.html
 }
--- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll
+++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll
@@ -6,27 +6,52 @@
 private import python
 private import TaintTrackingPrivate
 private import experimental.dataflow.DataFlow
-// /**
-//  * Holds if taint propagates from `source` to `sink` in zero or more local
-//  * (intra-procedural) steps.
-//  */
-// predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
-// // /**
-// //  * Holds if taint can flow from `e1` to `e2` in zero or more
-// //  * local (intra-procedural) steps.
-// //  */
-// // predicate localExprTaint(Expr e1, Expr e2) {
-// //   localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
-// // }
-// // /** A member (property or field) that is tainted if its containing object is tainted. */
-// // abstract class TaintedMember extends AssignableMember { }
-// /**
-//  * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
-//  * (intra-procedural) step.
-//  */
-// predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
-//   // Ordinary data flow
-//   DataFlow::localFlowStep(nodeFrom, nodeTo)
-//   or
-//   localAdditionalTaintStep(nodeFrom, nodeTo)
-// }
+
+// Local taint flow and helpers
+/**
+ * Holds if taint propagates from `source` to `sink` in zero or more local
+ * (intra-procedural) steps, that is, by the reflexive transitive closure of `localTaintStep`.
+ */
+predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
+
+/**
+ * Holds if taint can flow from `e1` to `e2` in zero or more local (intra-procedural)
+ * steps, as determined by `localTaint` on the data-flow nodes of the two expressions.
+ */
+predicate localExprTaint(Expr e1, Expr e2) {
+  localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
+}
+
+/**
+ * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
+ * (intra-procedural) step.
+ */
+predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  // Ordinary data flow
+  DataFlow::localFlowStep(nodeFrom, nodeTo)
+  or
+  localAdditionalTaintStep(nodeFrom, nodeTo) // taint-only steps that are not data flow steps
+}
+
+// AdditionalTaintStep for global taint flow
+private newtype TUnit = TMkUnit() // a type with the single branch `TMkUnit()`
+
+/** A singleton class containing a single dummy "unit" value. */
+private class Unit extends TUnit {
+  /** Gets a textual representation of this element. */
+  string toString() { result = "unit" }
+}
+
+/**
+ * A unit class for adding additional taint steps.
+ *
+ * Extend this class to add additional taint steps that should apply to all
+ * taint configurations; `defaultAdditionalTaintStep` includes every such step.
+ */
+class AdditionalTaintStep extends Unit {
+  /**
+   * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
+   * step for all configurations.
+   */
+  abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
+}