Merge remote-tracking branch 'origin/main' into jty/python/nosqlInjection

2026-05-02 20:25:13 +02:00 · 2021-08-10 20:01:08 +02:00
parent 6f09b95019 d658ef1dcd
commit e6ce10b5c5
3028 changed files with 246210 additions and 35181 deletions
--- a/python/ql/test/TestUtilities/InlineExpectationsTest.qll
+++ b/python/ql/test/TestUtilities/InlineExpectationsTest.qll
@@ -181,14 +181,14 @@ private int getEndOfColumnPosition(int start, string content) {
    min(string name, int cand |
      exists(TNamedColumn(name)) and
      cand = content.indexOf(name + ":") and
-      cand > start
+      cand >= start
    |
      cand
    )
  or
  not exists(string name |
    exists(TNamedColumn(name)) and
-    content.indexOf(name + ":") > start
+    content.indexOf(name + ":") >= start
  ) and
  result = content.length()
 }
--- a/python/ql/test/experimental/dataflow/ApiGraphs/async_test.py
+++ b/python/ql/test/experimental/dataflow/ApiGraphs/async_test.py
@@ -0,0 +1,17 @@
+import pkg # $ use=moduleImport("pkg")
+
+async def foo():
+    coro = pkg.async_func() # $ use=moduleImport("pkg").getMember("async_func").getReturn()
+    coro # $ use=moduleImport("pkg").getMember("async_func").getReturn()
+    result = await coro # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
+    result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
+    return result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
+
+async def bar():
+    result = await pkg.async_func() # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
+    return result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
+
+def check_annotations():
+    # Sanity check: shows what the annotations look like for a plain (non-async) call.
+    result = pkg.sync_func() # $ use=moduleImport("pkg").getMember("sync_func").getReturn()
+    return result # $ use=moduleImport("pkg").getMember("sync_func").getReturn()
--- a/python/ql/test/experimental/dataflow/ApiGraphs/options
+++ b/python/ql/test/experimental/dataflow/ApiGraphs/options
@@ -1 +1 @@
-semmle-extractor-options: --lang=3
+semmle-extractor-options: --lang=3 --max-import-depth=1
--- a/python/ql/test/experimental/dataflow/ApiGraphs/use.ql
+++ b/python/ql/test/experimental/dataflow/ApiGraphs/use.ql
@@ -13,7 +13,8 @@ class ApiUseTest extends InlineExpectationsTest {
    l = n.getLocation() and
    // Module variable nodes have no suitable location, so it's best to simply exclude them entirely
    // from the inline tests.
-    not n instanceof DataFlow::ModuleVariableNode
+    not n instanceof DataFlow::ModuleVariableNode and
+    exists(l.getFile().getRelativePath())
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
--- a/python/ql/test/experimental/dataflow/method-calls/test.expected
+++ b/python/ql/test/experimental/dataflow/method-calls/test.expected
@@ -0,0 +1,8 @@
+conjunctive_lookup
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | bar |
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | foo |
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | bar |
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | foo |
+calls_lookup
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | foo |
+| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | bar |
--- a/python/ql/test/experimental/dataflow/method-calls/test.py
+++ b/python/ql/test/experimental/dataflow/method-calls/test.py
@@ -0,0 +1,6 @@
+if cond:
+    meth = obj1.foo
+else:
+    meth = obj2.bar
+
+meth()
--- a/python/ql/test/experimental/dataflow/method-calls/test.ql
+++ b/python/ql/test/experimental/dataflow/method-calls/test.ql
@@ -0,0 +1,18 @@
+import python
+import semmle.python.dataflow.new.DataFlow
+import experimental.dataflow.TestUtil.PrintNode
+
+query predicate conjunctive_lookup(
+  DataFlow::MethodCallNode methCall, string call, string object, string methodName
+) {
+  call = prettyNode(methCall) and
+  object = prettyNode(methCall.getObject()) and
+  methodName = methCall.getMethodName()
+}
+
+query predicate calls_lookup(
+  DataFlow::MethodCallNode methCall, string call, string object, string methodName
+) {
+  call = prettyNode(methCall) and
+  exists(DataFlow::Node o | methCall.calls(o, methodName) and object = prettyNode(o))
+}
--- a/python/ql/test/experimental/dataflow/sensitive-data/TestSensitiveDataSources.ql
+++ b/python/ql/test/experimental/dataflow/sensitive-data/TestSensitiveDataSources.ql
@@ -1,12 +1,17 @@
+// /**
+//  * @kind path-problem
+//  */
 import python
 import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
 import TestUtilities.InlineExpectationsTest
 import semmle.python.dataflow.new.SensitiveDataSources
+private import semmle.python.ApiGraphs

 class SensitiveDataSourcesTest extends InlineExpectationsTest {
  SensitiveDataSourcesTest() { this = "SensitiveDataSourcesTest" }

-  override string getARelevantTag() { result = "SensitiveDataSource" }
+  override string getARelevantTag() { result in ["SensitiveDataSource", "SensitiveUse"] }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(location.getFile().getRelativePath()) and
@@ -15,6 +20,32 @@ class SensitiveDataSourcesTest extends InlineExpectationsTest {
      element = source.toString() and
      value = source.getClassification() and
      tag = "SensitiveDataSource"
+      or
+      exists(DataFlow::Node use |
+        any(SensitiveUseConfiguration config).hasFlow(source, use) and
+        location = use.getLocation() and
+        element = use.toString() and
+        value = source.getClassification() and
+        tag = "SensitiveUse"
+      )
    )
  }
 }
+
+class SensitiveUseConfiguration extends TaintTracking::Configuration {
+  SensitiveUseConfiguration() { this = "SensitiveUseConfiguration" }
+
+  override predicate isSource(DataFlow::Node node) { node instanceof SensitiveDataSource }
+
+  override predicate isSink(DataFlow::Node node) {
+    node = API::builtin("print").getACall().getArg(_)
+  }
+
+  override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
+    sensitiveDataExtraStepForCalls(node1, node2)
+  }
+}
+// import DataFlow::PathGraph
+// from SensitiveUseConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+// where cfg.hasFlowPath(source, sink)
+// select sink, source, sink, "taint from $@", source.getNode(), "here"
--- a/python/ql/test/experimental/dataflow/sensitive-data/test.py
+++ b/python/ql/test/experimental/dataflow/sensitive-data/test.py
@@ -1,5 +1,6 @@

-from not_found import get_passwd, account_id
+from not_found import get_passwd # $ SensitiveDataSource=password
+from not_found import account_id # $ SensitiveDataSource=id

 def get_password():
    pass
@@ -20,14 +21,94 @@ fetch_certificate() # $ SensitiveDataSource=certificate
 account_id() # $ SensitiveDataSource=id
 safe_to_store = encrypt_password(pwd)

+f = get_password
+f() # $ SensitiveDataSource=password
+
+# more tests of functions we don't have a definition for
+x = unkown_func_not_even_imported_get_password() # $ SensitiveDataSource=password
+print(x) # $ SensitiveUse=password
+
+f = get_passwd
+x = f()
+print(x) # $ SensitiveUse=password
+
+import not_found
+f = not_found.get_passwd # $ SensitiveDataSource=password
+x = f()
+print(x) # $ SensitiveUse=password
+
+def my_func(non_sensitive_name):
+    x = non_sensitive_name()
+    print(x) # $ SensitiveUse=password
+f = not_found.get_passwd # $ SensitiveDataSource=password
+my_func(f)
+
 # attributes
 foo = ObjectFromDatabase()
 foo.secret # $ SensitiveDataSource=secret
 foo.username # $ SensitiveDataSource=id

+getattr(foo, "password") # $ SensitiveDataSource=password
+x = "password"
+getattr(foo, x) # $ SensitiveDataSource=password
+
+# based on variable/parameter names
+def my_func(password): # $ SensitiveDataSource=password
+    print(password) # $ SensitiveUse=password
+
+password = some_function() # $ SensitiveDataSource=password
+print(password) # $ SensitiveUse=password
+
+for password in some_function2(): # $ SensitiveDataSource=password
+    print(password) # $ SensitiveUse=password
+
+with some_function3() as password: # $ SensitiveDataSource=password
+    print(password) # $ SensitiveUse=password
+
+
 # Special handling of lookups of sensitive properties
-request.args["password"], # $ MISSING: SensitiveDataSource=password
+request.args["password"], # $ SensitiveDataSource=password
 request.args.get("password") # $ SensitiveDataSource=password

+x = "password"
+request.args.get(x) # $ SensitiveDataSource=password
+
 # I don't think handling `getlist` is super important, just included it to show what we don't handle
 request.args.getlist("password")[0] # $ MISSING: SensitiveDataSource=password
+
+from not_found import password2 as foo # $ SensitiveDataSource=password
+print(foo) # $ SensitiveUse=password
+
+# ------------------------------------------------------------------------------
+# cross-talk between different calls
+# ------------------------------------------------------------------------------
+
+# Case 1: providing name as argument
+
+_configuration = {"sleep_timer": 5, "mysql_password": "1234"}
+
+def get_config(key):
+    # Treating this as a SensitiveDataSource is questionable, since that will result in
+    # _all_ calls to `get_config` being treated as giving sensitive data
+    return _configuration[key]
+
+foo = get_config("mysql_password")
+print(foo) # $ MISSING: SensitiveUse=password
+
+bar = get_config("sleep_timer")
+print(bar)
+
+# Case 2: Providing function as argument
+
+def call_wrapper(func):
+    print("Will call", func)
+    # Treating this as a SensitiveDataSource is questionable, since that will result in
+    # _all_ calls to `call_wrapper` being treated as giving sensitive data
+    return func() # $ SensitiveDataSource=password
+
+foo = call_wrapper(get_password)
+print(foo) # $ SensitiveUse=password
+
+harmless = lambda: "bar"
+bar = call_wrapper(harmless)
+print(bar) # $ SPURIOUS: SensitiveUse=password
--- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
+++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
@@ -104,7 +104,7 @@ def non_syntactic():
    _str = str
    ensure_tainted(
        meth(), # $ MISSING: tainted
-        _str(ts), # $ MISSING: tainted
+        _str(ts), # $ tainted
    )


--- a/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql
+++ b/python/ql/test/experimental/dataflow/typetracking/moduleattr.ql
@@ -2,7 +2,7 @@ import python
 import semmle.python.dataflow.new.DataFlow
 import semmle.python.dataflow.new.TypeTracker

-private DataFlow::LocalSourceNode module_tracker(TypeTracker t) {
+private DataFlow::TypeTrackingNode module_tracker(TypeTracker t) {
  t.start() and
  result = DataFlow::importNode("module")
  or
@@ -13,7 +13,7 @@ query DataFlow::Node module_tracker() {
  module_tracker(DataFlow::TypeTracker::end()).flowsTo(result)
 }

-private DataFlow::LocalSourceNode module_attr_tracker(TypeTracker t) {
+private DataFlow::TypeTrackingNode module_attr_tracker(TypeTracker t) {
  t.startInAttr("attr") and
  result = module_tracker()
  or
--- a/python/ql/test/experimental/dataflow/typetracking/tracked.ql
+++ b/python/ql/test/experimental/dataflow/typetracking/tracked.ql
@@ -6,7 +6,7 @@ import TestUtilities.InlineExpectationsTest
 // -----------------------------------------------------------------------------
 // tracked
 // -----------------------------------------------------------------------------
-private DataFlow::LocalSourceNode tracked(TypeTracker t) {
+private DataFlow::TypeTrackingNode tracked(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
  or
@@ -34,14 +34,14 @@ class TrackedTest extends InlineExpectationsTest {
 // -----------------------------------------------------------------------------
 // int + str
 // -----------------------------------------------------------------------------
-private DataFlow::LocalSourceNode int_type(TypeTracker t) {
+private DataFlow::TypeTrackingNode int_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
  or
  exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
 }

-private DataFlow::LocalSourceNode string_type(TypeTracker t) {
+private DataFlow::TypeTrackingNode string_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
  or
@@ -83,7 +83,7 @@ class TrackedStringTest extends InlineExpectationsTest {
 // -----------------------------------------------------------------------------
 // tracked_self
 // -----------------------------------------------------------------------------
-private DataFlow::LocalSourceNode tracked_self(TypeTracker t) {
+private DataFlow::TypeTrackingNode tracked_self(TypeTracker t) {
  t.start() and
  exists(Function f |
    f.isMethod() and
@@ -117,7 +117,7 @@ class TrackedSelfTest extends InlineExpectationsTest {
 // -----------------------------------------------------------------------------
 // This modeling follows the same pattern that we currently use in our real library modeling.
 /** Gets a reference to `foo` (fictive module). */
-private DataFlow::LocalSourceNode foo(DataFlow::TypeTracker t) {
+private DataFlow::TypeTrackingNode foo(DataFlow::TypeTracker t) {
  t.start() and
  result = DataFlow::importNode("foo")
  or
@@ -128,7 +128,7 @@ private DataFlow::LocalSourceNode foo(DataFlow::TypeTracker t) {
 DataFlow::Node foo() { foo(DataFlow::TypeTracker::end()).flowsTo(result) }

 /** Gets a reference to `foo.bar` (fictive module). */
-private DataFlow::LocalSourceNode foo_bar(DataFlow::TypeTracker t) {
+private DataFlow::TypeTrackingNode foo_bar(DataFlow::TypeTracker t) {
  t.start() and
  result = DataFlow::importNode("foo.bar")
  or
@@ -142,7 +142,7 @@ private DataFlow::LocalSourceNode foo_bar(DataFlow::TypeTracker t) {
 DataFlow::Node foo_bar() { foo_bar(DataFlow::TypeTracker::end()).flowsTo(result) }

 /** Gets a reference to `foo.bar.baz` (fictive attribute on `foo.bar` module). */
-private DataFlow::LocalSourceNode foo_bar_baz(DataFlow::TypeTracker t) {
+private DataFlow::TypeTrackingNode foo_bar_baz(DataFlow::TypeTracker t) {
  t.start() and
  result = DataFlow::importNode("foo.bar.baz")
  or
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.expected
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.expected
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.ql
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.ql
@@ -0,0 +1,3 @@
+import python
+import experimental.meta.ConceptsTest
+import experimental.semmle.python.frameworks.SqlAlchemy
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.expected
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.ql
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.ql
@@ -0,0 +1,2 @@
+import experimental.meta.InlineTaintTest
+import experimental.semmle.python.frameworks.SqlAlchemy
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/SqlExecution.py
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/SqlExecution.py
@@ -0,0 +1,57 @@
+import sqlalchemy
+from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.pool import StaticPool
+from sqlalchemy.orm import relationship, backref, sessionmaker, joinedload
+from sqlalchemy.sql import text
+
+engine = create_engine(
+    'sqlite:///:memory:',
+    echo=True,
+    connect_args={"check_same_thread": False},
+    poolclass=StaticPool
+)
+
+Base = declarative_base()
+
+class User(Base):
+    __tablename__ = 'users'
+
+    id = Column(Integer, primary_key=True)
+    name = Column(String)
+
+Base.metadata.create_all(engine)
+
+Session = sessionmaker(bind=engine)
+session = Session()
+
+ed_user = User(name='ed')
+ed_user2 = User(name='george')
+
+session.add(ed_user)
+session.add(ed_user2)
+
+session.commit()
+
+# Injection without requiring the text() taint-step
+session.query(User).filter_by(name="some sql")  # $ MISSING: getSql="some sql"
+session.scalar("some sql")  # $ getSql="some sql"
+engine.scalar("some sql")  # $ getSql="some sql"
+session.execute("some sql")  # $ getSql="some sql"
+
+with engine.connect() as connection:
+    connection.execute("some sql")  # $ getSql="some sql"
+
+with engine.begin() as connection:
+    connection.execute("some sql")  # $ getSql="some sql"
+
+# Injection requiring the text() taint-step
+t = text("some sql")
+session.query(User).filter(t)  # $ getSql=t
+session.query(User).group_by(User.id).having(t)  # $ getSql=User.id MISSING: getSql=t
+session.query(User).group_by(t).first()  # $ getSql=t
+session.query(User).order_by(t).first()  # $ getSql=t
+
+query = sqlalchemy.select(User).where(User.name == t)  # $ MISSING: getSql=t
+with engine.connect() as conn:
+    conn.execute(query) # $ getSql=query
--- a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/taint_test.py
+++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/taint_test.py
@@ -0,0 +1,12 @@
+import sqlalchemy
+
+def test_taint():
+    ts = TAINTED_STRING
+
+    ensure_tainted(
+        ts, # $ tainted
+        sqlalchemy.text(ts), # $ tainted
+        sqlalchemy.sql.text(ts),# $ tainted
+        sqlalchemy.sql.expression.text(ts),# $ tainted
+        sqlalchemy.sql.expression.TextClause(ts),# $ tainted
+    )
--- a/python/ql/test/experimental/meta/ConceptsTest.qll
+++ b/python/ql/test/experimental/meta/ConceptsTest.qll
@@ -93,6 +93,23 @@ class EncodingTest extends InlineExpectationsTest {
  }
 }

+class LoggingTest extends InlineExpectationsTest {
+  LoggingTest() { this = "LoggingTest" }
+
+  override string getARelevantTag() { result in ["loggingInput"] }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(Logging logging, DataFlow::Node data |
+      location = data.getLocation() and
+      element = data.toString() and
+      value = prettyNodeForInlineTest(data) and
+      data = logging.getAnInput() and
+      tag = "loggingInput"
+    )
+  }
+}
+
 class CodeExecutionTest extends InlineExpectationsTest {
  CodeExecutionTest() { this = "CodeExecutionTest" }

@@ -129,6 +146,38 @@ class SqlExecutionTest extends InlineExpectationsTest {
  }
 }

+class EscapingTest extends InlineExpectationsTest {
+  EscapingTest() { this = "EscapingTest" }
+
+  override string getARelevantTag() { result in ["escapeInput", "escapeOutput", "escapeKind"] }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(Escaping esc |
+      exists(DataFlow::Node data |
+        location = data.getLocation() and
+        element = data.toString() and
+        value = prettyNodeForInlineTest(data) and
+        (
+          data = esc.getAnInput() and
+          tag = "escapeInput"
+          or
+          data = esc.getOutput() and
+          tag = "escapeOutput"
+        )
+      )
+      or
+      exists(string format |
+        location = esc.getLocation() and
+        element = format and
+        value = format and
+        format = esc.getKind() and
+        tag = "escapeKind"
+      )
+    )
+  }
+}
+
 class HttpServerRouteSetupTest extends InlineExpectationsTest {
  HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }

@@ -252,6 +301,38 @@ class HttpServerHttpRedirectResponseTest extends InlineExpectationsTest {
  }
 }

+class HttpServerCookieWriteTest extends InlineExpectationsTest {
+  HttpServerCookieWriteTest() { this = "HttpServerCookieWriteTest" }
+
+  override string getARelevantTag() {
+    result in ["CookieWrite", "CookieRawHeader", "CookieName", "CookieValue"]
+  }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(HTTP::Server::CookieWrite cookieWrite |
+      location = cookieWrite.getLocation() and
+      (
+        element = cookieWrite.toString() and
+        value = "" and
+        tag = "CookieWrite"
+        or
+        element = cookieWrite.toString() and
+        value = prettyNodeForInlineTest(cookieWrite.getHeaderArg()) and
+        tag = "CookieRawHeader"
+        or
+        element = cookieWrite.toString() and
+        value = prettyNodeForInlineTest(cookieWrite.getNameArg()) and
+        tag = "CookieName"
+        or
+        element = cookieWrite.toString() and
+        value = prettyNodeForInlineTest(cookieWrite.getValueArg()) and
+        tag = "CookieValue"
+      )
+    )
+  }
+}
+
 class FileSystemAccessTest extends InlineExpectationsTest {
  FileSystemAccessTest() { this = "FileSystemAccessTest" }

@@ -269,6 +350,23 @@ class FileSystemAccessTest extends InlineExpectationsTest {
  }
 }

+class FileSystemWriteAccessTest extends InlineExpectationsTest {
+  FileSystemWriteAccessTest() { this = "FileSystemWriteAccessTest" }
+
+  override string getARelevantTag() { result = "fileWriteData" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(FileSystemWriteAccess write, DataFlow::Node data |
+      data = write.getADataNode() and
+      location = data.getLocation() and
+      element = data.toString() and
+      value = prettyNodeForInlineTest(data) and
+      tag = "fileWriteData"
+    )
+  }
+}
+
 class PathNormalizationTest extends InlineExpectationsTest {
  PathNormalizationTest() { this = "PathNormalizationTest" }

--- a/python/ql/test/experimental/query-tests/Security/CWE-287/ImproperLdapAuth.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/ImproperLdapAuth.expected
@@ -0,0 +1,10 @@
+| auth_bad_2.py:19:5:19:42 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_2.py:33:5:33:44 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_2.py:47:5:47:43 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_2.py:60:5:60:52 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_2.py:73:5:73:39 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_2.py:87:5:87:48 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
+| auth_bad_3.py:19:12:19:43 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
+| auth_bad_3.py:33:12:33:57 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
+| auth_bad_3.py:46:12:46:55 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
+| auth_bad_3.py:60:12:60:42 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
--- a/python/ql/test/experimental/query-tests/Security/CWE-287/ImproperLdapAuth.qlref
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/ImproperLdapAuth.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-287/ImproperLdapAuth.ql
--- a/python/ql/test/experimental/query-tests/Security/CWE-287/auth_bad_2.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/auth_bad_2.py
@@ -0,0 +1,92 @@
+from flask import request, Flask
+import ldap
+import ldap.filter
+import ldap.dn
+
+app = Flask(__name__)
+
+
+@app.route("/simple_bind_example")
+def simple_bind_example():
+    """
+    The bind's password argument is not set
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.simple_bind('cn=root')
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/simple_bind_s_example")
+def simple_bind_s_example():
+    """
+    The bind's password argument is not set
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.simple_bind_s('cn=root')
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/bind_s_example")
+def bind_s_example():
+    """
+    The bind's password argument is set to None
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind_s('cn=root', None)
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+@app.route("/bind_s_example")
+def bind_s_example_kwargs():
+    """
+    The bind's password argument is set to None
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind_s(who='cn=root', cred=None)
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+@app.route("/bind_example")
+def bind_example():
+    """
+    The bind's password argument is an empty string
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind('cn=root', "")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/bind_example_kwargs")
+def bind_example_kwargs():
+    """
+    The bind's password argument is an empty string, passed via the `cred` keyword
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind(who='cn=root', cred="")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+# if __name__ == "__main__":
+#     app.run(debug=True)
--- a/python/ql/test/experimental/query-tests/Security/CWE-287/auth_bad_3.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/auth_bad_3.py
@@ -0,0 +1,65 @@
+from ldap3 import Server, Connection, ALL
+from flask import request, Flask
+from ldap3.utils.dn import escape_rdn
+from ldap3.utils.conv import escape_filter_chars
+
+app = Flask(__name__)
+
+
+@app.route("/passwordNone")
+def passwordNone():
+    """
+    The bind's password argument is set to None
+    """
+
+    dn = "dc={}".format(escape_rdn(request.args['dc']))
+    search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
+
+    srv = Server('servername', get_info=ALL)
+    conn = Connection(srv, 'user_dn', None)
+    status, result, response, _ = conn.search(dn, search_filter)
+
+
+@app.route("/passwordNone")
+def passwordNoneKwargs():
+    """
+    The bind's password argument is set to None
+    """
+
+    dn = "dc={}".format(escape_rdn(request.args['dc']))
+    search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
+
+    srv = Server('servername', get_info=ALL)
+    conn = Connection(srv, user='user_dn', password=None)
+    status, result, response, _ = conn.search(dn, search_filter)
+
+@app.route("/passwordEmpty")
+def passwordEmpty():
+    """
+    The bind's password argument is an empty string
+    """
+
+    dn = "dc={}".format(escape_rdn(request.args['dc']))
+    search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
+
+    srv = Server('servername', get_info=ALL)
+    conn = Connection(srv, user='user_dn', password="")
+    status, result, response, _ = conn.search(dn, search_filter)
+
+
+@app.route("/notPassword")
+def notPassword():
+    """
+    The bind's password argument is not set
+    """
+
+    dn = "dc={}".format(escape_rdn(request.args['dc']))
+    search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
+
+    srv = Server('servername', get_info=ALL)
+    conn = Connection(srv, user='user_dn')
+    status, result, response, _ = conn.search(dn, search_filter)
+
+
+# if __name__ == "__main__":
+#     app.run(debug=True)
--- a/python/ql/test/experimental/query-tests/Security/CWE-287/auth_good_2.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/auth_good_2.py
@@ -0,0 +1,65 @@
+from flask import request, Flask
+import ldap
+import ldap.filter
+import ldap.dn
+
+app = Flask(__name__)
+
+
+@app.route("/simple_bind_example")
+def simple_bind_example():
+    """
+    The bind's password argument is a non-empty string
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.simple_bind('cn=root', "SecurePa$$!")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/simple_bind_s_example")
+def simple_bind_s_example():
+    """
+    The bind's password argument is a non-empty string
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.simple_bind_s('cn=root', "SecurePa$$!")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/bind_s_example")
+def bind_s_example():
+    """
+    The bind's password argument is a non-empty string
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind_s('cn=root', "SecurePa$$!")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+
+@app.route("/bind_example")
+def bind_example():
+    """
+    The bind's password argument is a non-empty string
+    """
+
+    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
+    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
+
+    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
+    ldap_connection.bind('cn=root', "SecurePa$$!")
+    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
+
+# if __name__ == "__main__":
+#     app.run(debug=True)
--- a/python/ql/test/experimental/query-tests/Security/CWE-287/auth_good_3.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-287/auth_good_3.py
@@ -0,0 +1,24 @@
+from ldap3 import Server, Connection, ALL
+from flask import request, Flask
+from ldap3.utils.dn import escape_rdn
+from ldap3.utils.conv import escape_filter_chars
+
+app = Flask(__name__)
+
+
+@app.route("/passwordFromEnv")
+def passwordFromEnv():
+    """
+    The bind's password argument is a non-empty string
+    """
+
+    dn = "dc={}".format(escape_rdn(request.args['dc']))
+    search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
+
+    srv = Server('servername', get_info=ALL)
+    conn = Connection(srv, user='user_dn',
+                      password="SecurePa$$!")
+    status, result, response, _ = conn.search(dn, search_filter)
+
+# if __name__ == "__main__":
+#     app.run(debug=True)
--- a/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.expected
+++ b/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.expected
@@ -1,5 +0,0 @@
-| ClickHouseDriver.py:15:22:15:106 | ControlFlowNode for Attribute() | ClickHouseDriver.py:15:52:15:105 | ControlFlowNode for BinaryExpr |
-| ClickHouseDriver.py:18:5:18:87 | ControlFlowNode for Attribute() | ClickHouseDriver.py:18:33:18:86 | ControlFlowNode for BinaryExpr |
-| ClickHouseDriver.py:22:5:22:62 | ControlFlowNode for Attribute() | ClickHouseDriver.py:22:33:22:37 | ControlFlowNode for query |
-| ClickHouseDriver.py:27:5:27:74 | ControlFlowNode for Attribute() | ClickHouseDriver.py:27:20:27:73 | ControlFlowNode for BinaryExpr |
-| ClickHouseDriver.py:30:5:30:89 | ControlFlowNode for Attribute() | ClickHouseDriver.py:30:35:30:88 | ControlFlowNode for BinaryExpr |
--- a/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.py
+++ b/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.py
@@ -1,32 +0,0 @@
-from django.conf.urls import url
-from clickhouse_driver import Client
-from clickhouse_driver import connect
-from aioch import Client as aiochClient
-
-# Dummy Client subclass
-class MyClient(Client):
-    def dummy(self):
-        return None
-
-def show_user(request, username):
-
-    # BAD -- Untrusted user input is directly injected into the sql query using async library 'aioch'
-    aclient = aiochClient("localhost")
-    progress = await aclient.execute_with_progress("SELECT * FROM users WHERE username = '%s'" % username)
-
-    # BAD -- Untrusted user input is directly injected into the sql query using native client of library 'clickhouse_driver'
-    Client('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
-
-    # GOOD -- query uses prepared statements
-    query = "SELECT * FROM users WHERE username = %(username)s"
-    Client('localhost').execute(query, {"username": username})
-
-    # BAD -- Untrusted user input is directly injected into the sql query using PEP249 interface
-    conn = connect('clickhouse://localhost')
-    cursor = conn.cursor()
-    cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
-
-    # BAD -- Untrusted user input is directly injected into the sql query using MyClient, which is a subclass of Client
-    MyClient('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
-
-urlpatterns = [url(r'^users/(?P<username>[^/]+)$', show_user)]
--- a/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.ql
+++ b/python/ql/test/experimental/semmle/python/frameworks/clickhouse-driver/ClickHouseDriver.ql
@@ -1,6 +0,0 @@
-import python
-import experimental.semmle.python.frameworks.ClickHouseDriver
-import semmle.python.Concepts
-
-from SqlExecution s
-select s, s.getSql()
--- a/python/ql/test/library-tests/frameworks/aioch/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/aioch/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/aioch/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/aioch/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/aioch/options
+++ b/python/ql/test/library-tests/frameworks/aioch/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=1 --lang=3
--- a/python/ql/test/library-tests/frameworks/aioch/sql_test.py
+++ b/python/ql/test/library-tests/frameworks/aioch/sql_test.py
@@ -0,0 +1,30 @@
+import aioch
+
+
+SQL = "SOME SQL"
+
+
+async def aioch_test():
+    client = aioch.Client("localhost")
+
+    await client.execute(SQL) # $ getSql=SQL
+    await client.execute(query=SQL) # $ getSql=SQL
+
+    await client.execute_with_progress(SQL) # $ getSql=SQL
+    await client.execute_with_progress(query=SQL) # $ getSql=SQL
+
+    await client.execute_iter(SQL) # $ getSql=SQL
+    await client.execute_iter(query=SQL) # $ getSql=SQL
+
+
+# Using custom client (this has been seen for the blocking version in the
+# `clickhouse_driver` PyPI package)
+
+
+class MyClient(aioch.Client):
+    pass
+
+
+async def test_custom_client():
+    client = MyClient("localhost")
+    await client.execute(SQL) # $ getSql=SQL
--- a/python/ql/test/library-tests/frameworks/aiohttp/response_test.py
+++ b/python/ql/test/library-tests/frameworks/aiohttp/response_test.py
@@ -65,6 +65,20 @@ async def redirect_302(request): # $ requestHandler
    else:
        raise web.HTTPFound(location="/logout") # $ HttpResponse HttpRedirectResponse mimetype=application/octet-stream redirectLocation="/logout"

+################################################################################
+# Cookies
+################################################################################
+
+@routes.get("/setting_cookie") # $ routeSetup="/setting_cookie"
+async def setting_cookie(request): # $ requestHandler
+    resp = web.Response(text="foo") # $ HttpResponse mimetype=text/plain responseBody="foo"
+    resp.cookies["key"] = "value" # $ CookieWrite CookieName="key" CookieValue="value"
+    resp.headers["Set-Cookie"] = "key2=value2" # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
+    resp.set_cookie("key3", "value3") # $ CookieWrite CookieName="key3" CookieValue="value3"
+    resp.set_cookie(name="key3", value="value3") # $ CookieWrite CookieName="key3" CookieValue="value3"
+    resp.del_cookie("key4") # $ CookieWrite CookieName="key4"
+    return resp
+

 if __name__ == "__main__":
    app = web.Application()
--- a/python/ql/test/library-tests/frameworks/clickhouse_driver/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/clickhouse_driver/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/clickhouse_driver/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/clickhouse_driver/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/clickhouse_driver/sql_test.py
+++ b/python/ql/test/library-tests/frameworks/clickhouse_driver/sql_test.py
@@ -0,0 +1,42 @@
+import clickhouse_driver
+
+
+SQL = "SOME SQL"
+
+
+# Normal operation
+client = clickhouse_driver.client.Client("localhost")
+
+client.execute(SQL) # $ getSql=SQL
+client.execute(query=SQL) # $ getSql=SQL
+
+client.execute_with_progress(SQL) # $ getSql=SQL
+client.execute_with_progress(query=SQL) # $ getSql=SQL
+
+client.execute_iter(SQL) # $ getSql=SQL
+client.execute_iter(query=SQL) # $ getSql=SQL
+
+
+# commonly used alias
+client = clickhouse_driver.Client("localhost")
+client.execute(SQL) # $ getSql=SQL
+
+
+# Using PEP249 interface
+conn = clickhouse_driver.connect('clickhouse://localhost')
+cursor = conn.cursor()
+cursor.execute(SQL) # $ getSql=SQL
+
+
+# Using custom client
+#
+# examples from real world code
+# https://github.com/Altinity/clickhouse-mysql-data-reader/blob/3b1b7088751b05e5bbf45890c5949b58208c2343/clickhouse_mysql/dbclient/chclient.py#L10
+# https://github.com/Felixoid/clickhouse-plantuml/blob/d8b2ba7d164a836770ec21f5e4035dfb04c41d9c/clickhouse_plantuml/client.py#L9
+
+
+class MyClient(clickhouse_driver.Client):
+    pass
+
+
+MyClient("localhost").execute(SQL) # $ getSql=SQL
--- a/python/ql/test/library-tests/frameworks/django-v2-v3/response_test.py
+++ b/python/ql/test/library-tests/frameworks/django-v2-v3/response_test.py
@@ -103,3 +103,17 @@ class CustomJsonResponse(JsonResponse):

 def safe__custom_json_response(request):
    return CustomJsonResponse("ACME Responses", {"foo": request.GET.get("foo")})  # $HttpResponse mimetype=application/json MISSING: responseBody=Dict SPURIOUS: responseBody="ACME Responses"
+
+################################################################################
+# Cookies
+################################################################################
+
+def setting_cookie(request):
+    resp = HttpResponse() # $ HttpResponse mimetype=text/html
+    resp.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
+    resp.set_cookie(key="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
+    resp.headers["Set-Cookie"] = "key2=value2" # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
+    resp.cookies["key3"] = "value3" # $ CookieWrite CookieName="key3" CookieValue="value3"
+    resp.delete_cookie("key4") # $ CookieWrite CookieName="key4"
+    resp.delete_cookie(key="key4") # $ CookieWrite CookieName="key4"
+    return resp
--- a/python/ql/test/library-tests/frameworks/flask/response_test.py
+++ b/python/ql/test/library-tests/frameworks/flask/response_test.py
@@ -184,6 +184,20 @@ def redirect_simple():  # $requestHandler
    return resp  # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=resp


+################################################################################
+# Cookies
+################################################################################
+
+@app.route("/setting_cookie")  # $routeSetup="/setting_cookie"
+def setting_cookie():  # $requestHandler
+    resp = make_response() # $ HttpResponse mimetype=text/html
+    resp.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
+    resp.set_cookie(key="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
+    resp.headers.add("Set-Cookie", "key2=value2") # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
+    resp.delete_cookie("key3") # $ CookieWrite CookieName="key3"
+    resp.delete_cookie(key="key3") # $ CookieWrite CookieName="key3"
+    return resp  # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=resp
+
 ################################################################################


--- a/python/ql/test/library-tests/frameworks/jmespath/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/jmespath/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/jmespath/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/jmespath/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/jmespath/InlineTaintTest.expected
+++ b/python/ql/test/library-tests/frameworks/jmespath/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
--- a/python/ql/test/library-tests/frameworks/jmespath/InlineTaintTest.ql
+++ b/python/ql/test/library-tests/frameworks/jmespath/InlineTaintTest.ql
@@ -0,0 +1 @@
+import experimental.meta.InlineTaintTest
--- a/python/ql/test/library-tests/frameworks/jmespath/taint_test.py
+++ b/python/ql/test/library-tests/frameworks/jmespath/taint_test.py
@@ -0,0 +1,33 @@
+import jmespath
+
+def test_taint():
+    untrusted_data = TAINTED_DICT
+
+    safe_expression = jmespath.compile("foo.bar")
+
+    ensure_tainted(
+        jmespath.search("foo.bar", untrusted_data), # $ tainted
+        jmespath.search("foo.bar", data=untrusted_data), # $ tainted
+
+        safe_expression.search(untrusted_data), # $ tainted
+        safe_expression.search(value=untrusted_data) # $ tainted
+    )
+
+    # since ```jmespath.search("{wat: `foo`}", {})``` works (and outputs a dictionary),
+    # we _could_ add a taint-step from the search expression to the output. However, it
+    # seems more likely to lead to FPs than good results, so these have deliberately not
+    # been included.
+
+    ts = TAINTED_STRING
+    safe_data = {"foo": "bar"}
+
+    unsafe_expression = jmespath.compile(ts)
+
+    ensure_not_tainted(
+        jmespath.search(ts, safe_data),
+        jmespath.search(expression=ts, data=safe_data),
+
+        unsafe_expression,
+        unsafe_expression.search(safe_data),
+        unsafe_expression.search(value=safe_data),
+    )
--- a/python/ql/test/library-tests/frameworks/markupsafe/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/markupsafe/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/markupsafe/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/markupsafe/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/markupsafe/InlineTaintTest.expected
+++ b/python/ql/test/library-tests/frameworks/markupsafe/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
--- a/python/ql/test/library-tests/frameworks/markupsafe/InlineTaintTest.ql
+++ b/python/ql/test/library-tests/frameworks/markupsafe/InlineTaintTest.ql
@@ -0,0 +1,13 @@
+import experimental.meta.InlineTaintTest
+import semmle.python.Concepts
+
+class HtmlSpecialization extends TestTaintTrackingConfiguration {
+  // TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
+  // `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
+  // is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
+  // python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
+  //
+  // However, it is better than `getAnInput()`. Due to use-use flow, that would remove
+  // the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
+  override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
+}
--- a/python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
+++ b/python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
@@ -0,0 +1,81 @@
+from markupsafe import escape, escape_silent, Markup
+
+def ensure_tainted(*args):
+    print("ensure_tainted")
+    for x in args: print(" ", x)
+
+def ensure_not_tainted(*args):
+    print("ensure_not_tainted")
+    for x in args: print(" ", x)
+
+# these contain `{}` so we can use .format, and `%s` so we can use %-style formatting
+TAINTED_STRING = '<"TAINTED_STRING" {} %s>'
+SAFE = "SAFE {} %s"
+
+def test():
+    ts = TAINTED_STRING
+
+    # class `Markup` can be used for things that are already safe.
+    # if used with any text in a string operation, that other text will be escaped.
+    #
+    # see https://markupsafe.palletsprojects.com/en/2.0.x/
+    m_unsafe = Markup(TAINTED_STRING)
+    m_safe = Markup(SAFE)
+
+
+    # these 3 tests might look strange, but the purpose is to check we still treat `ts`
+    # as tainted even after it has been escaped in some place. This _might_ not be the
+    # case since the data-flow library has taint-steps from adjacent uses...
+    ensure_tainted(ts) # $ tainted
+    ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
+    ensure_tainted(ts) # $ tainted
+
+    ensure_tainted(
+        ts, # $ tainted
+        m_unsafe, # $ tainted
+        m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
+        SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
+        m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
+        m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
+        m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
+
+        m_safe.format(m_unsafe), # $ tainted
+        m_safe % m_unsafe, # $ tainted
+
+        escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
+        escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
+    )
+
+    ensure_not_tainted(
+        escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
+        escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
+
+        Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
+
+        m_safe,
+        m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
+        ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
+        m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
+        m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
+
+        escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
+        escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
+        Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
+    )
+
+    # flask re-exports these, as:
+    # flask.escape = markupsafe.escape
+    # flask.Markup = markupsafe.Markup
+    import flask
+
+    ensure_tainted(
+        flask.Markup(ts), # $ tainted
+    )
+
+    ensure_not_tainted(
+        flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
+        flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
+    )
+
+
+test()
--- a/python/ql/test/library-tests/frameworks/modeling-example/SharedCode.qll
+++ b/python/ql/test/library-tests/frameworks/modeling-example/SharedCode.qll
@@ -6,7 +6,7 @@ private import semmle.python.dataflow.new.TaintTracking
 /** A data-flow Node representing an instance of MyClass. */
 abstract class MyClass extends DataFlow::Node { }

-private DataFlow::LocalSourceNode myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
+private DataFlow::TypeTrackingNode myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
  t.startInAttr("get_value") and
  result = qualifier
  or
--- a/python/ql/test/library-tests/frameworks/rsa/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/rsa/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/rsa/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/rsa/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/library-tests/frameworks/rsa/InlineTaintTest.expected
+++ b/python/ql/test/library-tests/frameworks/rsa/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
--- a/python/ql/test/library-tests/frameworks/rsa/InlineTaintTest.ql
+++ b/python/ql/test/library-tests/frameworks/rsa/InlineTaintTest.ql
@@ -0,0 +1 @@
+import experimental.meta.InlineTaintTest
--- a/python/ql/test/library-tests/frameworks/rsa/test_rsa.py
+++ b/python/ql/test/library-tests/frameworks/rsa/test_rsa.py
@@ -0,0 +1,68 @@
+# Following examples from https://stuvel.eu/python-rsa-doc/usage.html
+import rsa
+
+# using a rather low keysize, since otherwise it takes quite long to run.
+(public_key, private_key) = rsa.newkeys(512) # $ PublicKeyGeneration keySize=512
+(public_key, private_key) = rsa.newkeys(nbits=512) # $ PublicKeyGeneration keySize=512
+
+
+# ------------------------------------------------------------------------------
+# encrypt/decrypt
+# ------------------------------------------------------------------------------
+
+# Note: These are using PKCS#1 v1.5
+
+print("encrypt/decrypt")
+
+secret_message = b"secret message"
+
+encrypted = rsa.encrypt(secret_message, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=secret_message
+encrypted = rsa.encrypt(message=secret_message, pub_key=public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=secret_message
+
+print("encrypted={}".format(encrypted))
+
+print()
+
+decrypted = rsa.decrypt(encrypted, private_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=encrypted
+decrypted = rsa.decrypt(crypto=encrypted, priv_key=private_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=encrypted
+
+print("decrypted={}".format(decrypted))
+assert decrypted == secret_message
+
+print("\n---\n")
+
+# ------------------------------------------------------------------------------
+# sign/verify
+# ------------------------------------------------------------------------------
+
+# Note: These are using PKCS#1 v1.5
+
+print("sign/verify")
+
+message = b"message"
+other_message = b"other message"
+
+hash = rsa.compute_hash(message, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
+hash = rsa.compute_hash(message=message, method_name="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
+signature_from_hash = rsa.sign_hash(hash, private_key, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=hash
+signature_from_hash = rsa.sign_hash(hash_value=hash, priv_key=private_key, hash_method="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=hash
+
+signature = rsa.sign(message, private_key, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
+signature = rsa.sign(message=message, priv_key=private_key, hash_method="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
+
+assert signature == signature_from_hash
+
+print("signature={}".format(signature))
+
+print()
+
+rsa.verify(message, signature, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=message CryptographicOperationInput=signature
+rsa.verify(message=message, signature=signature, pub_key=public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=message CryptographicOperationInput=signature
+
+print("Signature verified (as expected)")
+
+try:
+    rsa.verify(other_message, signature, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=other_message CryptographicOperationInput=signature
+    raise Exception("Signature verified (unexpected)")
+except rsa.VerificationError:
+    print("Signature mismatch (as expected)")
--- a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
@@ -0,0 +1,23 @@
+from pathlib import Path, PosixPath, WindowsPath
+
+p = Path("filepath")
+posix = PosixPath("posix/filepath")
+windows = WindowsPath("windows/filepath")
+
+p.chmod(0o777)  # $ getAPathArgument=p
+posix.chmod(0o777)  # $ getAPathArgument=posix
+windows.chmod(0o777)  # $ getAPathArgument=windows
+
+with p.open() as f:  # $ getAPathArgument=p
+    f.read()
+
+p.write_bytes(b"hello")  # $ getAPathArgument=p fileWriteData=b"hello"
+p.write_text("hello")  # $ getAPathArgument=p fileWriteData="hello"
+p.open("wt").write("hello")  # $ getAPathArgument=p fileWriteData="hello"
+
+name = windows.parent.name
+o = open
+o(name)  # $ getAPathArgument=name
+
+wb = p.write_bytes
+wb(b"hello")  # $ getAPathArgument=p fileWriteData=b"hello"
--- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py
@@ -1,39 +1,29 @@
 import builtins
 import io

-open("filepath")  # $getAPathArgument="filepath"
-open(file="filepath")  # $getAPathArgument="filepath"
+open("filepath")  # $ getAPathArgument="filepath"
+open(file="filepath")  # $ getAPathArgument="filepath"

 o = open

-o("filepath")  # $getAPathArgument="filepath"
-o(file="filepath")  # $getAPathArgument="filepath"
+o("filepath")  # $ getAPathArgument="filepath"
+o(file="filepath")  # $ getAPathArgument="filepath"


-builtins.open("filepath")  # $getAPathArgument="filepath"
-builtins.open(file="filepath")  # $getAPathArgument="filepath"
+builtins.open("filepath")  # $ getAPathArgument="filepath"
+builtins.open(file="filepath")  # $ getAPathArgument="filepath"


-io.open("filepath")  # $getAPathArgument="filepath"
-io.open(file="filepath")  # $getAPathArgument="filepath"
+io.open("filepath")  # $ getAPathArgument="filepath"
+io.open(file="filepath")  # $ getAPathArgument="filepath"

-from pathlib import Path, PosixPath, WindowsPath
+f = open("path") # $ getAPathArgument="path"
+f.write("foo") # $ getAPathArgument="path" fileWriteData="foo"
+lines = ["foo"]
+f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines

-p = Path("filepath")
-posix = PosixPath("posix/filepath")
-windows = WindowsPath("windows/filepath")

-p.chmod(0o777)  # $getAPathArgument=p
-posix.chmod(0o777)  # $getAPathArgument=posix
-windows.chmod(0o777)  # $getAPathArgument=windows
+def through_function(open_file):
+    open_file.write("foo") # $ fileWriteData="foo" getAPathArgument="path"

-with p.open() as f:  # $getAPathArgument=p
-    f.read()
-
-p.write_bytes(b"hello")  # $getAPathArgument=p
-
-name = windows.parent.name
-o(name)  # $getAPathArgument=name
-
-wb = p.write_bytes
-wb(b"hello")  # $getAPathArgument=p
+through_function(f)
--- a/python/ql/test/library-tests/frameworks/stdlib/Logging.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/Logging.py
@@ -0,0 +1,45 @@
+import logging
+
+# this bit is just included to make this file runnable
+logging.basicConfig(level=logging.DEBUG)
+
+password = "<pass>"
+msg = "foo %s"
+
+LOGGER = logging.getLogger("LOGGER")
+
+logging.info(msg, password) # $ loggingInput=msg loggingInput=password
+logging.info(msg="hello") # $ loggingInput="hello"
+
+logging.log(logging.INFO, msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.log(logging.INFO, msg, password) # $ loggingInput=msg loggingInput=password
+
+logging.root.info(msg, password) # $ loggingInput=msg loggingInput=password
+
+# test of all levels
+
+logging.critical(msg, password) # $ loggingInput=msg loggingInput=password
+logging.fatal(msg, password) # $ loggingInput=msg loggingInput=password
+logging.error(msg, password) # $ loggingInput=msg loggingInput=password
+logging.warning(msg, password) # $ loggingInput=msg loggingInput=password
+logging.warn(msg, password) # $ loggingInput=msg loggingInput=password
+logging.info(msg, password) # $ loggingInput=msg loggingInput=password
+logging.debug(msg, password) # $ loggingInput=msg loggingInput=password
+logging.exception(msg, password) # $ loggingInput=msg loggingInput=password
+
+LOGGER.critical(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.fatal(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.error(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.warning(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.warn(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.info(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.debug(msg, password) # $ loggingInput=msg loggingInput=password
+LOGGER.exception(msg, password) # $ loggingInput=msg loggingInput=password
+
+# not sure how to make these print anything, but just to show that it works
+logging.Logger("foo").info("hello") # $ loggingInput="hello"
+
+class MyLogger(logging.Logger):
+    pass
+
+MyLogger("bar").info("hello") # $ loggingInput="hello"
--- a/python/ql/test/library-tests/frameworks/tornado/response_test.py
+++ b/python/ql/test/library-tests/frameworks/tornado/response_test.py
@@ -58,6 +58,18 @@ class ExampleConnectionWrite(tornado.web.RequestHandler):
            stream.write(b"foo stream") # $ MISSING: HttpResponse responseBody=b"foo stream"
            stream.close()

+################################################################################
+# Cookies
+################################################################################
+
+class CookieWriting(tornado.web.RequestHandler):
+    def get(self):  # $ requestHandler
+        self.write("foo") # $ HttpResponse mimetype=text/html responseBody="foo"
+        self.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
+        self.set_cookie(name="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
+        self.set_header("Set-Cookie", "key2=value2") # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
+
+
 def make_app():
    return tornado.web.Application(
        [
@@ -66,6 +78,7 @@ def make_app():
            (r"/ExampleRedirect", ExampleRedirect), # $ routeSetup="/ExampleRedirect"
            (r"/ExampleConnectionWrite", ExampleConnectionWrite), # $ routeSetup="/ExampleConnectionWrite"
            (r"/ExampleConnectionWrite/(stream)", ExampleConnectionWrite), # $ routeSetup="/ExampleConnectionWrite/(stream)"
+            (r"/CookieWriting", CookieWriting), # $ routeSetup="/CookieWriting"
        ],
        debug=True,
    )
@@ -74,6 +87,7 @@ def make_app():
 if __name__ == "__main__":
    import tornado.ioloop

+    print("running on http://localhost:8888/")
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
--- a/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.expected
+++ b/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.expected
--- a/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.ql
+++ b/python/ql/test/library-tests/frameworks/twisted/ConceptsTest.ql
@@ -0,0 +1,12 @@
+import python
+import experimental.meta.ConceptsTest
+
+class DedicatedResponseTest extends HttpServerHttpResponseTest {
+  DedicatedResponseTest() { file.getShortName() = "response_test.py" }
+}
+
+class OtherResponseTest extends HttpServerHttpResponseTest {
+  OtherResponseTest() { not this instanceof DedicatedResponseTest }
+
+  override string getARelevantTag() { result = "HttpResponse" }
+}
--- a/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.expected
+++ b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.expected
@@ -0,0 +1,3 @@
+argumentToEnsureNotTaintedNotMarkedAsSpurious
+untaintedArgumentToEnsureTaintedNotMarkedAsMissing
+failures
--- a/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.ql
+++ b/python/ql/test/library-tests/frameworks/twisted/InlineTaintTest.ql
@@ -0,0 +1 @@
+import experimental.meta.InlineTaintTest
--- a/python/ql/test/library-tests/frameworks/twisted/response_test.py
+++ b/python/ql/test/library-tests/frameworks/twisted/response_test.py
@@ -0,0 +1,80 @@
+from twisted.web.server import Site, Request, NOT_DONE_YET
+from twisted.web.resource import Resource
+from twisted.internet import reactor, endpoints, defer
+
+
+root = Resource()
+
+class Now(Resource):
+    def render(self, request: Request): # $ requestHandler
+        return b"now" # $ HttpResponse mimetype=text/html responseBody=b"now"
+
+
+class AlsoNow(Resource):
+    def render(self, request: Request): # $ requestHandler
+        request.write(b"also now") # $ HttpResponse mimetype=text/html responseBody=b"also now"
+        return b"" # $ HttpResponse mimetype=text/html responseBody=b""
+
+
+def process_later(request: Request):
+    print("process_later called")
+    request.write(b"later") # $ MISSING: responseBody=b"later"
+    request.finish()
+
+
+class Later(Resource):
+    def render(self, request: Request): # $ requestHandler
+        # process the request in 1 second
+        print("setting up callback for process_later")
+        reactor.callLater(1, process_later, request)
+        return NOT_DONE_YET # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=NOT_DONE_YET
+
+
+class PlainText(Resource):
+    def render(self, request: Request): # $ requestHandler
+        request.setHeader(b"content-type", "text/plain")
+        return b"this is plain text" # $ HttpResponse responseBody=b"this is plain text" SPURIOUS: mimetype=text/html MISSING: mimetype=text/plain
+
+
+class Redirect(Resource):
+    def render_GET(self, request: Request): # $ requestHandler
+        request.redirect("/new-location") # $ HttpRedirectResponse redirectLocation="/new-location" HttpResponse mimetype=text/html
+        # By default, this `hello` output is not returned... not even when
+        # requested with curl.
+        return b"hello" # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=b"hello"
+
+################################################################################
+# Cookies
+################################################################################
+
+class CookieWriting(Resource):
+    """Examples of providing values in the response that are not in the body
+    """
+    def render_GET(self, request: Request): # $ requestHandler
+        request.addCookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
+        request.addCookie(k="key", v="value") # $ CookieWrite CookieName="key" CookieValue="value"
+        val = "key2=value"
+        request.cookies.append(val) # $ CookieWrite CookieRawHeader=val
+
+        request.responseHeaders.addRawHeader("key", "value")
+        request.setHeader("Set-Cookie", "key3=value3") # $ MISSING: CookieWrite CookieRawHeader="key3=value3"
+
+        return b"" # $ HttpResponse mimetype=text/html responseBody=b""
+
+
+root.putChild(b"now", Now())
+root.putChild(b"also-now", AlsoNow())
+root.putChild(b"later", Later())
+root.putChild(b"plain-text", PlainText())
+root.putChild(b"redirect", Redirect())
+root.putChild(b"setting_cookie", CookieWriting())
+
+
+if __name__ == "__main__":
+    factory = Site(root)
+    endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
+    endpoint.listen(factory)
+
+    print("Will run on http://localhost:8880")
+
+    reactor.run()
--- a/python/ql/test/library-tests/frameworks/twisted/routing_test.py
+++ b/python/ql/test/library-tests/frameworks/twisted/routing_test.py
@@ -0,0 +1,47 @@
+from twisted.web.server import Site, Request
+from twisted.web.resource import Resource
+from twisted.internet import reactor, endpoints
+
+
+root = Resource()
+
+
+class Foo(Resource):
+    def render(self, request: Request): # $ requestHandler
+        print(f"{request.content=}")
+        print(f"{request.cookies=}")
+        print(f"{request.received_cookies=}")
+        return b"I am Foo" # $ HttpResponse
+
+
+root.putChild(b"foo", Foo())
+
+
+class Child(Resource):
+    def __init__(self, name):
+        self.name = name.decode("utf-8")
+
+    def render_GET(self, request): # $ requestHandler
+        return f"Hi, I'm child '{self.name}'".encode("utf-8") # $ HttpResponse
+
+
+class Parent(Resource):
+    def getChild(self, path, request): # $ requestHandler
+        print(path, type(path))
+        return Child(path)
+
+    def render_GET(self, request): # $ requestHandler
+        return b"Hi, I'm parent" # $ HttpResponse
+
+
+root.putChild(b"parent", Parent())
+
+
+if __name__ == "__main__":
+    factory = Site(root)
+    endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
+    endpoint.listen(factory)
+
+    print("Will run on http://localhost:8880")
+
+    reactor.run()
--- a/python/ql/test/library-tests/frameworks/twisted/taint_test.py
+++ b/python/ql/test/library-tests/frameworks/twisted/taint_test.py
@@ -0,0 +1,70 @@
+from twisted.web.resource import Resource
+from twisted.web.server import Request
+
+class MyTaintTest(Resource):
+    def getChild(self, path, request): # $ requestHandler
+        ensure_tainted(path, request) # $ tainted
+
+    def render(self, request): # $ requestHandler
+        ensure_tainted(request) # $ tainted
+
+    def render_GET(self, request: Request): # $ requestHandler
+        # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html
+        ensure_tainted(
+            request, # $ tainted
+
+            request.uri, # $ tainted
+            request.path, # $ tainted
+            request.prepath, # $ tainted
+            request.postpath, # $ tainted
+
+            # file-like
+            request.content, # $ tainted
+            request.content.read(), # $ MISSING: tainted
+
+            # Dict[bytes, List[bytes]] (for query args)
+            request.args, # $ tainted
+            request.args[b"key"], # $ tainted
+            request.args[b"key"][0], # $ tainted
+            request.args.get(b"key"), # $ tainted
+            request.args.get(b"key")[0], # $ tainted
+
+            request.received_cookies, # $ tainted
+            request.received_cookies["key"], # $ tainted
+            request.received_cookies.get("key"), # $ tainted
+            request.getCookie(b"key"), # $ tainted
+
+            # twisted.web.http_headers.Headers
+            # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http_headers.Headers.html
+            request.requestHeaders, # $ tainted
+            request.requestHeaders.getRawHeaders("key"), # $ MISSING: tainted
+            request.requestHeaders.getRawHeaders("key")[0], # $ MISSING: tainted
+            request.requestHeaders.getAllRawHeaders(), # $ MISSING: tainted
+            list(request.requestHeaders.getAllRawHeaders()), # $ MISSING: tainted
+
+            request.getHeader("key"), # $ tainted
+            request.getAllHeaders(), # $ tainted
+            request.getAllHeaders()["key"], # $ tainted
+
+            request.user, # $ tainted
+            request.getUser(), # $ tainted
+
+            request.password, # $ tainted
+            request.getPassword(), # $ tainted
+
+            request.host, # $ tainted
+            request.getHost(), # $ tainted
+            request.getRequestHostname(), # $ tainted
+        )
+
+        # technically user-controlled, but unlikely to lead to vulnerabilities.
+        ensure_not_tainted(
+            request.method,
+        )
+
+        # not tainted at all
+        ensure_not_tainted(
+            # outgoing things
+            request.cookies,
+            request.responseHeaders,
+        )
--- a/python/ql/test/library-tests/regex/Alternation.ql
+++ b/python/ql/test/library-tests/regex/Alternation.ql
@@ -2,6 +2,8 @@ import python
 import semmle.python.regex

 from Regex r, int start, int end, int part_start, int part_end
-where r.alternationOption(start, end, part_start, part_end)
+where
+  r.getLocation().getFile().getBaseName() = "test.py" and
+  r.alternationOption(start, end, part_start, part_end)
 select r.getText(), start, end, r.getText().substring(start, end), part_start, part_end,
  r.getText().substring(part_start, part_end)
--- a/python/ql/test/library-tests/regex/GroupContents.ql
+++ b/python/ql/test/library-tests/regex/GroupContents.ql
@@ -2,6 +2,8 @@ import python
 import semmle.python.regex

 from Regex r, int start, int end, int part_start, int part_end
-where r.groupContents(start, end, part_start, part_end)
+where
+  r.getLocation().getFile().getBaseName() = "test.py" and
+  r.groupContents(start, end, part_start, part_end)
 select r.getText(), start, end, r.getText().substring(start, end), part_start, part_end,
  r.getText().substring(part_start, part_end)
--- a/python/ql/test/library-tests/regex/Mode.ql
+++ b/python/ql/test/library-tests/regex/Mode.ql
@@ -2,4 +2,5 @@ import python
 import semmle.python.regex

 from Regex r
+where r.getLocation().getFile().getBaseName() = "test.py"
 select r.getLocation().getStartLine(), r.getAMode()
--- a/python/ql/test/library-tests/regex/Qualified.expected
+++ b/python/ql/test/library-tests/regex/Qualified.expected
@@ -1,15 +1,15 @@
-| (?!not-this)^[A-Z_]+$ | 13 | 20 | false |
-| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true |
-| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true |
-| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true |
-| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true |
-| (?P<name>[\\w]+)\| | 9 | 14 | false |
-| \\A[+-]?\\d+ | 2 | 7 | true |
-| \\A[+-]?\\d+ | 7 | 10 | false |
-| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true |
-| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true |
-| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
-| ax{01,3} | 1 | 8 | false |
-| ax{3,} | 1 | 6 | false |
-| ax{3} | 1 | 5 | false |
-| ax{,3} | 1 | 6 | true |
+| (?!not-this)^[A-Z_]+$ | 13 | 20 | false | true |
+| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true | false |
+| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true | true |
+| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true | false |
+| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true | true |
+| (?P<name>[\\w]+)\| | 9 | 14 | false | true |
+| \\A[+-]?\\d+ | 2 | 7 | true | false |
+| \\A[+-]?\\d+ | 7 | 10 | false | true |
+| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true | true |
+| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true | true |
+| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false | true |
+| ax{01,3} | 1 | 8 | false | false |
+| ax{3,} | 1 | 6 | false | true |
+| ax{3} | 1 | 5 | false | false |
+| ax{,3} | 1 | 6 | true | false |
--- a/python/ql/test/library-tests/regex/Qualified.ql
+++ b/python/ql/test/library-tests/regex/Qualified.ql
@@ -1,6 +1,8 @@
 import python
 import semmle.python.regex

-from Regex r, int start, int end, boolean maybe_empty
-where r.qualifiedItem(start, end, maybe_empty)
-select r.getText(), start, end, maybe_empty
+from Regex r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
+where
+  r.getLocation().getFile().getBaseName() = "test.py" and
+  r.qualifiedItem(start, end, maybe_empty, may_repeat_forever)
+select r.getText(), start, end, maybe_empty, may_repeat_forever
--- a/python/ql/test/library-tests/regex/Regex.ql
+++ b/python/ql/test/library-tests/regex/Regex.ql
@@ -16,7 +16,7 @@ predicate part(Regex r, int start, int end, string kind) {
  or
  r.group(start, end) and not r.zeroWidthMatch(start, end) and kind = "non-empty group"
  or
-  r.qualifiedItem(start, end, _) and kind = "qualified"
+  r.qualifiedItem(start, end, _, _) and kind = "qualified"
 }

 from Regex r, int start, int end, string kind
--- a/python/ql/test/library-tests/regex/SubstructureTests.expected
+++ b/python/ql/test/library-tests/regex/SubstructureTests.expected
--- a/python/ql/test/library-tests/regex/SubstructureTests.ql
+++ b/python/ql/test/library-tests/regex/SubstructureTests.ql
@@ -0,0 +1,75 @@
+import python
+import TestUtilities.InlineExpectationsTest
+private import semmle.python.regex
+
+class CharacterSetTest extends InlineExpectationsTest {
+  CharacterSetTest() { this = "CharacterSetTest" }
+
+  override string getARelevantTag() { result = "charSet" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    location.getFile().getBaseName() = "charSetTest.py" and
+    exists(Regex re, int start, int end |
+      re.charSet(start, end) and
+      location = re.getLocation() and
+      element = re.getText().substring(start, end) and
+      value = start + ":" + end and
+      tag = "charSet"
+    )
+  }
+}
+
+class CharacterRangeTest extends InlineExpectationsTest {
+  CharacterRangeTest() { this = "CharacterRangeTest" }
+
+  override string getARelevantTag() { result = "charRange" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    location.getFile().getBaseName() = "charRangeTest.py" and
+    exists(Regex re, int start, int lower_end, int upper_start, int end |
+      re.charRange(_, start, lower_end, upper_start, end) and
+      location = re.getLocation() and
+      element = re.getText().substring(start, end) and
+      value = start + ":" + lower_end + "-" + upper_start + ":" + end and
+      tag = "charRange"
+    )
+  }
+}
+
+class EscapeTest extends InlineExpectationsTest {
+  EscapeTest() { this = "EscapeTest" }
+
+  override string getARelevantTag() { result = "escapedCharacter" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    location.getFile().getBaseName() = "escapedCharacterTest.py" and
+    exists(Regex re, int start, int end |
+      re.escapedCharacter(start, end) and
+      location = re.getLocation() and
+      element = re.getText().substring(start, end) and
+      value = start + ":" + end and
+      tag = "escapedCharacter"
+    )
+  }
+}
+
+class GroupTest extends InlineExpectationsTest {
+  GroupTest() { this = "GroupTest" }
+
+  override string getARelevantTag() { result = "group" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    location.getFile().getBaseName() = "groupTest.py" and
+    exists(Regex re, int start, int end |
+      re.group(start, end) and
+      location = re.getLocation() and
+      element = re.getText().substring(start, end) and
+      value = start + ":" + end and
+      tag = "group"
+    )
+  }
+}
--- a/python/ql/test/library-tests/regex/charRangeTest.py
+++ b/python/ql/test/library-tests/regex/charRangeTest.py
@@ -0,0 +1,45 @@
+import re
+
+
+re.compile(r'[A-Z]') #$ charRange=1:2-3:4
+
+try:
+    re.compile(r'[]-[]') #$ SPURIOUS: charRange=1:2-3:4
+    raise Exception("this should not be reached")
+except re.error:
+    pass
+
+re.compile(r'[---]') #$ charRange=1:2-3:4
+re.compile(r'[\---]') #$ charRange=1:3-4:5
+re.compile(r'[--\-]') #$ charRange=1:2-3:5
+re.compile(r'[\--\-]') #$ charRange=1:3-4:6
+re.compile(r'[0-9-A-Z]') #$ charRange=1:2-3:4 charRange=5:6-7:8
+re.compile(r'[0\-9-A-Z]') #$ charRange=4:5-6:7
+
+try:
+    re.compile(r'[0--9-A-Z]') #$ SPURIOUS: charRange=1:2-3:4 charRange=4:5-6:7
+    raise Exception("this should not be reached")
+except re.error:
+    pass
+
+re.compile(r'[^A-Z]') #$ charRange=2:3-4:5
+
+re.compile(r'[\0-\09]') #$ charRange=1:3-4:7
+
+re.compile(r'[\0123-5]') #$ charRange=5:6-7:8
+
+
+#Negative lookahead
+re.compile(r'(?!not-this)^[A-Z_]+$') #$ charRange=14:15-16:17
+#Negative lookbehind
+re.compile(r'^[A-Z_]+$(?<!not-this)') #$ charRange=2:3-4:5
+
+
+#OK -- ODASA-3968
+re.compile('(?:[^%]|^)?%\((\w*)\)[a-z]') #$ charRange=22:23-24:25
+
+#ODASA-3985
+#Half Surrogate pairs
+re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') #$ charRange=1:2-3:4 charRange=6:7-8:9
+#Outside BMP
+re.compile(u'[\U00010000-\U0010ffff]') #$ charRange=1:2-3:4
--- a/python/ql/test/library-tests/regex/charSetTest.py
+++ b/python/ql/test/library-tests/regex/charSetTest.py
@@ -0,0 +1,39 @@
+import re
+re.compile(r'\A[+-]?\d+') #$ charSet=2:6
+re.compile(r'(?P<name>[\w]+)|') #$ charSet=9:13
+re.compile(r'\|\[\][123]|\{\}') #$ charSet=6:11
+re.compile(r'[^A-Z]') #$ charSet=0:6
+re.compile("[]]") #$ charSet=0:3
+re.compile("[][]") #$ charSet=0:4
+re.compile("[^][^]") #$ charSet=0:6
+re.compile("[.][.]") #$ charSet=0:3 charSet=3:6
+re.compile("[[]]") #$ charSet=0:3
+re.compile("[^]]") #$ charSet=0:4
+re.compile("[^-]") #$ charSet=0:4
+
+try:
+    re.compile("[]-[]") #$ SPURIOUS: charSet=0:5
+    raise Exception("this should not be reached")
+except re.error:
+    pass
+
+try:
+    re.compile("[^]-[]") #$ SPURIOUS: charSet=0:6
+    raise Exception("this should not be reached")
+except re.error:
+    pass
+
+re.compile("]]][[[[]") #$ charSet=3:8
+
+
+#ODASA-3985
+#Half Surrogate pairs
+re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') #$ charSet=0:5 charSet=5:10
+#Outside BMP
+re.compile(u'[\U00010000-\U0010ffff]') #$ charSet=0:5
+
+#Misparsed on LGTM
+re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)") #$ charSet=10:14 charSet=28:32
+
+ # parses wrongly, sees this   \|/ as a char set start
+re.compile(r'''(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+''') #$ charSet=3:25 charSet=30:35
--- a/python/ql/test/library-tests/regex/escapedCharacterTest.py
+++ b/python/ql/test/library-tests/regex/escapedCharacterTest.py
@@ -0,0 +1,23 @@
+import re
+
+re.compile(r'\b') #$ escapedCharacter=0:2
+re.compile(r'''\b''') #$ escapedCharacter=0:2
+re.compile(r"\b") #$ escapedCharacter=0:2
+re.compile(u"\b") # not escape
+re.compile("\b") # not escape
+re.compile(r'\\\b') #$ escapedCharacter=0:2 escapedCharacter=2:4
+re.compile(r'[\---]') #$ escapedCharacter=1:3
+re.compile(r'[--\-]') #$ escapedCharacter=3:5
+re.compile(r'[\--\-]') #$ escapedCharacter=1:3 escapedCharacter=4:6
+re.compile(r'[0\-9-A-Z]') #$ escapedCharacter=2:4
+re.compile(r'[\0-\09]') #$ escapedCharacter=1:3 escapedCharacter=4:7
+re.compile(r'[\0123-5]') #$ escapedCharacter=1:5
+
+#ODASA-3985
+#Half Surrogate pairs
+re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') # not escapes
+#Outside BMP
+re.compile(u'[\U00010000-\U0010ffff]') # not escapes
+
+#Misparsed on LGTM
+re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)") #$ escapedCharacter=0:2 escapedCharacter=16:18 escapedCharacter=18:20
--- a/python/ql/test/library-tests/regex/groupTest.py
+++ b/python/ql/test/library-tests/regex/groupTest.py
@@ -0,0 +1,4 @@
+import re
+
+re.compile(r'(?P<first>\w+) (?P<second>\w+)') #$ group=0:14 group=15:30
+re.compile(r'([)(])') #$ group=0:6
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.expected
@@ -0,0 +1,27 @@
+edges
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
+nodes
+| test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
+| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
+| test.py:37:11:37:24 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
+| test.py:39:22:39:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
+| test.py:40:22:40:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
+#select
+| test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:22:58:22:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:23:58:23:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:27:40:27:47 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:30:58:30:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:34:30:34:39 | ControlFlowNode for get_cert() | test.py:34:30:34:39 | ControlFlowNode for get_cert() | test.py:34:30:34:39 | ControlFlowNode for get_cert() | $@ is logged here. | test.py:34:30:34:39 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
+| test.py:37:11:37:24 | ControlFlowNode for get_password() | test.py:37:11:37:24 | ControlFlowNode for get_password() | test.py:37:11:37:24 | ControlFlowNode for get_password() | $@ is logged here. | test.py:37:11:37:24 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | $@ is logged here. | test.py:39:22:39:35 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | $@ is logged here. | test.py:40:22:40:35 | ControlFlowNode for get_password() | Sensitive data (password) |
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.qlref
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/CleartextLogging.qlref
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextLogging/test.py
@@ -0,0 +1,46 @@
+import logging
+import sys
+
+LOGGER = logging.getLogger("LOGGER")
+
+def get_logger():
+    return LOGGER
+
+
+def get_password():
+    return "<PASSWORD>"
+
+
+def get_cert():
+    return "<CERT>"
+
+
+def log_password():
+    password = get_password()
+    logging.info("logging.info Password '%s'", password) # NOT OK
+
+    LOGGER.log(logging.INFO, "LOGGER.log Password '%s'", password) # NOT OK
+    logging.root.info("logging.root.info Password '%s'", password) # NOT OK
+
+    # name of logger variable should not matter
+    foo = LOGGER
+    foo.info("foo.info Password '%s'", password) # NOT OK
+
+    # return value from function
+    get_logger().info("get_logger().info Password '%s'", password) # NOT OK
+
+
+def log_cert():
+    logging.debug("Cert=%s", get_cert()) # NOT OK
+
+def print_password():
+    print(get_password()) # NOT OK
+
+    sys.stdout.write(get_password()) # NOT OK
+    sys.stderr.write(get_password()) # NOT OK
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+    log_password()
+    log_cert()
+    print_password()
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.expected
@@ -0,0 +1,13 @@
+edges
+| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
+| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
+| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
+nodes
+| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
+| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
+| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
+#select
+| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
+| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
+| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.qlref
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/CleartextStorage.qlref
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/test.py
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage-py3/test.py
@@ -0,0 +1,15 @@
+import pathlib
+
+
+def get_cert():
+    return "<CERT>"
+
+
+def write_password(filename):
+    cert = get_cert()
+
+    path = pathlib.Path(filename)
+    path.write_text(cert) # NOT OK
+    path.write_bytes(cert.encode("utf-8")) # NOT OK
+
+    path.open("w").write(cert) # NOT OK
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/CleartextStorage.expected
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/CleartextStorage.expected
@@ -0,0 +1,20 @@
+edges
+| password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | password_in_cookie.py:9:33:9:40 | ControlFlowNode for password |
+| password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | password_in_cookie.py:16:33:16:40 | ControlFlowNode for password |
+| test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:8:20:8:23 | ControlFlowNode for cert |
+| test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:9:17:9:29 | ControlFlowNode for List |
+| test.py:9:17:9:29 | ControlFlowNode for List | test.py:10:25:10:29 | ControlFlowNode for lines |
+nodes
+| password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
+| test.py:6:12:6:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
+| test.py:8:20:8:23 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
+| test.py:9:17:9:29 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| test.py:10:25:10:29 | ControlFlowNode for lines | semmle.label | ControlFlowNode for lines |
+#select
+| password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | $@ is stored here. | password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | Sensitive data (password) |
+| password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | $@ is stored here. | password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | Sensitive data (password) |
+| test.py:8:20:8:23 | ControlFlowNode for cert | test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:8:20:8:23 | ControlFlowNode for cert | $@ is stored here. | test.py:6:12:6:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
+| test.py:10:25:10:29 | ControlFlowNode for lines | test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:10:25:10:29 | ControlFlowNode for lines | $@ is stored here. | test.py:6:12:6:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/CleartextStorage.qlref
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/CleartextStorage.qlref
@@ -0,0 +1 @@
+Security/CWE-312/CleartextStorage.ql
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/password_in_cookie.py
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/password_in_cookie.py
@@ -6,12 +6,12 @@ app = Flask("Leak password")
 def index():
    password = request.args.get("password")
    resp = make_response(render_template(...))
-    resp.set_cookie("password", password)
+    resp.set_cookie("password", password) # NOT OK
    return resp

 @app.route('/')
 def index2():
    password = request.args.get("password")
    resp = Response(...)
-    resp.set_cookie("password", password)
+    resp.set_cookie("password", password) # NOT OK
    return resp
--- a/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/test.py
+++ b/python/ql/test/query-tests/Security/CWE-312-CleartextStorage/test.py
@@ -0,0 +1,10 @@
+def get_cert():
+    return "<CERT>"
+
+
+def write_cert(filename):
+    cert = get_cert()
+    with open(filename, "w") as file:
+        file.write(cert) # NOT OK
+        lines = [cert + "\n"]
+        file.writelines(lines) # NOT OK
--- a/python/ql/test/query-tests/Security/CWE-312/CleartextLogging.expected
+++ b/python/ql/test/query-tests/Security/CWE-312/CleartextLogging.expected
@@ -1,10 +0,0 @@
-edges
-| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
-| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
-| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
-| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
-| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
-#select
-| test.py:8:35:8:42 | password | test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password | Sensitive data returned by $@ is logged here. | test.py:7:16:7:29 | get_password() | a call returning a password |
-| test.py:14:30:14:39 | get_cert() | test.py:14:30:14:39 | a certificate or key | test.py:14:30:14:39 | a certificate or key | Sensitive data returned by $@ is logged here. | test.py:14:30:14:39 | get_cert() | a call returning a certificate or key |
-| test.py:17:11:17:24 | get_password() | test.py:17:11:17:24 | a password | test.py:17:11:17:24 | a password | Sensitive data returned by $@ is logged here. | test.py:17:11:17:24 | get_password() | a call returning a password |
--- a/python/ql/test/query-tests/Security/CWE-312/CleartextStorage.expected
+++ b/python/ql/test/query-tests/Security/CWE-312/CleartextStorage.expected
@@ -1,12 +0,0 @@
-edges
-| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
-| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
-| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
-| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
-| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
-| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
-| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
-#select
-| password_in_cookie.py:9:33:9:40 | password | password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password | Sensitive data from $@ is stored here. | password_in_cookie.py:7:16:7:43 | Attribute() | a request parameter containing a password |
-| password_in_cookie.py:16:33:16:40 | password | password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password | Sensitive data from $@ is stored here. | password_in_cookie.py:14:16:14:43 | Attribute() | a request parameter containing a password |
-| test.py:22:20:22:23 | cert | test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key | Sensitive data from $@ is stored here. | test.py:20:12:20:21 | get_cert() | a call returning a certificate or key |
--- a/python/ql/test/query-tests/Security/CWE-312/options
+++ b/python/ql/test/query-tests/Security/CWE-312/options
@@ -1 +0,0 @@
-semmle-extractor-options: -p ../lib/ --max-import-depth=3
--- a/python/ql/test/query-tests/Security/CWE-312/test.py
+++ b/python/ql/test/query-tests/Security/CWE-312/test.py
@@ -1,22 +0,0 @@
-#Don't import logging; it transitively imports a lot of stuff
-
-def get_password():
-    pass
-
-def log_password():
-    password = get_password()
-    logging.info("Password '%s'", password)
-
-def get_cert():
-    pass
-
-def log_cert():
-    logging.debug("Cert=%s", get_cert())
-
-def print_password():
-    print(get_password())
-
-def write_cert(filename):
-    cert = get_cert()
-    with open(filename, "w") as file:
-        file.write(cert)
--- a/python/ql/test/query-tests/Security/CWE-327-WeakSensitiveDataHashing/WeakSensitiveDataHashing.expected
+++ b/python/ql/test/query-tests/Security/CWE-327-WeakSensitiveDataHashing/WeakSensitiveDataHashing.expected
@@ -1,27 +1,71 @@
 edges
+| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome | test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate |
+| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password |
+| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password |
+| test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:2:23:2:34 | GSSA Variable get_password |
+| test_cryptodome.py:2:23:2:34 | GSSA Variable get_password | test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome |
+| test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate |
+| test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate | test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome |
+| test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous |
 | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous |
+| test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous |
 | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous |
+| test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous |
 | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous |
+| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography | test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate |
+| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password |
+| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password |
+| test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:3:23:3:34 | GSSA Variable get_password |
+| test_cryptography.py:3:23:3:34 | GSSA Variable get_password | test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography |
+| test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate |
+| test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate | test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography |
+| test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous |
 | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous |
+| test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous |
 | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous |
+| test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous |
 | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous |
 nodes
+| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome | semmle.label | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome |
+| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | semmle.label | ModuleVariableNode for Global Variable get_password in Module test_cryptodome |
+| test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| test_cryptodome.py:2:23:2:34 | GSSA Variable get_password | semmle.label | GSSA Variable get_password |
+| test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate | semmle.label | GSSA Variable get_certificate |
+| test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate | semmle.label | ControlFlowNode for get_certificate |
 | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
 | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
+| test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
 | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
 | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
+| test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
 | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
 | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
+| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography | semmle.label | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography |
+| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | semmle.label | ModuleVariableNode for Global Variable get_password in Module test_cryptography |
+| test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| test_cryptography.py:3:23:3:34 | GSSA Variable get_password | semmle.label | GSSA Variable get_password |
+| test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate | semmle.label | GSSA Variable get_certificate |
+| test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate | semmle.label | ControlFlowNode for get_certificate |
 | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
 | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
+| test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
 | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
 | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
+| test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
 | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
 | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
 #select
+| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | Sensitive data (certificate) |
 | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
+| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
 | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
 | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | Sensitive data (certificate) |
 | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
+| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
 | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | Sensitive data (password) |
+| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
 | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | Sensitive data (password) |
--- a/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/PolynomialReDoS.expected
+++ b/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/PolynomialReDoS.expected
@@ -0,0 +1,12 @@
+edges
+| test.py:7:12:7:18 | ControlFlowNode for request | test.py:7:12:7:23 | ControlFlowNode for Attribute |
+| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:8:30:8:33 | ControlFlowNode for text |
+| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:9:32:9:35 | ControlFlowNode for text |
+nodes
+| test.py:7:12:7:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| test.py:7:12:7:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:8:30:8:33 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
+| test.py:9:32:9:35 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
+#select
+| test.py:8:30:8:33 | ControlFlowNode for text | test.py:7:12:7:18 | ControlFlowNode for request | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:7:12:7:18 | ControlFlowNode for request | a user-provided value |
+| test.py:9:32:9:35 | ControlFlowNode for text | test.py:7:12:7:18 | ControlFlowNode for request | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on $@ may run slow on strings with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:7:12:7:18 | ControlFlowNode for request | a user-provided value |
--- a/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/PolynomialReDoS.qlref
+++ b/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/PolynomialReDoS.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-730/PolynomialReDoS.ql
--- a/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/test.py
+++ b/python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/test.py
@@ -0,0 +1,9 @@
+import re
+from flask import Flask, request
+app = Flask(__name__)
+
+@app.route("/poly-redos")
+def code_execution():
+    text = request.args.get("text")
+    re.sub(r"^\s+|\s+$", "", text) # NOT OK
+    re.match(r"^0\.\d+E?\d+$", text) # NOT OK
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/KnownCVEs.py
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/KnownCVEs.py
@@ -0,0 +1,94 @@
+import re
+
+# linear
+# https://github.com/github/codeql-python-CVE-coverage/issues/439
+rex_blame = re.compile(r'\s*(\d+)\s*(\S+) (.*)')
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/402
+whitespace = br"[\000\011\012\014\015\040]"
+whitespace_optional = whitespace + b"*"
+newline_only = br"[\r\n]+"
+newline = whitespace_optional + newline_only + whitespace_optional
+toFlag = re.compile(newline)
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/400
+re.compile(r'[+-]?(\d+)*\.\d+%?')
+re.compile(r'"""\s+(?:.|\n)*?\s+"""')
+re.compile(r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)')
+re.compile(r'".*``.*``.*"')
+re.compile(r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)')
+re.compile(r'(%config)(\s*\(\s*)(\w+)(\s*=\s*)(.*?)(\s*\)\s*)')
+re.compile(r'(%new)(\s*)(\()(\s*.*?\s*)(\))')
+re.compile(r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?')
+re.compile(r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)')
+
+# linear
+# https://github.com/github/codeql-python-CVE-coverage/issues/392
+simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/249
+rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
+                     'realm=(["\']?)([^"\']*)\\2', re.I)
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/248
+gauntlet = re.compile(
+            r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
+            flags=re.U
+        )
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/227
+# from .compat import tobytes
+
+WS = "[ \t]"
+OWS = WS + "{0,}?"
+
+# RFC 7230 Section 3.2.6 "Field Value Components":
+# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+#                / DIGIT / ALPHA
+# obs-text      = %x80-FF
+TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
+OBS_TEXT = r"\x80-\xff"
+TOKEN = TCHAR + "{1,}"
+# RFC 5234 Appendix B.1 "Core Rules":
+# VCHAR         =  %x21-7E
+#                  ; visible (printing) characters
+VCHAR = r"\x21-\x7e"
+# header-field   = field-name ":" OWS field-value OWS
+# field-name     = token
+# field-value    = *( field-content / obs-fold )
+# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar    = VCHAR / obs-text
+# Errata from: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+# changes field-content to:
+#
+# field-content  = field-vchar [ 1*( SP / HTAB / field-vchar )
+#                  field-vchar ]
+
+FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
+FIELD_CONTENT = FIELD_VCHAR + "([ \t" + VCHAR + OBS_TEXT + "]+" + FIELD_VCHAR + "){,1}"
+FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
+
+HEADER_FIELD = re.compile(
+    #  tobytes(
+         "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
+    #  )
+ )
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/224
+pattern = re.compile(
+    r'^(:?(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|'  # domain pt.1
+    r'([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|'  # domain pt.2
+    r'([a-zA-Z0-9][-_a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.)+'  # domain pt.3
+    r'([a-zA-Z]{2,13}|(xn--[a-zA-Z0-9]{2,30}))$'  # TLD
+)
+
+# https://github.com/github/codeql-python-CVE-coverage/issues/189
+URL_REGEX = (
+     r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|'
+     r'[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|'
+     r'(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|'
+     r'[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'  # "emacs!
+)
+
+url = re.compile(URL_REGEX)
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected
@@ -0,0 +1,97 @@
+| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
+| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
+| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
+| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
+| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
+| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
+| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
+| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
+| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
+| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
+| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
+| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
+| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
+| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
+| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
+| redos.py:60:25:60:30 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:61:25:61:30 | [a-z]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:63:26:63:33 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
+| redos.py:68:26:68:41 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
+| redos.py:68:48:68:50 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '[' and containing many repetitions of ']['. |
+| redos.py:73:29:73:36 | (\\\\?.)*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '\\\\a'. |
+| redos.py:76:24:76:31 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
+| redos.py:79:24:79:31 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:91:24:91:31 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:97:25:97:38 | ([\\s\\S]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
+| redos.py:103:25:103:33 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
+| redos.py:109:25:109:33 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
+| redos.py:112:25:112:33 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
+| redos.py:115:25:115:37 | ([0-9]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:127:25:127:38 | ([a-z]\|[d-h])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
+| redos.py:130:25:130:40 | ([^a-z]\|[^0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
+| redos.py:133:25:133:35 | (\\d\|[0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
+| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
+| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
+| redos.py:160:25:160:32 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
+| redos.py:163:25:163:32 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:166:25:166:34 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:169:25:169:37 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
+| redos.py:172:25:172:33 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:175:26:175:30 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
+| redos.py:187:26:187:31 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
+| redos.py:190:27:190:29 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '\\n' and containing many repetitions of '\\n'. |
+| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
+| redos.py:196:78:196:89 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A:' and containing many repetitions of '  A:'. |
+| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
+| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:199:28:199:29 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
+| redos.py:202:26:202:32 | (a+a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:202:27:202:28 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:205:25:205:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:211:25:211:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:217:25:217:27 | \\n+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
+| redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
+| redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
+| redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
+| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
+| redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
+| redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
+| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
+| redos.py:256:49:256:51 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
+| redos.py:256:61:256:63 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
+| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
+| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
+| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
+| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
+| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"" a='. |
+| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
+| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
+| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
+| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
+| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
+| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:319:28:319:33 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
+| redos.py:322:25:322:29 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |
+| redos.py:325:24:325:30 | (a?a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:334:24:334:32 | (?:a\|a?)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:340:27:340:55 | (([a-c]\|[c-d])T(e?e?e?e?\|X))+ | This part of the regular expression may cause exponential backtracking on strings starting with 'PRE' and containing many repetitions of 'cTX'. |
+| redos.py:343:26:343:29 | (a)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
+| redos.py:346:26:346:27 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bb'. |
+| redos.py:352:25:352:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:353:25:353:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:354:25:354:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:355:25:355:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| redos.py:362:25:362:40 | ((?:a{\|-)\|\\w\\{)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{'. |
+| redos.py:363:25:363:43 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
+| redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
+| redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
+| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
+| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
+| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.qlref
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-730/ReDoS.ql
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/redos.py
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/redos.py
@@ -0,0 +1,374 @@
+import re
+
+# NOT GOOD; attack: "_" + "__".repeat(100)
+# Adapted from marked (https://github.com/markedjs/marked), which is licensed
+# under the MIT license; see file marked-LICENSE.
+bad1 = re.compile(r'''^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)''')
+
+# GOOD
+# Adapted from marked (https://github.com/markedjs/marked), which is licensed
+# under the MIT license; see file marked-LICENSE.
+good1 = re.compile(r'^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)')
+
+# GOOD - there is no witness in the end that could cause the regexp to not match
+# Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
+# which is licensed under the MIT license; see file brace-expansion-LICENSE.
+good2 = re.compile(r'(.*,)+.+')
+
+# NOT GOOD; attack: " '" + "\\\\".repeat(100)
+# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
+# which is licensed under the MIT license; see file CodeMirror-LICENSE.
+bad2 = re.compile(r'''^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?''')
+
+# GOOD
+# Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
+good2 = re.compile(r'''\(\*(?:[\s\S]*?\(\*[\s\S]*?\*\))*[\s\S]*?\*\)''')
+
+# GOOD
+# Adapted from jest (https://github.com/facebook/jest), which is licensed
+# under the MIT license; see file jest-LICENSE.
+good3 = re.compile(r'''^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*''')
+
+# NOT GOOD, variant of good3; attack: "a|\n:|\n" + "||\n".repeat(100)
+bad4 = re.compile(r'''^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)a''')
+
+# NOT GOOD; attack: "/" + "\\/a".repeat(100)
+# Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
+# which is licensed under the Apache License 2.0; see file ANodeBlog-LICENSE.
+bad5 = re.compile(r'''\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)''')
+
+# NOT GOOD; attack: "##".repeat(100) + "\na"
+# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
+# which is licensed under the MIT license; see file CodeMirror-LICENSE.
+bad6 = re.compile(r'''^([\s\[\{\(]|#.*)*$''')
+
+# GOOD
+good4 = re.compile(r'''(\r\n|\r|\n)+''')
+
+# BAD - PoC: `node -e "/((?:[^\"\']|\".*?\"|\'.*?\')*?)([(,)]|$)/.test(\"'''''''''''''''''''''''''''''''''''''''''''''\\\"\");"`. It's complicated though, because the regexp still matches something, it just matches the empty-string after the attack string.
+actuallyBad = re.compile(r'''((?:[^"']|".*?"|'.*?')*?)([(,)]|$)''')
+
+# NOT GOOD; attack: "a" + "[]".repeat(100) + ".b\n"
+# Adapted from Knockout (https://github.com/knockout/knockout), which is
+# licensed under the MIT license; see file knockout-LICENSE
+bad6 = re.compile(r'''^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$''')
+
+# GOOD
+good6 = re.compile(r'''(a|.)*''')
+
+# Testing the NFA - only some of the below are detected.
+bad7 = re.compile(r'''^([a-z]+)+$''')
+bad8 = re.compile(r'''^([a-z]*)*$''')
+bad9 = re.compile(r'''^([a-zA-Z0-9])(([\\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$''')
+bad10 = re.compile(r'''^(([a-z])+.)+[A-Z]([a-z])+$''')
+
+# NOT GOOD; attack: "[" + "][".repeat(100) + "]!"
+# Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
+# is licensed under the MIT license; see file Prototype.js-LICENSE.
+bad11 = re.compile(r'''(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)''')
+
+# NOT GOOD; attack: "'" + "\\a".repeat(100) + '"'
+# Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
+# under the MIT license; see file Prism-LICENSE.
+bad12 = re.compile(r'''("|')(\\?.)*?\1''')
+
+# NOT GOOD
+bad13 = re.compile(r'''(b|a?b)*c''')
+
+# NOT GOOD
+bad15 = re.compile(r'''(a|aa?)*b''')
+
+# GOOD
+good7 = re.compile(r'''(.|\n)*!''')
+
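+# NOT GOOD if `.` may match newlines (cf. the re.DOTALL case in unittests.py); NOTE(review): as written this pattern is identical to good7 above - confirm whether a DOTALL flag was intended. Attack: "\n".repeat(100) + "."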
+bad16 = re.compile(r'''(.|\n)*!''')
+
+# GOOD
+good8 = re.compile(r'''([\w.]+)*''')
+
+# NOT GOOD
+bad17 = re.compile(r'''(a|aa?)*b''')
+
+# GOOD - not used as regexp
+good9 = '(a|aa?)*b'
+
+# NOT GOOD
+bad18 = re.compile(r'''(([\s\S]|[^a])*)"''')
+
+# GOOD - there is no witness in the end that could cause the regexp to not match
+good10 = re.compile(r'''([^"']+)*''')
+
+# NOT GOOD
+bad20 = re.compile(r'''((.|[^a])*)"''')
+
+# GOOD
+good10 = re.compile(r'''((a|[^a])*)"''')
+
+# NOT GOOD
+bad21 = re.compile(r'''((b|[^a])*)"''')
+
+# NOT GOOD
+bad22 = re.compile(r'''((G|[^a])*)"''')
+
+# NOT GOOD
+bad23 = re.compile(r'''(([0-9]|[^a])*)"''')
+
+# NOT GOOD
+bad24 = re.compile(r'''(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?''')
+
+# NOT GOOD
+bad25 = re.compile(r'''"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"''')
+
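+# GOOD - unlike bad25, the second alternative also excludes `\`, so the two alternatives cannot match the same input (the variable name `bad26` is a misnomer).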
+bad26 = re.compile(r'''"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"''')
+
+# NOT GOOD
+bad27 = re.compile(r'''(([a-z]|[d-h])*)"''')
+
+# NOT GOOD
+bad27 = re.compile(r'''(([^a-z]|[^0-9])*)"''')
+
+# NOT GOOD
+bad28 = re.compile(r'''((\d|[0-9])*)"''')
+
+# NOT GOOD
+bad29 = re.compile(r'''((\s|\s)*)"''')
+
+# NOT GOOD
+bad30 = re.compile(r'''((\w|G)*)"''')
+
+# GOOD
+good11 = re.compile(r'''((\s|\d)*)"''')
+
+# NOT GOOD
+bad31 = re.compile(r'''((\d|\w)*)"''')
+
+# NOT GOOD
+bad32 = re.compile(r'''((\d|5)*)"''')
+
+# NOT GOOD
+bad33 = re.compile(r'''((\s|[\f])*)"''')
+
+# NOT GOOD
+bad34 = re.compile(r'''((\s|[\v]|\\v)*)"''')
+
+# NOT GOOD
+bad35 = re.compile(r'''((\f|[\f])*)"''')
+
+# NOT GOOD
+bad36 = re.compile(r'''((\W|\D)*)"''')
+
+# NOT GOOD
+bad37 = re.compile(r'''((\S|\w)*)"''')
+
+# NOT GOOD
+bad38 = re.compile(r'''((\S|[\w])*)"''')
+
+# NOT GOOD
+bad39 = re.compile(r'''((1s|[\da-z])*)"''')
+
+# NOT GOOD
+bad40 = re.compile(r'''((0|[\d])*)"''')
+
+# NOT GOOD
+bad41 = re.compile(r'''(([\d]+)*)"''')
+
+# GOOD - there is no witness in the end that could cause the regexp to not match
+good12 = re.compile(r'''(\d+(X\d+)?)+''')
+
+# GOOD - there is no witness in the end that could cause the regexp to not match
+good13 = re.compile(r'''([0-9]+(X[0-9]*)?)*''')
+
+# GOOD
+good15 = re.compile(r'''^([^>]+)*(>|$)''')
+
+# NOT GOOD
+bad43 = re.compile(r'''^([^>a]+)*(>|$)''')
+
+# NOT GOOD
+bad44 = re.compile(r'''(\n\s*)+$''')
+
+# NOT GOOD
+bad45 = re.compile(r'''^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})''')
+
+# NOT GOOD
+bad46 = re.compile(r'''\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}''')
+
+# NOT GOOD
+bad47 = re.compile(r'''(a+|b+|c+)*c''')
+
+# NOT GOOD
+bad48 = re.compile(r'''(((a+a?)*)+b+)''')
+
+# NOT GOOD
+bad49 = re.compile(r'''(a+)+bbbb''')
+
+# GOOD
+good16 = re.compile(r'''(a+)+aaaaa*a+''')
+
+# NOT GOOD
+bad50 = re.compile(r'''(a+)+aaaaa$''')
+
+# GOOD
+good17 = re.compile(r'''(\n+)+\n\n''')
+
+# NOT GOOD
+bad51 = re.compile(r'''(\n+)+\n\n$''')
+
+# NOT GOOD
+bad52 = re.compile(r'''([^X]+)*$''')
+
+# NOT GOOD
+bad53 = re.compile(r'''(([^X]b)+)*$''')
+
+# GOOD
+good18 = re.compile(r'''(([^X]b)+)*($|[^X]b)''')
+
+# NOT GOOD
+bad54 = re.compile(r'''(([^X]b)+)*($|[^X]c)''')
+
+# GOOD
+good20 = re.compile(r'''((ab)+)*ababab''')
+
+# GOOD
+good21 = re.compile(r'''((ab)+)*abab(ab)*(ab)+''')
+
+# GOOD
+good22 = re.compile(r'''((ab)+)*''')
+
+# NOT GOOD
+bad55 = re.compile(r'''((ab)+)*$''')
+
+# GOOD
+good23 = re.compile(r'''((ab)+)*[a1][b1][a2][b2][a3][b3]''')
+
+# NOT GOOD
+bad56 = re.compile(r'''([\n\s]+)*(.)''')
+
+# GOOD - any witness passes through the accept state.
+good24 = re.compile(r'''(A*A*X)*''')
+
+# GOOD
+good26 = re.compile(r'''([^\\\]]+)*''')
+
+# NOT GOOD
+bad59 = re.compile(r'''(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-''')
+
+# NOT GOOD
+bad60 = re.compile(r'''(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamnlongstringforstresstestingthequery)*-''')
+
+# NOT GOOD
+bad61 = re.compile(r'''(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-''')
+
+# GOOD
+good27 = re.compile(r'''(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-''')
+
+# GOOD
+good28 = re.compile(r'''foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo''')
+
+# GOOD
+good29 = re.compile(r'''foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo''')
+
+# NOT GOOD (but cannot currently construct a prefix)
+bad62 = re.compile(r'''a{2,3}(b+)+X''')
+
+# NOT GOOD (and a good prefix test)
+bad63 = re.compile(r'''^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>''')
+
+# GOOD
+good30 = re.compile(r'''(a+)*[\s\S][\s\S][\s\S]?''')
+
+# GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
+good31 = re.compile(r'''(a+)*[\s\S]{2,3}''')
+
+# GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
+good32 = re.compile(r'''(a+)*([\s\S]{2,}|X)$''')
+
+# GOOD
+good33 = re.compile(r'''(a+)*([\s\S]*|X)$''')
+
+# NOT GOOD
+bad64 = re.compile(r'''((a+)*$|[\s\S]+)''')
+
+# GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
+good34 = re.compile(r'''([\s\S]+|(a+)*$)''')
+
+# GOOD
+good35 = re.compile(r'''((;|^)a+)+$''')
+
+# NOT GOOD (a good prefix test)
+bad65 = re.compile(r'''(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f''')
+
+# NOT GOOD
+bad66 = re.compile(r'''^ab(c+)+$''')
+
+# NOT GOOD
+bad67 = re.compile(r'''(\d(\s+)*){20}''')
+
+# GOOD - but we spuriously conclude that a rejecting suffix exists.
+good36 = re.compile(r'''(([^/]|X)+)(\/[\s\S]*)*$''')
+
+# GOOD - but we spuriously conclude that a rejecting suffix exists.
+good37 = re.compile(r'''^((x([^Y]+)?)*(Y|$))''')
+
+# NOT GOOD
+bad68 = re.compile(r'''(a*)+b''')
+
+# NOT GOOD
+bad69 = re.compile(r'''foo([\w-]*)+bar''')
+
+# NOT GOOD
+bad70 = re.compile(r'''((ab)*)+c''')
+
+# NOT GOOD
+bad71 = re.compile(r'''(a?a?)*b''')
+
+# GOOD
+good38 = re.compile(r'''(a?)*b''')
+
+# NOT GOOD - but not detected
+bad72 = re.compile(r'''(c?a?)*b''')
+
+# NOT GOOD
+bad73 = re.compile(r'''(?:a|a?)+b''')
+
+# NOT GOOD - but not detected.
+bad74 = re.compile(r'''(a?b?)*$''')
+
+# NOT GOOD
+bad76 = re.compile(r'''PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)''')
+
+# NOT GOOD - but not detected
+bad77 = re.compile(r'''^((a)+\w)+$''')
+
+# NOT GOOD
+bad78 = re.compile(r'''^(b+.)+$''')
+
+# GOOD
+good39 = re.compile(r'''a*b''')
+
+# All 4 bad combinations of nested * and +
+bad79 = re.compile(r'''(a*)*b''')
+bad80 = re.compile(r'''(a+)*b''')
+bad81 = re.compile(r'''(a*)+b''')
+bad82 = re.compile(r'''(a+)+b''')
+
+# GOOD
+good40 = re.compile(r'''(a|b)+''')
+good41 = re.compile(r'''(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+''') # parses wrongly, sees column 42 as a char set start
+
+# NOT GOOD
+bad83 = re.compile(r'''^((?:a{|-)|\w\{)+X$''')
+bad84 = re.compile(r'''^((?:a{0|-)|\w\{\d)+X$''')
+bad85 = re.compile(r'''^((?:a{0,|-)|\w\{\d,)+X$''')
+bad86 = re.compile(r'''^((?:a{0,2|-)|\w\{\d,\d)+X$''')
+
+# GOOD
+good42 = re.compile(r'''^((?:a{0,2}|-)|\w\{\d,\d\})+X$''')
+
+# NOT GOOD
+bad87 = re.compile(r'X(\u0061|a)*Y')
+
+# GOOD
+good43 = re.compile(r'X(\u0061|b)+Y')
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py
@@ -0,0 +1,9 @@
+import re
+
+# Treatment of escapes
+re.compile(r"X([^\.]|\.)*$") # No ReDoS.
+re.compile(r"X(Æ|\Æ)+$") # Has ReDoS.
+
+# Treatment of line breaks
+re.compile(r'(?:.|\n)*b') # No ReDoS.
+re.compile(r'(?:.|\n)*b', re.DOTALL) # Has ReDoS.
				`@@ -0,0 +1 @@`
				`experimental/Security/CWE-287/ImproperLdapAuth.ql`
				`@@ -0,0 +1 @@`
				`semmle-extractor-options: --max-import-depth=1 --lang=3`
				`@@ -1 +0,0 @@`
				`semmle-extractor-options: -p ../lib/ --max-import-depth=3`
				`@@ -0,0 +1 @@`
				`experimental/Security/CWE-730/PolynomialReDoS.ql`