Merge remote-tracking branch 'upstream/main' into post-release-prep/codeql-cli-2.13.3

2025-12-22 03:36:30 +01:00 · 2023-05-30 21:27:53 +02:00
parent d2e192020b 60a5ef744f
commit 490d22d123
360 changed files with 40677 additions and 18302 deletions
--- a/python/ql/lib/semmlecode.python.dbscheme
+++ b/python/ql/lib/semmlecode.python.dbscheme
@@ -16,9 +16,7 @@
 * mechanism not work properly.
 */

- /*
- * External artifacts
- */
+/*- DEPRECATED: External defects and metrics -*/

 externalDefects(
  unique int id : @externalDefect,
@@ -35,31 +33,44 @@ externalMetrics(
  float value : float ref
 );

+/*- External data -*/
+
+/**
+ * External data, loaded from CSV files during snapshot creation. See
+ * [Tutorial: Incorporating external data](https://help.semmle.com/wiki/display/SD/Tutorial%3A+Incorporating+external+data)
+ * for more information.
+ */
 externalData(
  int id : @externalDataElement,
-  varchar(900) queryPath : string ref,
+  string path : string ref,
  int column: int ref,
-  varchar(900) data : string ref
+  string value : string ref
 );

+/*- DEPRECATED: Snapshot date -*/
+
 snapshotDate(unique date snapshotDate : date ref);

-sourceLocationPrefix(varchar(900) prefix : string ref);
+/*- Source location prefix -*/

-
-/*
- * Duplicate code
+/**
+ * The source location of the snapshot.
 */
+sourceLocationPrefix(string prefix : string ref);
+
+/*- DEPRECATED: Duplicate code -*/

 duplicateCode(
  unique int id : @duplication,
-  varchar(900) relativePath : string ref,
-  int equivClass : int ref);
+  string relativePath : string ref,
+  int equivClass : int ref
+);

 similarCode(
  unique int id : @similarity,
-  varchar(900) relativePath : string ref,
-  int equivClass : int ref);
+  string relativePath : string ref,
+  int equivClass : int ref
+);

@duplication_or_similarity = @duplication | @similarity

@@ -69,7 +80,192 @@ tokens(
  int beginLine : int ref,
  int beginColumn : int ref,
  int endLine : int ref,
-  int endColumn : int ref);
+  int endColumn : int ref
+);
+
+/*- DEPRECATED: Version control data -*/
+
+svnentries(
+  unique int id : @svnentry,
+  string revision : string ref,
+  string author : string ref,
+  date revisionDate : date ref,
+  int changeSize : int ref
+)
+
+svnaffectedfiles(
+  int id : @svnentry ref,
+  int file : @file ref,
+  string action : string ref
+)
+
+svnentrymsg(
+  unique int id : @svnentry ref,
+  string message : string ref
+)
+
+svnchurn(
+  int commit : @svnentry ref,
+  int file : @file ref,
+  int addedLines : int ref,
+  int deletedLines : int ref
+)
+
+/*- Lines of code -*/
+
+numlines(
+  int element_id: @sourceline ref,
+  int num_lines: int ref,
+  int num_code: int ref,
+  int num_comment: int ref
+);
+
+/*- Files and folders -*/
+
+/**
+ * The location of an element.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `file`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+locations_default(
+  unique int id: @location_default,
+  int file: @file ref,
+  int beginLine: int ref,
+  int beginColumn: int ref,
+  int endLine: int ref,
+  int endColumn: int ref
+);
+
+files(
+  unique int id: @file,
+  string name: string ref
+);
+
+folders(
+  unique int id: @folder,
+  string name: string ref
+);
+
+@container = @file | @folder
+
+containerparent(
+  int parent: @container ref,
+  unique int child: @container ref
+);
+
+/*- XML Files -*/
+
+xmlEncoding(
+  unique int id: @file ref,
+  string encoding: string ref
+);
+
+xmlDTDs(
+  unique int id: @xmldtd,
+  string root: string ref,
+  string publicId: string ref,
+  string systemId: string ref,
+  int fileid: @file ref
+);
+
+xmlElements(
+  unique int id: @xmlelement,
+  string name: string ref,
+  int parentid: @xmlparent ref,
+  int idx: int ref,
+  int fileid: @file ref
+);
+
+xmlAttrs(
+  unique int id: @xmlattribute,
+  int elementid: @xmlelement ref,
+  string name: string ref,
+  string value: string ref,
+  int idx: int ref,
+  int fileid: @file ref
+);
+
+xmlNs(
+  int id: @xmlnamespace,
+  string prefixName: string ref,
+  string URI: string ref,
+  int fileid: @file ref
+);
+
+xmlHasNs(
+  int elementId: @xmlnamespaceable ref,
+  int nsId: @xmlnamespace ref,
+  int fileid: @file ref
+);
+
+xmlComments(
+  unique int id: @xmlcomment,
+  string text: string ref,
+  int parentid: @xmlparent ref,
+  int fileid: @file ref
+);
+
+xmlChars(
+  unique int id: @xmlcharacters,
+  string text: string ref,
+  int parentid: @xmlparent ref,
+  int idx: int ref,
+  int isCDATA: int ref,
+  int fileid: @file ref
+);
+
+@xmlparent = @file | @xmlelement;
+@xmlnamespaceable = @xmlelement | @xmlattribute;
+
+xmllocations(
+  int xmlElement: @xmllocatable ref,
+  int location: @location_default ref
+);
+
+@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;
+
+/*- YAML -*/
+
+#keyset[parent, idx]
+yaml (unique int id: @yaml_node,
+      int kind: int ref,
+      int parent: @yaml_node_parent ref,
+      int idx: int ref,
+      string tag: string ref,
+      string tostring: string ref);
+
+case @yaml_node.kind of
+  0 = @yaml_scalar_node
+| 1 = @yaml_mapping_node
+| 2 = @yaml_sequence_node
+| 3 = @yaml_alias_node
+;
+
+@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node;
+
+@yaml_node_parent = @yaml_collection_node | @file;
+
+yaml_anchors (unique int node: @yaml_node ref,
+              string anchor: string ref);
+
+yaml_aliases (unique int alias: @yaml_alias_node ref,
+              string target: string ref);
+
+yaml_scalars (unique int scalar: @yaml_scalar_node ref,
+              int style: int ref,
+              string value: string ref);
+
+yaml_errors (unique int id: @yaml_error,
+             string message: string ref);
+
+yaml_locations(unique int locatable: @yaml_locatable ref,
+             int location: @location_default ref);
+
+@yaml_locatable = @yaml_node | @yaml_error;
+
+/*- Python dbscheme -*/

 /*
 * Line metrics
@@ -86,68 +282,14 @@ py_docstringlines(int id : @py_scope ref,
 py_alllines(int id : @py_scope ref,
                int count : int ref);

-/*
- * Version history
- */
-
-svnentries(
-  int id : @svnentry,
-  varchar(500) revision : string ref,
-  varchar(500) author : string ref,
-  date revisionDate : date ref,
-  int changeSize : int ref
-)
-
-svnaffectedfiles(
-  int id : @svnentry ref,
-  int file : @file ref,
-  varchar(500) action : string ref
-)
-
-svnentrymsg(
-  int id : @svnentry ref,
-  varchar(500) message : string ref
-)
-
-svnchurn(
-  int commit : @svnentry ref,
-  int file : @file ref,
-  int addedLines : int ref,
-  int deletedLines : int ref
-)
-
 /****************************
        Python dbscheme
 ****************************/

-files(unique int id: @file,
-      varchar(900) name: string ref);
-
-folders(unique int id: @folder,
-        varchar(900) name: string ref);
-
-@container = @folder | @file;
-
-containerparent(int parent: @container ref,
-                unique int child: @container ref);
-
@sourceline = @file | @py_Module | @xmllocatable;

-numlines(int element_id: @sourceline ref,
-         int num_lines: int ref,
-         int num_code: int ref,
-         int num_comment: int ref
-        );
-
@location = @location_ast | @location_default ;

-locations_default(unique int id: @location_default,
-          int file: @file ref,
-          int beginLine: int ref,
-          int beginColumn: int ref,
-          int endLine: int ref,
-          int endColumn: int ref);
-
 locations_ast(unique int id: @location_ast,
          int module: @py_Module ref,
          int beginLine: int ref,
@@ -1052,96 +1194,3 @@ py_decorated_object(int object : @py_object ref,
@py_object = @py_cobject | @py_flow_node;

@py_source_element = @py_ast_node | @container;
-
-/* XML Files */
-
-xmlEncoding  (unique int id: @file ref, varchar(900) encoding: string ref);
-
-xmlDTDs       (unique int id: @xmldtd,
-              varchar(900) root: string ref,
-              varchar(900) publicId: string ref,
-              varchar(900) systemId: string ref,
-              int fileid: @file ref);
-
-xmlElements   (unique int id: @xmlelement,
-              varchar(900) name: string ref,
-              int parentid: @xmlparent ref,
-              int idx: int ref,
-              int fileid: @file ref);
-
-xmlAttrs      (unique int id: @xmlattribute,
-              int elementid: @xmlelement ref,
-              varchar(900) name: string ref,
-              varchar(3600) value: string ref,
-              int idx: int ref,
-              int fileid: @file ref);
-
-xmlNs         (int id: @xmlnamespace,
-              varchar(900) prefixName: string ref,
-              varchar(900) URI: string ref,
-              int fileid: @file ref);
-
-xmlHasNs      (int elementId: @xmlnamespaceable ref,
-              int nsId: @xmlnamespace ref,
-              int fileid: @file ref);
-
-xmlComments   (unique int id: @xmlcomment,
-              varchar(3600) text: string ref,
-              int parentid: @xmlparent ref,
-              int fileid: @file ref);
-
-xmlChars      (unique int id: @xmlcharacters,
-              varchar(3600) text: string ref,
-              int parentid: @xmlparent ref,
-              int idx: int ref,
-              int isCDATA: int ref,
-              int fileid: @file ref);
-
-@xmlparent = @file | @xmlelement;
-@xmlnamespaceable = @xmlelement | @xmlattribute;
-
-xmllocations(int xmlElement: @xmllocatable ref,
-             int location: @location_default ref);
-
-@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;
-
-/**
- *  YAML
- */
-
-#keyset[parent, idx]
-yaml (unique int id: @yaml_node,
-      int kind: int ref,
-      int parent: @yaml_node_parent ref,
-      int idx: int ref,
-      varchar(900) tag: string ref,
-      varchar(900) tostring: string ref);
-
-case @yaml_node.kind of
-  0 = @yaml_scalar_node
-| 1 = @yaml_mapping_node
-| 2 = @yaml_sequence_node
-| 3 = @yaml_alias_node
-;
-
-@yaml_collection_node = @yaml_mapping_node | @yaml_sequence_node;
-
-@yaml_node_parent = @yaml_collection_node | @file;
-
-yaml_anchors (unique int node: @yaml_node ref,
-              varchar(900) anchor: string ref);
-
-yaml_aliases (unique int alias: @yaml_alias_node ref,
-              varchar(900) target: string ref);
-
-yaml_scalars (unique int scalar: @yaml_scalar_node ref,
-              int style: int ref,
-              varchar(900) value: string ref);
-
-yaml_errors (unique int id: @yaml_error,
-             varchar(900) message: string ref);
-
-yaml_locations(unique int locatable: @yaml_locatable ref,
-             int location: @location_default ref);
-
-@yaml_locatable = @yaml_node | @yaml_error;
--- a/python/ql/lib/semmlecode.python.dbscheme.stats
+++ b/python/ql/lib/semmlecode.python.dbscheme.stats
@@ -1056,7 +1056,7 @@
 <v>20</v>
 </e>
 <e>
-<k>queryPath</k>
+<k>path</k>
 <v>2</v>
 </e>
 <e>
@@ -1064,14 +1064,14 @@
 <v>5</v>
 </e>
 <e>
-<k>data</k>
+<k>value</k>
 <v>41</v>
 </e>
 </columnsizes>
 <dependencies>
 <dep>
 <src>id</src>
-<trg>queryPath</trg>
+<trg>path</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1103,7 +1103,7 @@
 </dep>
 <dep>
 <src>id</src>
-<trg>data</trg>
+<trg>value</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1118,7 +1118,7 @@
 </val>
 </dep>
 <dep>
-<src>queryPath</src>
+<src>path</src>
 <trg>id</trg>
 <val>
 <hist>
@@ -1134,7 +1134,7 @@
 </val>
 </dep>
 <dep>
-<src>queryPath</src>
+<src>path</src>
 <trg>column</trg>
 <val>
 <hist>
@@ -1150,8 +1150,8 @@
 </val>
 </dep>
 <dep>
-<src>queryPath</src>
-<trg>data</trg>
+<src>path</src>
+<trg>value</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1183,7 +1183,7 @@
 </dep>
 <dep>
 <src>column</src>
-<trg>queryPath</trg>
+<trg>path</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1199,7 +1199,7 @@
 </dep>
 <dep>
 <src>column</src>
-<trg>data</trg>
+<trg>value</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1214,7 +1214,7 @@
 </val>
 </dep>
 <dep>
-<src>data</src>
+<src>value</src>
 <trg>id</trg>
 <val>
 <hist>
@@ -1230,8 +1230,8 @@
 </val>
 </dep>
 <dep>
-<src>data</src>
-<trg>queryPath</trg>
+<src>value</src>
+<trg>path</trg>
 <val>
 <hist>
 <budget>12</budget>
@@ -1246,7 +1246,7 @@
 </val>
 </dep>
 <dep>
-<src>data</src>
+<src>value</src>
 <trg>column</trg>
 <val>
 <hist>
--- a/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/old.dbscheme
+++ b/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/old.dbscheme
--- a/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/semmlecode.python.dbscheme
+++ b/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/semmlecode.python.dbscheme
--- a/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/upgrade.properties
+++ b/python/ql/lib/upgrades/0355ecf0ac589e66467a378e0e9d60f41ee4a757/upgrade.properties
@@ -0,0 +1,2 @@
+description: Sync dbscheme fragments
+compatibility: full
--- a/python/ql/src/Security/CWE-730/PolynomialReDoS.qhelp
+++ b/python/ql/src/Security/CWE-730/PolynomialReDoS.qhelp
@@ -15,8 +15,7 @@
 		</p>

 		<sample language="python">
-			re.sub(r"^\s+|\s+$", "", text) # BAD
-		</sample>
+re.sub(r"^\s+|\s+$", "", text) # BAD</sample>

 		<p>

@@ -71,8 +70,7 @@
 		</p>

 		<sample language="python">
-			^0\.\d+E?\d+$ # BAD
-		</sample>
+^0\.\d+E?\d+$ # BAD</sample>

 		<p>

@@ -103,6 +101,32 @@

 	</example>

+    <example>
+        <p>
+            Sometimes it is unclear how a regular expression can be rewritten to
+            avoid the problem. In such cases, it often suffices to limit the
+            length of the input string. For instance, the following 
+            regular expression is used to match numbers, and on some non-number 
+            inputs it can have quadratic time complexity:
+        </p>
+
+        <sample language="python">
+match = re.search(r'^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$', str) </sample>
+
+        <p>
+            It is not immediately obvious how to rewrite this regular expression 
+            to avoid the problem. However, you can mitigate performance issues by limiting the length
+            of the input to 1000 characters, so that matching always finishes in a reasonable amount
+            of time.
+        </p>
+
+        <sample language="python">
+if len(str) &gt; 1000:
+    raise ValueError("Input too long")
+
+match = re.search(r'^(\+|-)?(\d+|(\d*\.\d*))?(E|e)?([-+])?(\d+)?$', str) </sample>
+    </example>
+
 	<include src="ReDoSReferences.inc.qhelp"/>

 </qhelp>
--- a/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.qhelp
+++ b/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.qhelp
@@ -0,0 +1,32 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+    <overview>
+        <p>Security checks bypass due to a Unicode transformation</p>
+        <p> If security checks or logical validation is performed before Unicode normalization, the
+            security checks or logical validation could be bypassed due to a potential Unicode
+            character collision. The validations we consider are: any character escaping, any regex
+            validation, or any string manipulation (such as <code>str.split</code>). </p>
+    </overview>
+    <recommendation>
+        <p> Perform Unicode normalization before the logical validation. </p>
+    </recommendation>
+    <example>
+
+        <p> The following example showcases the bypass of all checks performed by <code>
+            flask.escape()</code> because Unicode normalization is applied after the escaping.</p>
+        <p>For instance: the character U+FE64 (<code>﹤</code>) is not filtered out by the flask
+            escape function. But due to the Unicode normalization, the character is transformed and
+            would become U+003C (<code> &lt; </code> ).</p>
+
+        <sample src="escape-bypass.py" />
+
+    </example>
+    <references>
+        <li> Research study: <a
+                href="https://gosecure.github.io/presentations/2021-02-unicode-owasp-toronto/philippe_arteau_owasp_unicode_v4.pdf">
+            Unicode vulnerabilities that could bYte you
+            </a> and <a
+                href="https://gosecure.github.io/unicode-pentester-cheatsheet/">Unicode pentest
+            cheatsheet</a>. </li>
+    </references>
+</qhelp>
--- a/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.ql
+++ b/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Bypass Logical Validation Using Unicode Characters
+ * @description A Unicode transformation is applied to remote user-controlled data. The transformation is a Unicode normalization using the algorithms "NFC" or "NFKC". In all cases, the security measures implemented or the logical validation performed to escape any injection characters, to validate using regex patterns or to perform string-based checks, before the Unicode transformation are **bypassable** by special Unicode characters.
+ * @kind path-problem
+ * @id py/unicode-bypass-validation
+ * @precision high
+ * @problem.severity error
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-176
+ *       external/cwe/cwe-179
+ *       external/cwe/cwe-180
+ */
+
+import python
+import UnicodeBypassValidationQuery
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink,
+  "This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters.",
+  sink.getNode(), "Unicode transformation (Unicode normalization)", source.getNode(),
+  "remote user-controlled data"
--- a/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidationCustomizations.qll
+++ b/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidationCustomizations.qll
@@ -0,0 +1,30 @@
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "Unicode transformation"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "Unicode transformation"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+module UnicodeBypassValidation {
+  /**
+   * A data flow source for "Unicode transformation" vulnerabilities.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data flow sink for "Unicode transformation" vulnerabilities.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A sanitizer for "Unicode transformation" vulnerabilities.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+}
--- a/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidationQuery.qll
+++ b/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidationQuery.qll
@@ -0,0 +1,73 @@
+/**
+ * Provides a taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
+ */
+
+private import python
+import semmle.python.ApiGraphs
+import semmle.python.Concepts
+import semmle.python.dataflow.new.internal.DataFlowPublic
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.dataflow.new.internal.TaintTrackingPrivate
+import semmle.python.dataflow.new.RemoteFlowSources
+import UnicodeBypassValidationCustomizations::UnicodeBypassValidation
+
+/** A state signifying that a logical validation has not been performed. */
+class PreValidation extends DataFlow::FlowState {
+  PreValidation() { this = "PreValidation" }
+}
+
+/** A state signifying that a logical validation has been performed. */
+class PostValidation extends DataFlow::FlowState {
+  PostValidation() { this = "PostValidation" }
+}
+
+/**
+ * A taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
+ *
+ * This configuration uses two flow states, `PreValidation` and `PostValidation`,
+ * to track the requirement that a logical validation has been performed before the Unicode Transformation.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "UnicodeBypassValidation" }
+
+  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
+    source instanceof RemoteFlowSource and state instanceof PreValidation
+  }
+
+  override predicate isAdditionalTaintStep(
+    DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
+    DataFlow::FlowState stateTo
+  ) {
+    (
+      exists(Escaping escaping | nodeFrom = escaping.getAnInput() and nodeTo = escaping.getOutput())
+      or
+      exists(RegexExecution re | nodeFrom = re.getString() and nodeTo = re)
+      or
+      stringManipulation(nodeFrom, nodeTo) and
+      not nodeTo.(DataFlow::MethodCallNode).getMethodName() in ["encode", "decode"]
+    ) and
+    stateFrom instanceof PreValidation and
+    stateTo instanceof PostValidation
+  }
+
+  /* A Unicode transformation (Unicode normalization) is intended to be a sink when the algorithm used is either NFC or NFKC. NOTE(review): the predicate below does not check the normalization form. */
+  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
+    exists(API::CallNode cn |
+      cn = API::moduleImport("unicodedata").getMember("normalize").getACall() and
+      sink = cn.getArg(1)
+      or
+      cn = API::moduleImport("unidecode").getMember("unidecode").getACall() and
+      sink = cn.getArg(0)
+      or
+      cn = API::moduleImport("pyunormalize").getMember(["NFC", "NFD", "NFKC", "NFKD"]).getACall() and
+      sink = cn.getArg(0)
+      or
+      cn = API::moduleImport("pyunormalize").getMember("normalize").getACall() and
+      sink = cn.getArg(1)
+      or
+      cn = API::moduleImport("textnorm").getMember("normalize_unicode").getACall() and
+      sink = cn.getArg(0)
+    ) and
+    state instanceof PostValidation
+  }
+}
--- a/python/ql/src/experimental/Security/CWE-176/escape-bypass.py
+++ b/python/ql/src/experimental/Security/CWE-176/escape-bypass.py
@@ -0,0 +1,11 @@
+import unicodedata
+from flask import Flask, request, escape, render_template
+
+app = Flask(__name__)
+
+
+@app.route("/unsafe1")
+def unsafe1():
+    user_input = escape(request.args.get("ui"))
+    normalized_user_input = unicodedata.normalize("NFKC", user_input)
+    return render_template("result.html", normalized_user_input=normalized_user_input)
--- a/python/ql/src/meta/analysis-quality/SummarizedCallableCallSites.ql
+++ b/python/ql/src/meta/analysis-quality/SummarizedCallableCallSites.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Summarized callable call sites
+ * @description A call site for which we have a summarized callable
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/summarized-callable-call-sites
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.FlowSummary
+import meta.MetaMetrics
+
+from DataFlow::Node useSite, SummarizedCallable target, string kind
+where
+  (
+    useSite = target.getACall() and kind = "Call"
+    or
+    useSite = target.getACallback() and kind = "Callback"
+  ) and
+  not useSite.getLocation().getFile() instanceof IgnoredFile
+select useSite, kind + " to " + target
--- a/python/ql/test/experimental/query-tests/Security/CWE-176/UnicodeBypassValidation.expected
+++ b/python/ql/test/experimental/query-tests/Security/CWE-176/UnicodeBypassValidation.expected
@@ -0,0 +1,29 @@
+edges
+| samples.py:2:26:2:32 | ControlFlowNode for ImportMember | samples.py:2:26:2:32 | GSSA Variable request |
+| samples.py:2:26:2:32 | GSSA Variable request | samples.py:9:25:9:31 | ControlFlowNode for request |
+| samples.py:2:26:2:32 | GSSA Variable request | samples.py:16:25:16:31 | ControlFlowNode for request |
+| samples.py:9:18:9:47 | ControlFlowNode for escape() | samples.py:10:59:10:68 | ControlFlowNode for user_input |
+| samples.py:9:25:9:31 | ControlFlowNode for request | samples.py:9:25:9:36 | ControlFlowNode for Attribute |
+| samples.py:9:25:9:36 | ControlFlowNode for Attribute | samples.py:9:25:9:46 | ControlFlowNode for Attribute() |
+| samples.py:9:25:9:46 | ControlFlowNode for Attribute() | samples.py:9:18:9:47 | ControlFlowNode for escape() |
+| samples.py:16:18:16:47 | ControlFlowNode for escape() | samples.py:20:62:20:71 | ControlFlowNode for user_input |
+| samples.py:16:25:16:31 | ControlFlowNode for request | samples.py:16:25:16:36 | ControlFlowNode for Attribute |
+| samples.py:16:25:16:36 | ControlFlowNode for Attribute | samples.py:16:25:16:46 | ControlFlowNode for Attribute() |
+| samples.py:16:25:16:46 | ControlFlowNode for Attribute() | samples.py:16:18:16:47 | ControlFlowNode for escape() |
+nodes
+| samples.py:2:26:2:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
+| samples.py:2:26:2:32 | GSSA Variable request | semmle.label | GSSA Variable request |
+| samples.py:9:18:9:47 | ControlFlowNode for escape() | semmle.label | ControlFlowNode for escape() |
+| samples.py:9:25:9:31 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| samples.py:9:25:9:36 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| samples.py:9:25:9:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| samples.py:10:59:10:68 | ControlFlowNode for user_input | semmle.label | ControlFlowNode for user_input |
+| samples.py:16:18:16:47 | ControlFlowNode for escape() | semmle.label | ControlFlowNode for escape() |
+| samples.py:16:25:16:31 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
+| samples.py:16:25:16:36 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| samples.py:16:25:16:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| samples.py:20:62:20:71 | ControlFlowNode for user_input | semmle.label | ControlFlowNode for user_input |
+subpaths
+#select
+| samples.py:10:59:10:68 | ControlFlowNode for user_input | samples.py:2:26:2:32 | ControlFlowNode for ImportMember | samples.py:10:59:10:68 | ControlFlowNode for user_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | samples.py:10:59:10:68 | ControlFlowNode for user_input | Unicode transformation (Unicode normalization) | samples.py:2:26:2:32 | ControlFlowNode for ImportMember | remote user-controlled data |
+| samples.py:20:62:20:71 | ControlFlowNode for user_input | samples.py:2:26:2:32 | ControlFlowNode for ImportMember | samples.py:20:62:20:71 | ControlFlowNode for user_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | samples.py:20:62:20:71 | ControlFlowNode for user_input | Unicode transformation (Unicode normalization) | samples.py:2:26:2:32 | ControlFlowNode for ImportMember | remote user-controlled data |
--- a/python/ql/test/experimental/query-tests/Security/CWE-176/UnicodeBypassValidation.qlref
+++ b/python/ql/test/experimental/query-tests/Security/CWE-176/UnicodeBypassValidation.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-176/UnicodeBypassValidation.ql
--- a/python/ql/test/experimental/query-tests/Security/CWE-176/samples.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-176/samples.py
@@ -0,0 +1,30 @@
+import unicodedata
+from flask import Flask, request, escape, render_template
+
+app = Flask(__name__)
+
+
+@app.route("/unsafe1")
+def unsafe1():
+    user_input = escape(request.args.get("ui"))
+    normalized_user_input = unicodedata.normalize("NFKC", user_input)  # $result=BAD
+    return render_template("result.html", normalized_user_input=normalized_user_input)
+
+
+@app.route("/unsafe2")
+def unsafe1bis():
+    user_input = escape(request.args.get("ui"))
+    if user_input.isascii():
+        normalized_user_input = user_input
+    else:
+        normalized_user_input = unicodedata.normalize("NFC", user_input)  # $result=BAD
+    return render_template("result.html", normalized_user_input=normalized_user_input)
+
+
+@app.route("/safe1")
+def safe1():
+    normalized_user_input = unicodedata.normalize(
+        "NFKC", request.args.get("ui")
+    )  # $result=OK
+    user_input = escape(normalized_user_input)
+    return render_template("result.html", normalized_user_input=user_input)
				`@@ -0,0 +1 @@`
				`experimental/Security/CWE-176/UnicodeBypassValidation.ql`