Merge branch 'main' into cwe497b

2026-05-03 04:39:29 +02:00 · 2022-03-25 11:57:30 +00:00
parent e377eebdbc b91914bd89
commit 9f3fd57534
381 changed files with 25652 additions and 3591 deletions
--- a/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.ql
+++ b/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Extraction errors
+ * @description List all extraction errors for files in the source code directory.
+ * @kind diagnostic
+ * @id cpp/diagnostics/extraction-errors
+ */
+
+import cpp
+import ExtractionErrors
+
+// NOTE:
+// This file looks like the other `diagnostics/extraction-errors` queries in other CodeQL supported
+// languages. However, since this diagnostic query is located in the `Internal` subdirectory it will not
+// appear in the Code Scanning suite. The related query `cpp/diagnostics/extraction-warnings` is,
+// however, included as a public diagnostics query.
+from ExtractionError error
+where
+  error instanceof ExtractionUnknownError or // catch-all errors have an empty file path: always keep
+  exists(error.getFile().getRelativePath()) // otherwise keep only files in the source directory
+select error, "Extraction failed in " + error.getFile() + " with error " + error.getErrorMessage(),
+  error.getSeverity()
--- a/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.qll
+++ b/cpp/ql/src/Diagnostics/Internal/ExtractionErrors.qll
@@ -0,0 +1,137 @@
+/**
+ * Provides a common hierarchy of all types of errors that can occur during extraction.
+ */
+
+import cpp
+
+/*
+ * A note about how the C/C++ extractor emits diagnostics:
+ * When the extractor frontend encounters an error, it emits a diagnostic message,
+ * that includes a message, location and severity.
+ * However, that process is best-effort and may fail (e.g. due to lack of memory).
+ * Thus, if the extractor emitted at least one diagnostic of severity discretionary
+ * error (or higher), it *also* emits a simple "There was an error during this compilation"
+ * error diagnostic, without location information.
+ * In the common case, this means that a compilation during which one or more errors happened also gets
+ * the catch-all diagnostic.
+ * This diagnostic has the empty string as file path.
+ * We filter out these useless diagnostics if there is at least one error-level diagnostic
+ * for the affected compilation in the database.
+ * Otherwise, we show it to indicate that something went wrong and that we
+ * don't know what exactly happened.
+ */
+
+/**
+ * An error that, if present, leads to a file being marked as non-successfully extracted.
+ */
+class ReportableError extends Diagnostic {
+  ReportableError() {
+    (
+      this instanceof CompilerDiscretionaryError or
+      this instanceof CompilerError or
+      this instanceof CompilerCatastrophe
+    ) and
+    // Filter out the catch-all diagnostic (empty file path), see note above.
+    not this.getFile().getAbsolutePath() = ""
+  }
+}
+
+private newtype TExtractionError =
+  TReportableError(ReportableError err) or
+  TCompilationFailed(Compilation c, File f) {
+    f = c.getAFileCompiled() and not c.normalTermination()
+  } or
+  // Show the catch-all diagnostic (see note above) only if the same compilation has no
+  // `ReportableError`, i.e. no error-level diagnostic with a non-empty file path.
+  TUnknownError(CompilerError err) {
+    not exists(ReportableError e | e.getCompilation() = err.getCompilation())
+  }
+
+/**
+ * Superclass for the extraction error hierarchy.
+ */
+class ExtractionError extends TExtractionError {
+  /** Gets the string representation of the error. */
+  string toString() { none() }
+
+  /** Gets the error message for this error. */
+  string getErrorMessage() { none() }
+
+  /** Gets the file this error occurred in. */
+  File getFile() { none() }
+
+  /** Gets the location this error occurred in. */
+  Location getLocation() { none() }
+
+  /** Gets the SARIF severity of this error. */
+  int getSeverity() {
+    // Unfortunately, we can't distinguish between errors and fatal errors in SARIF,
+    // so all errors have severity 2.
+    result = 2
+  }
+}
+
+/**
+ * An unrecoverable extraction error, where extraction was unable to finish.
+ * This can be caused by a multitude of reasons, for example:
+ *  - hitting a frontend assertion
+ *  - crashing due to dereferencing an invalid pointer
+ *  - stack overflow
+ *  - out of memory
+ */
+class ExtractionUnrecoverableError extends ExtractionError, TCompilationFailed {
+  Compilation c;
+  File f;
+
+  ExtractionUnrecoverableError() { this = TCompilationFailed(c, f) }
+
+  override string toString() {
+    result = "Unrecoverable extraction error while compiling " + f.toString()
+  }
+
+  override string getErrorMessage() { result = "unrecoverable compilation failure." }
+
+  override File getFile() { result = f }
+
+  override Location getLocation() { result = f.getLocation() }
+}
+
+/**
+ * A recoverable extraction error.
+ * These are compiler errors from the frontend.
+ * Upon encountering one of these, we still continue extraction, but the
+ * database will be incomplete for that file.
+ */
+class ExtractionRecoverableError extends ExtractionError, TReportableError {
+  ReportableError err;
+
+  ExtractionRecoverableError() { this = TReportableError(err) }
+
+  override string toString() { result = "Recoverable extraction error: " + err }
+
+  override string getErrorMessage() { result = err.getFullMessage() }
+
+  override File getFile() { result = err.getFile() }
+
+  override Location getLocation() { result = err.getLocation() }
+}
+
+/**
+ * An unknown error happened during extraction.
+ * These are only displayed if we know that we encountered an error during extraction,
+ * but, for some reason, failed to emit a proper diagnostic with location information
+ * and error message.
+ */
+class ExtractionUnknownError extends ExtractionError, TUnknownError {
+  CompilerError err;
+
+  ExtractionUnknownError() { this = TUnknownError(err) }
+
+  override string toString() { result = "Unknown extraction error: " + err }
+
+  override string getErrorMessage() { result = err.getFullMessage() }
+
+  override File getFile() { result = err.getFile() }
+
+  override Location getLocation() { result = err.getLocation() }
+}
--- a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
+++ b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql
@@ -19,9 +19,9 @@ import semmle.code.cpp.security.Security
 import semmle.code.cpp.valuenumbering.GlobalValueNumbering
 import semmle.code.cpp.ir.IR
 import semmle.code.cpp.ir.dataflow.TaintTracking
-import semmle.code.cpp.ir.dataflow.TaintTracking2
 import semmle.code.cpp.security.FlowSources
 import semmle.code.cpp.models.implementations.Strcat
+import DataFlow::PathGraph

 Expr sinkAsArgumentIndirection(DataFlow::Node sink) {
  result =
@@ -66,154 +66,70 @@ predicate interestingConcatenation(DataFlow::Node fst, DataFlow::Node snd) {
  )
 }

-class TaintToConcatenationConfiguration extends TaintTracking::Configuration {
-  TaintToConcatenationConfiguration() { this = "TaintToConcatenationConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof FlowSource }
-
-  override predicate isSink(DataFlow::Node sink) { interestingConcatenation(sink, _) }
-
-  override predicate isSanitizer(DataFlow::Node node) {
-    node.asInstruction().getResultType() instanceof IntegralType
-    or
-    node.asInstruction().getResultType() instanceof FloatingPointType
-  }
+class ConcatState extends DataFlow::FlowState {
+  ConcatState() { this = "ConcatState" }
 }

-class ExecTaintConfiguration extends TaintTracking2::Configuration {
+class ExecState extends DataFlow::FlowState {
+  DataFlow::Node fst;
+  DataFlow::Node snd;
+
+  ExecState() {
+    this =
+      "ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and
+    interestingConcatenation(fst, snd)
+  }
+
+  DataFlow::Node getFstNode() { result = fst }
+
+  DataFlow::Node getSndNode() { result = snd }
+}
+
+class ExecTaintConfiguration extends TaintTracking::Configuration {
  ExecTaintConfiguration() { this = "ExecTaintConfiguration" }

-  override predicate isSource(DataFlow::Node source) {
-    exists(DataFlow::Node prevSink, TaintToConcatenationConfiguration conf |
-      conf.hasFlow(_, prevSink) and
-      interestingConcatenation(prevSink, source)
-    )
+  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
+    source instanceof FlowSource and
+    state instanceof ConcatState
  }

-  override predicate isSink(DataFlow::Node sink) {
-    shellCommand(sinkAsArgumentIndirection(sink), _)
+  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
+    shellCommand(sinkAsArgumentIndirection(sink), _) and
+    state instanceof ExecState
  }

-  override predicate isSanitizerOut(DataFlow::Node node) {
-    isSink(node) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
-  }
-}
-
-module StitchedPathGraph {
-  // There's a different PathNode class for each DataFlowImplN.qll, so we can't simply combine the
-  // PathGraph predicates directly. Instead, we use a newtype so there's a single type that
-  // contains both sets of PathNodes.
-  newtype TMergedPathNode =
-    TPathNode1(DataFlow::PathNode node) or
-    TPathNode2(DataFlow2::PathNode node)
-
-  // this wraps the toString and location predicates so we can use the merged node type in a
-  // selection
-  class MergedPathNode extends TMergedPathNode {
-    string toString() {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        result = n.toString()
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        result = n.toString()
-      )
-    }
-
-    DataFlow::Node getNode() {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        result = n.getNode()
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        result = n.getNode()
-      )
-    }
-
-    DataFlow::PathNode getPathNode1() { this = TPathNode1(result) }
-
-    DataFlow2::PathNode getPathNode2() { this = TPathNode2(result) }
-
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      exists(DataFlow::PathNode n |
-        this = TPathNode1(n) and
-        n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-      )
-      or
-      exists(DataFlow2::PathNode n |
-        this = TPathNode2(n) and
-        n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-      )
-    }
-  }
-
-  query predicate edges(MergedPathNode a, MergedPathNode b) {
-    exists(DataFlow::PathNode an, DataFlow::PathNode bn |
-      a = TPathNode1(an) and
-      b = TPathNode1(bn) and
-      DataFlow::PathGraph::edges(an, bn)
-    )
-    or
-    exists(DataFlow2::PathNode an, DataFlow2::PathNode bn |
-      a = TPathNode2(an) and
-      b = TPathNode2(bn) and
-      DataFlow2::PathGraph::edges(an, bn)
-    )
-    or
-    // This is where paths from the two configurations are connected. `interestingConcatenation`
-    // is the only thing in this module that's actually specific to the query - everything else is
-    // just using types and predicates from the DataFlow library.
-    interestingConcatenation(a.getNode(), b.getNode()) and
-    a instanceof TPathNode1 and
-    b instanceof TPathNode2
-  }
-
-  query predicate nodes(MergedPathNode mpn, string key, string val) {
-    // here we just need the union of the underlying `nodes` predicates
-    exists(DataFlow::PathNode n |
-      mpn = TPathNode1(n) and
-      DataFlow::PathGraph::nodes(n, key, val)
-    )
-    or
-    exists(DataFlow2::PathNode n |
-      mpn = TPathNode2(n) and
-      DataFlow2::PathGraph::nodes(n, key, val)
-    )
-  }
-
-  query predicate subpaths(
-    MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
+  override predicate isAdditionalTaintStep(
+    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
+    DataFlow::FlowState state2
  ) {
-    // just forward subpaths from the underlying libraries. This might be slightly awkward when
-    // the concatenation is deep in a call chain.
-    DataFlow::PathGraph::subpaths(arg.getPathNode1(), par.getPathNode1(), ret.getPathNode1(),
-      out.getPathNode1())
-    or
-    DataFlow2::PathGraph::subpaths(arg.getPathNode2(), par.getPathNode2(), ret.getPathNode2(),
-      out.getPathNode2())
+    state1 instanceof ConcatState and
+    state2.(ExecState).getFstNode() = node1 and
+    state2.(ExecState).getSndNode() = node2
+  }
+
+  override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
+    (
+      node.asInstruction().getResultType() instanceof IntegralType
+      or
+      node.asInstruction().getResultType() instanceof FloatingPointType
+    ) and
+    state instanceof ConcatState
+  }
+
+  override predicate isSanitizerOut(DataFlow::Node node, DataFlow::FlowState state) {
+    isSink(node, state) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
  }
 }

-import StitchedPathGraph
-
 from
-  DataFlow::PathNode sourceNode, DataFlow::PathNode concatSink, DataFlow2::PathNode concatSource,
-  DataFlow2::PathNode sinkNode, string taintCause, string callChain,
-  TaintToConcatenationConfiguration conf1, ExecTaintConfiguration conf2
+  ExecTaintConfiguration conf, DataFlow::PathNode sourceNode, DataFlow::PathNode sinkNode,
+  string taintCause, string callChain, DataFlow::Node concatResult
 where
+  conf.hasFlowPath(sourceNode, sinkNode) and
  taintCause = sourceNode.getNode().(FlowSource).getSourceType() and
-  conf1.hasFlowPath(sourceNode, concatSink) and
-  interestingConcatenation(concatSink.getNode(), concatSource.getNode()) and // this loses call context
-  conf2.hasFlowPath(concatSource, sinkNode) and
-  shellCommand(sinkAsArgumentIndirection(sinkNode.getNode()), callChain)
-select sinkAsArgumentIndirection(sinkNode.getNode()), TPathNode1(sourceNode).(MergedPathNode),
-  TPathNode2(sinkNode).(MergedPathNode),
+  shellCommand(sinkAsArgumentIndirection(sinkNode.getNode()), callChain) and
+  concatResult = sinkNode.getState().(ExecState).getSndNode()
+select sinkAsArgumentIndirection(sinkNode.getNode()), sourceNode, sinkNode,
  "This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to "
-    + callChain, sourceNode, "user input (" + taintCause + ")", concatSource,
-  concatSource.toString()
+    + callChain, sourceNode, "user input (" + taintCause + ")", concatResult,
+  concatResult.toString()
--- a/cpp/ql/src/change-notes/2022-03-21-command-line-injection-with-flow-states.md
+++ b/cpp/ql/src/change-notes/2022-03-21-command-line-injection-with-flow-states.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* The `cpp/command-line-injection` query now takes into account calling contexts across string concatenations. This removes false positives due to mismatched calling contexts before and after string concatenations.
--- a/cpp/ql/src/experimental/Security/CWE/CWE-362/double-fetch.ql
+++ b/cpp/ql/src/experimental/Security/CWE/CWE-362/double-fetch.ql
@@ -0,0 +1,46 @@
+/**
+ * @name Linux kernel double-fetch vulnerability detection
+ * @description Double-fetch is a very common vulnerability pattern
+ *              in the Linux kernel. An attacker can exploit double-fetch
+ *              issues to obtain root privileges.
+ *              A double-fetch is caused by fetching data from user
+ *              mode by calling copy_from_user twice; CVE-2016-6480
+ *              is a good example.
+ * @kind problem
+ * @id cpp/linux-kernel-double-fetch-vulnerability
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @tags security
+ *       external/cwe/cwe-362
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+
+class CopyFromUserFunctionCall extends FunctionCall {
+  CopyFromUserFunctionCall() {
+    this.getTarget().getName() = "copy_from_user" and
+    not this.getArgument(1) instanceof AddressOfExpr
+  }
+
+  // The root cause of a double-fetch issue is reading from the
+  // same user-mode memory twice, so it makes sense to compare
+  // only the user-mode pointer (argument index 1 of the call).
+  predicate readFromSameUserModePointer(CopyFromUserFunctionCall another) {
+    globalValueNumber(this.getArgument(1)) = globalValueNumber(another.getArgument(1))
+  }
+}
+
+from CopyFromUserFunctionCall p1, CopyFromUserFunctionCall p2
+where
+  not p1 = p2 and
+  p1.readFromSameUserModePointer(p2) and
+  exists(IfStmt ifStmt |
+    p1.getBasicBlock().getAFalseSuccessor*() = ifStmt.getBasicBlock() and
+    ifStmt.getBasicBlock().getAFalseSuccessor*() = p2.getBasicBlock()
+  ) and
+  not exists(AssignPointerAddExpr assignPtrAdd |
+    globalValueNumber(p1.getArgument(1)) = globalValueNumber(assignPtrAdd.getLValue()) and
+    p1.getBasicBlock().getAFalseSuccessor*() = assignPtrAdd.getBasicBlock()
+  )
+select p2, "Double fetch vulnerability. First fetch was $@.", p1, p1.toString()