Merge branch 'main' of https://github.com/github/codeql into post-release-prep/codeql-cli-2.25.1

This commit is contained in:
Óscar San José
2026-03-30 10:51:12 +02:00
765 changed files with 13826 additions and 27987 deletions

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* Added a class `IndirectUninitializedNode` to represent the indirection of an uninitialized local variable as a dataflow node.

View File

@@ -0,0 +1,5 @@
---
category: feature
---
* Added a class `DataFlow::IndirectParameterNode` to represent the indirection of a parameter as a dataflow node.
* Added a predicate `Node::asIndirectInstruction` which returns the `Instruction` that defines the indirect dataflow node, if any.

View File

@@ -0,0 +1,4 @@
---
category: breaking
---
* The `SourceModelCsv`, `SinkModelCsv`, and `SummaryModelCsv` classes and the associated CSV parsing infrastructure have been removed from `ExternalFlow.qll`. New models should be added as `.model.yml` files in the `ext/` directory.

View File

@@ -0,0 +1,22 @@
# ZeroMQ networking library models
extensions:
- addsTo:
pack: codeql/cpp-all
extensible: sourceModel
data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance
- ["", "", False, "zmq_recv", "", "", "Argument[*1]", "remote", "manual"]
- ["", "", False, "zmq_recvmsg", "", "", "Argument[*1]", "remote", "manual"]
- ["", "", False, "zmq_msg_recv", "", "", "Argument[*0]", "remote", "manual"]
- addsTo:
pack: codeql/cpp-all
extensible: sinkModel
data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance
- ["", "", False, "zmq_send", "", "", "Argument[*1]", "remote-sink", "manual"]
- ["", "", False, "zmq_sendmsg", "", "", "Argument[*1]", "remote-sink", "manual"]
- ["", "", False, "zmq_msg_send", "", "", "Argument[*0]", "remote-sink", "manual"]
- addsTo:
pack: codeql/cpp-all
extensible: summaryModel
data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance
- ["", "", False, "zmq_msg_init_data", "", "", "Argument[*1]", "Argument[*0]", "taint", "manual"]
- ["", "", False, "zmq_msg_data", "", "", "Argument[*0]", "ReturnValue[*]", "taint", "manual"]

View File

@@ -0,0 +1,19 @@
# Models for getc and similar character-reading functions
extensions:
- addsTo:
pack: codeql/cpp-all
extensible: sourceModel
data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance
- ["", "", False, "getc", "", "", "ReturnValue", "remote", "manual"]
- ["", "", False, "getwc", "", "", "ReturnValue", "remote", "manual"]
- ["", "", False, "_getc_nolock", "", "", "ReturnValue", "remote", "manual"]
- ["", "", False, "_getwc_nolock", "", "", "ReturnValue", "remote", "manual"]
- ["", "", False, "getch", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getch", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getwch", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getch_nolock", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getwch_nolock", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "getchar", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "getwchar", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getchar_nolock", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "_getwchar_nolock", "", "", "ReturnValue", "local", "manual"]

View File

@@ -524,6 +524,12 @@ class Function extends Declaration, ControlFlowNode, AccessHolder, @function {
not exists(NewOrNewArrayExpr new | e = new.getAllocatorCall().getArgument(0))
)
}
/**
* Holds if this function has an ambiguous return type, meaning that zero or multiple return
* types for this function are present in the database (this can occur in `build-mode: none`).
*/
predicate hasAmbiguousReturnType() { count(this.getType()) != 1 }
}
pragma[noinline]

View File

@@ -163,12 +163,23 @@ predicate primitiveVariadicFormatter(
)
}
/**
* Gets a function call whose target is a variadic formatter with the given
* `type`, `format` parameter index and `output` parameter index.
*
* Join-order helper for `callsVariadicFormatter`.
*/
pragma[nomagic]
private predicate callsVariadicFormatterCall(FunctionCall fc, string type, int format, int output) {
variadicFormatter(fc.getTarget(), type, format, output)
}
private predicate callsVariadicFormatter(
Function f, string type, int formatParamIndex, int outputParamIndex
) {
// calls a variadic formatter with `formatParamIndex`, `outputParamIndex` linked
exists(FunctionCall fc, int format, int output |
variadicFormatter(pragma[only_bind_into](fc.getTarget()), type, format, output) and
callsVariadicFormatterCall(fc, type, format, output) and
fc.getEnclosingFunction() = f and
fc.getArgument(format) = f.getParameter(formatParamIndex).getAnAccess() and
fc.getArgument(output) = f.getParameter(outputParamIndex).getAnAccess()
@@ -176,7 +187,7 @@ private predicate callsVariadicFormatter(
or
// calls a variadic formatter with only `formatParamIndex` linked
exists(FunctionCall fc, string calledType, int format, int output |
variadicFormatter(pragma[only_bind_into](fc.getTarget()), calledType, format, output) and
callsVariadicFormatterCall(fc, calledType, format, output) and
fc.getEnclosingFunction() = f and
fc.getArgument(format) = f.getParameter(formatParamIndex).getAnAccess() and
not fc.getArgument(output) = f.getParameter(_).getAnAccess() and

View File

@@ -1,9 +1,10 @@
/**
* INTERNAL use only. This is an experimental API subject to change without notice.
*
* Provides classes and predicates for dealing with flow models specified in CSV format.
* Provides classes and predicates for dealing with flow models specified
* in data extension files.
*
* The CSV specification has the following columns:
* The extensible relations have the following columns:
* - Sources:
* `namespace; type; subtypes; name; signature; ext; output; kind`
* - Sinks:
@@ -104,117 +105,9 @@ private import internal.FlowSummaryImpl::Private
private import internal.FlowSummaryImpl::Private::External
private import internal.ExternalFlowExtensions::Extensions as Extensions
private import codeql.mad.ModelValidation as SharedModelVal
private import codeql.util.Unit
private import codeql.mad.static.ModelsAsData as SharedMaD
/**
* A unit class for adding additional source model rows.
*
* Extend this class to add additional source definitions.
*/
class SourceModelCsv extends Unit {
/** Holds if `row` specifies a source definition. */
abstract predicate row(string row);
}
/**
* A unit class for adding additional sink model rows.
*
* Extend this class to add additional sink definitions.
*/
class SinkModelCsv extends Unit {
/** Holds if `row` specifies a sink definition. */
abstract predicate row(string row);
}
/**
* A unit class for adding additional summary model rows.
*
* Extend this class to add additional flow summary definitions.
*/
class SummaryModelCsv extends Unit {
/** Holds if `row` specifies a summary definition. */
abstract predicate row(string row);
}
/** Holds if `row` is a source model. */
predicate sourceModel(string row) { any(SourceModelCsv s).row(row) }
/** Holds if `row` is a sink model. */
predicate sinkModel(string row) { any(SinkModelCsv s).row(row) }
/** Holds if `row` is a summary model. */
predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) }
private module MadInput implements SharedMaD::InputSig {
/** Holds if a source model exists for the given parameters. */
predicate additionalSourceModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string output, string kind, string provenance, string model
) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = output and
row.splitAt(";", 7) = kind
) and
provenance = "manual" and
model = ""
}
/** Holds if a sink model exists for the given parameters. */
predicate additionalSinkModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind, string provenance, string model
) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and
row.splitAt(";", 7) = kind
) and
provenance = "manual" and
model = ""
}
/**
* Holds if a summary model exists for the given parameters.
*
* This predicate does not expand `@` to `*`s.
*/
predicate additionalSummaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, string provenance, string model
) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and
row.splitAt(";", 7) = output and
row.splitAt(";", 8) = kind
) and
provenance = "manual" and
model = ""
}
string namespaceSegmentSeparator() { result = "::" }
}
@@ -250,8 +143,8 @@ predicate summaryModel(
)
}
/** Provides a query predicate to check the CSV data for validation errors. */
module CsvValidation {
/** Provides a query predicate to check the data for validation errors. */
module ModelValidation {
private string getInvalidModelInput() {
exists(string pred, AccessPath input, string part |
sinkModel(_, _, _, _, _, _, input, _, _, _) and pred = "sink"
@@ -294,40 +187,6 @@ module CsvValidation {
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
private string getInvalidModelSubtype() {
exists(string pred, string row |
sourceModel(row) and pred = "source"
or
sinkModel(row) and pred = "sink"
or
summaryModel(row) and pred = "summary"
|
exists(string b |
b = row.splitAt(";", 2) and
not b = ["true", "false"] and
result = "Invalid boolean \"" + b + "\" in " + pred + " model."
)
)
}
private string getInvalidModelColumnCount() {
exists(string pred, string row, int expect |
sourceModel(row) and expect = 8 and pred = "source"
or
sinkModel(row) and expect = 8 and pred = "sink"
or
summaryModel(row) and expect = 9 and pred = "summary"
|
exists(int cols |
cols = 1 + max(int n | exists(row.splitAt(";", n))) and
cols != expect and
result =
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
"."
)
)
}
private string getInvalidModelSignature() {
exists(string pred, string namespace, string type, string name, string signature, string ext |
sourceModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "source"
@@ -366,13 +225,12 @@ module CsvValidation {
)
}
/** Holds if some row in a CSV-based flow model appears to contain typos. */
/** Holds if some row in a MaD flow model appears to contain typos. */
query predicate invalidModelRow(string msg) {
msg =
[
getInvalidModelSignature(), getInvalidModelInput(), getInvalidModelOutput(),
getInvalidModelSubtype(), getInvalidModelColumnCount(), KindVal::getInvalidModelKind(),
getIncorrectConstructorSummaryOutput()
KindVal::getInvalidModelKind(), getIncorrectConstructorSummaryOutput()
]
}
}
@@ -1026,7 +884,7 @@ private module Cached {
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
cached
@@ -1037,7 +895,7 @@ private module Cached {
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
cached

View File

@@ -238,7 +238,12 @@ private module TrackVirtualDispatch<methodDispatchSig/1 virtualDispatch0> {
private import TypeTracking<Location, TtInput>::TypeTrack<qualifierSource/1>::Graph<qualifierOfVirtualCall/1>
private predicate edgePlus(PathNode n1, PathNode n2) = fastTC(edges/2)(n1, n2)
private predicate isSource(PathNode n) { n.isSource() }
private predicate isSink(PathNode n) { n.isSink() }
private predicate edgePlus(PathNode n1, PathNode n2) =
doublyBoundedFastTC(edges/2, isSource/1, isSink/1)(n1, n2)
/**
* Gets the most specific implementation of `mf` that may be called when the
@@ -255,6 +260,15 @@ private module TrackVirtualDispatch<methodDispatchSig/1 virtualDispatch0> {
)
}
pragma[nomagic]
private MemberFunction mostSpecificForSource(PathNode p1, MemberFunction mf) {
p1.isSource() and
exists(Class derived |
qualifierSourceImpl(p1.getNode(), derived) and
result = mostSpecific(mf, derived)
)
}
/**
* Gets a possible pair of end-points `(p1, p2)` where:
* - `p1` is a derived-to-base conversion that converts from some
@@ -264,16 +278,16 @@ private module TrackVirtualDispatch<methodDispatchSig/1 virtualDispatch0> {
* - `callable` is the most specific implementation that may be called when
* the qualifier has type `derived`.
*/
bindingset[p1, p2]
pragma[inline_late]
private predicate pairCand(
PathNode p1, PathNode p2, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::DataFlowCall call
) {
exists(Class derived, MemberFunction mf |
qualifierSourceImpl(p1.getNode(), derived) and
p2.isSink() and
exists(MemberFunction mf |
qualifierOfVirtualCallImpl(p2.getNode(), call.asCallInstruction(), mf) and
p1.isSource() and
p2.isSink() and
callable.asSourceCallable() = mostSpecific(mf, derived)
callable.asSourceCallable() = mostSpecificForSource(p1, mf)
)
}

View File

@@ -321,6 +321,12 @@ module Public {
*/
Operand asIndirectOperand(int index) { hasOperandAndIndex(this, result, index) }
/**
* Gets the instruction that is indirectly tracked by this node behind
* `index` number of indirections.
*/
Instruction asIndirectInstruction(int index) { hasInstructionAndIndex(this, result, index) }
/**
* Holds if this node is at index `i` in basic block `block`.
*
@@ -617,6 +623,25 @@ module Public {
*/
LocalVariable asUninitialized() { result = this.(UninitializedNode).getLocalVariable() }
/**
* Gets the uninitialized local variable corresponding to this node behind
* `index` number of indirections, if any.
*/
LocalVariable asIndirectUninitialized(int index) {
exists(IndirectUninitializedNode indirectUninitializedNode |
this = indirectUninitializedNode and
indirectUninitializedNode.getIndirectionIndex() = index
|
result = indirectUninitializedNode.getLocalVariable()
)
}
/**
* Gets the uninitialized local variable corresponding to this node behind
* a number indirections, if any.
*/
LocalVariable asIndirectUninitialized() { result = this.asIndirectUninitialized(_) }
/**
* Gets the positional parameter corresponding to the node that represents
* the value of the parameter after `index` number of loads, if any. For
@@ -761,16 +786,13 @@ module Public {
final override Type getType() { result = this.getPreUpdateNode().getType() }
}
/**
* The value of an uninitialized local variable, viewed as a node in a data
* flow graph.
*/
class UninitializedNode extends Node {
abstract private class AbstractUninitializedNode extends Node {
LocalVariable v;
int indirectionIndex;
UninitializedNode() {
AbstractUninitializedNode() {
exists(SsaImpl::Definition def, SsaImpl::SourceVariable sv |
def.getIndirectionIndex() = 0 and
def.getIndirectionIndex() = indirectionIndex and
def.getValue().asInstruction() instanceof UninitializedInstruction and
SsaImpl::defToNode(this, def, sv) and
v = sv.getBaseVariable().(SsaImpl::BaseIRVariable).getIRVariable().getAst()
@@ -781,6 +803,25 @@ module Public {
LocalVariable getLocalVariable() { result = v }
}
/**
* The value of an uninitialized local variable, viewed as a node in a data
* flow graph.
*/
class UninitializedNode extends AbstractUninitializedNode {
UninitializedNode() { indirectionIndex = 0 }
}
/**
* The value of an uninitialized local variable behind one or more levels of
* indirection, viewed as a node in a data flow graph.
*/
class IndirectUninitializedNode extends AbstractUninitializedNode {
IndirectUninitializedNode() { indirectionIndex > 0 }
/** Gets the indirection index of this node. */
int getIndirectionIndex() { result = indirectionIndex }
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph. This includes both explicit parameters such as `x` in `f(x)`
@@ -795,6 +836,12 @@ module Public {
/** An explicit positional parameter, including `this`, but not `...`. */
final class DirectParameterNode = AbstractDirectParameterNode;
/**
* A node representing an indirection of a positional parameter,
* including `*this`, but not `*...`.
*/
final class IndirectParameterNode = AbstractIndirectParameterNode;
final class ExplicitParameterNode = AbstractExplicitParameterNode;
/** An implicit `this` parameter. */
@@ -954,11 +1001,6 @@ module Public {
private import Public
/**
* A node representing an indirection of a parameter.
*/
final class IndirectParameterNode = AbstractIndirectParameterNode;
/**
* A class that lifts pre-SSA dataflow nodes to regular dataflow nodes.
*/

View File

@@ -48,7 +48,6 @@ private import implementations.SqLite3
private import implementations.PostgreSql
private import implementations.System
private import implementations.StructuredExceptionHandling
private import implementations.ZMQ
private import implementations.Win32CommandExecution
private import implementations.CA2AEX
private import implementations.CComBSTR

View File

@@ -112,21 +112,3 @@ private class GetsFunction extends DataFlowFunction, ArrayFunction, AliasFunctio
override predicate hasArrayOutput(int bufParam) { bufParam = 0 }
}
/**
* A model for `getc` and similar functions that are flow sources.
*/
private class GetcSource extends SourceModelCsv {
override predicate row(string row) {
row =
[
";;false;getc;;;ReturnValue;remote", ";;false;getwc;;;ReturnValue;remote",
";;false;_getc_nolock;;;ReturnValue;remote", ";;false;_getwc_nolock;;;ReturnValue;remote",
";;false;getch;;;ReturnValue;local", ";;false;_getch;;;ReturnValue;local",
";;false;_getwch;;;ReturnValue;local", ";;false;_getch_nolock;;;ReturnValue;local",
";;false;_getwch_nolock;;;ReturnValue;local", ";;false;getchar;;;ReturnValue;local",
";;false;getwchar;;;ReturnValue;local", ";;false;_getchar_nolock;;;ReturnValue;local",
";;false;_getwchar_nolock;;;ReturnValue;local",
]
}
}

View File

@@ -1,45 +0,0 @@
/**
* Provides implementation classes modeling the ZeroMQ networking library.
*/
import semmle.code.cpp.models.interfaces.FlowSource
/**
* Remote flow sources.
*/
private class ZmqSource extends SourceModelCsv {
override predicate row(string row) {
row =
[
";;false;zmq_recv;;;Argument[*1];remote", ";;false;zmq_recvmsg;;;Argument[*1];remote",
";;false;zmq_msg_recv;;;Argument[*0];remote",
]
}
}
/**
* Remote flow sinks.
*/
private class ZmqSinks extends SinkModelCsv {
override predicate row(string row) {
row =
[
";;false;zmq_send;;;Argument[*1];remote-sink",
";;false;zmq_sendmsg;;;Argument[*1];remote-sink",
";;false;zmq_msg_send;;;Argument[*0];remote-sink",
]
}
}
/**
* Flow steps.
*/
private class ZmqSummaries extends SummaryModelCsv {
override predicate row(string row) {
row =
[
";;false;zmq_msg_init_data;;;Argument[*1];Argument[*0];taint",
";;false;zmq_msg_data;;;Argument[*0];ReturnValue[*];taint",
]
}
}