Merge branch 'main' into mad

This commit is contained in:
Geoffrey White
2024-04-02 16:14:53 +01:00
1094 changed files with 83377 additions and 45282 deletions

View File

@@ -1,3 +1,34 @@
## 0.9.9
### New Queries
* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.
### Query Metadata Changes
* `@precision medium` metadata was added to the `cpp/boost/tls-settings-misconfiguration` and `cpp/boost/use-of-deprecated-hardcoded-security-protocol` queries, and these queries are now included in the security-extended suite. The `@name` metadata of these queries were also updated.
### Minor Analysis Improvements
* The "Missing return-value check for a 'scanf'-like function" query (`cpp/missing-check-scanf`) has been converted to a `path-problem` query.
* The "Potentially uninitialized local variable" query (`cpp/uninitialized-local`) has been converted to a `path-problem` query.
* Added models for `GLib` allocation and deallocation functions.
## 0.9.8
No user-facing changes.
## 0.9.7
No user-facing changes.
## 0.9.6
### Minor Analysis Improvements
* The "non-constant format string" query (`cpp/non-constant-format`) has been converted to a `path-problem` query.
* The new C/C++ dataflow and taint-tracking libraries (`semmle.code.cpp.dataflow.new.DataFlow` and `semmle.code.cpp.dataflow.new.TaintTracking`) now implicitly assume that dataflow and taint modelled via `DataFlowFunction` and `TaintFunction` always fully overwrite their buffers and thus act as flow barriers. As a result, many dataflow and taint-tracking queries now produce fewer false positives. To remove this assumption and go back to the previous behavior for a given model, one can override the new `isPartialWrite` predicate.
## 0.9.5
### Minor Analysis Improvements

View File

@@ -37,6 +37,5 @@ where
DoubleFree::flowPath(source, sink) and
isFree(source.getNode(), _, _, dealloc) and
isFree(sink.getNode(), e2)
select sink.getNode(), source, sink,
"Memory pointed to by '" + e2.toString() + "' may already have been freed by $@.", dealloc,
dealloc.toString()
select sink.getNode(), source, sink, "Memory pointed to by $@ may already have been freed by $@.",
e2, e2.toString(), dealloc, dealloc.toString()

View File

@@ -2,7 +2,7 @@
* @name Missing return-value check for a 'scanf'-like function
* @description Failing to check that a call to 'scanf' actually writes to an
* output variable can lead to unexpected behavior at reading time.
* @kind problem
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision medium
@@ -18,16 +18,9 @@ import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
import ScanfChecks
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
pragma[nomagic]
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}
import ScanfToUseFlow::PathGraph
/**
* Holds if `n` represents an uninitialized stack-allocated variable, or a
@@ -38,30 +31,45 @@ predicate isUninitialized(Node n) {
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic]
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isUninitialized(n)
or
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}
/**
* A configuration to track a uninitialized data flowing to a `scanf`-like
* output parameter position.
*
* This is meant to be a simple flow to rule out cases like:
* ```
* int x = 0;
* scanf(..., &x);
* use(x);
* ```
* since `x` is already initialized it's not a security concern that `x` is
* used without checking the return value of `scanf`.
*
* Since this flow is meant to be simple, we disable field flow and require the
* source and the sink to be in the same callable.
*/
module UninitializedToScanfConfig implements ConfigSig {
predicate isSource(Node source) { isUninitialized(source) }
predicate isSink(Node sink) { isSink(_, _, sink, _) }
FlowFeature getAFeature() { result instanceof FeatureEqualSourceSinkCallContext }
int accessPathLimit() { result = 0 }
}
module UninitializedToScanfFlow = Global<UninitializedToScanfConfig>;
/**
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output)) and
exists(Node n | UninitializedToScanfFlow::flowTo(n) and isSink(call, index, n, output)) and
// Exclude results from incorrectky checked scanf query
not incorrectlyCheckedScanf(call)
}
@@ -77,31 +85,6 @@ predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
@@ -112,40 +95,37 @@ predicate isSink(Node n, Expr e) {
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
* A configuration to track flow from the output argument of a call to a
* `scanf`-like function, and to a use of the defined variable.
*/
pragma[nomagic]
predicate revFlow(Node n) {
fwdFlow(n) and
(
module ScanfToUseConfig implements ConfigSig {
predicate isSource(Node source) { isSource(_, _, source, _) }
predicate isSink(Node sink) { isSink(sink, _) }
predicate isBarrierOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
}
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
module ScanfToUseFlow = Global<ScanfToUseConfig>;
/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
predicate flowPath(
ScanfToUseFlow::PathNode source, ScanfFunctionCall call, int index, ScanfToUseFlow::PathNode sink,
Expr e
) {
isSource(call, index, source.getNode(), _) and
ScanfToUseFlow::flowPath(source, sink) and
isSink(sink.getNode(), e)
}
/**
@@ -167,39 +147,33 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
predicate hasNonGuardedAccess(
ScanfToUseFlow::PathNode source, ScanfFunctionCall call, ScanfToUseFlow::PathNode sink, Expr e,
int minGuard
) {
exists(int index |
hasFlow(_, call, index, _, e) and
flowPath(source, call, index, sink, e) and
minGuard = getMinimumGuardConstant(call, index)
|
not exists(int value |
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
not exists(GuardCondition guard |
// call == k and k >= minGuard so call >= minGuard
guard
.ensuresEq(globalValueNumber(call).getAnExpr(), any(int k | minGuard <= k),
e.getBasicBlock(), true)
or
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
// call >= k and k >= minGuard so call >= minGuard
guard
.ensuresLt(globalValueNumber(call).getAnExpr(), any(int k | minGuard <= k),
e.getBasicBlock(), false)
)
)
}
/** Returns a block guarded by the assertion of `value op call` */
BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
g.ensuresLt(left, right, 0, result, true) and op = "<"
or
g.ensuresLt(left, right, 1, result, true) and op = "<="
)
}
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
from
ScanfToUseFlow::PathNode source, ScanfToUseFlow::PathNode sink, ScanfFunctionCall call, Expr e,
int minGuard
where hasNonGuardedAccess(source, call, sink, e, minGuard)
select e, source, sink,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()

View File

@@ -3,15 +3,11 @@ private import semmle.code.cpp.commons.Scanf
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.ValueNumbering
private ConstantInstruction getZeroInstruction() { result.getValue() = "0" }
private Operand zero() { result.getDef() = getZeroInstruction() }
private predicate exprInBooleanContext(Expr e) {
exists(IRGuardCondition gc |
exists(Instruction i |
i.getUnconvertedResultExpression() = e and
gc.comparesEq(valueNumber(i).getAUse(), zero(), 0, _, _)
gc.comparesEq(valueNumber(i).getAUse(), 0, _, _)
)
or
gc.getUnconvertedResultExpression() = e
@@ -36,10 +32,6 @@ private string getEofValue() {
)
}
private ConstantInstruction getEofInstruction() { result.getValue() = getEofValue() }
private Operand eof() { result.getDef() = getEofInstruction() }
/**
* Holds if the value of `call` has been checked to not equal `EOF`.
*/
@@ -47,10 +39,10 @@ private predicate checkedForEof(ScanfFunctionCall call) {
exists(IRGuardCondition gc |
exists(Instruction i | i.getUnconvertedResultExpression() = call |
// call == EOF
gc.comparesEq(valueNumber(i).getAUse(), eof(), 0, _, _)
gc.comparesEq(valueNumber(i).getAUse(), getEofValue().toInt(), _, _)
or
// call < 0 (EOF is guaranteed to be negative)
gc.comparesLt(valueNumber(i).getAUse(), zero(), 0, true, _)
gc.comparesLt(valueNumber(i).getAUse(), 0, true, _)
)
)
}

View File

@@ -37,6 +37,37 @@ class UncalledFunction extends Function {
}
}
/** The `unsigned short` type. */
class UnsignedShort extends ShortType {
UnsignedShort() { this.isUnsigned() }
}
/**
* Holds if `t` cannot refer to a string. That is, it's a built-in
* or arithmetic type that is not a "`char` like" type.
*/
predicate cannotContainString(Type t) {
exists(Type unspecified |
unspecified = t.getUnspecifiedType() and
not unspecified instanceof UnknownType and
not unspecified instanceof CharType and
not unspecified instanceof WideCharType and
not unspecified instanceof Char8Type and
not unspecified instanceof Char16Type and
not unspecified instanceof Char32Type and
// C often defines `wchar_t` as `unsigned short`
not unspecified instanceof UnsignedShort
|
unspecified instanceof ArithmeticType or
unspecified instanceof BuiltInType
)
}
predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) {
func.(DataFlowFunction).hasDataFlow(_, output) or
func.(TaintFunction).hasTaintFlow(_, output)
}
/**
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
* This is defined as either:
@@ -69,7 +100,9 @@ predicate isNonConst(DataFlow::Node node) {
// Parameters of uncalled functions that aren't const
exists(UncalledFunction f, Parameter p |
f.getAParameter() = p and
p = node.asParameter() and
// We pick the indirection of the parameter since this query is focused
// on strings.
p = node.asParameter(1) and
// Ignore main's argv parameter as it is already considered a `FlowSource`
// not ignoring it will result in path redundancies
(f.getName() = "main" implies p != f.getParameter(1))
@@ -82,30 +115,27 @@ predicate isNonConst(DataFlow::Node node) {
// are considered as possible non-const sources
// The function's output must also not be const to be considered a non-const source
exists(Function func, CallInstruction call |
// NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two
// variables representing the same call in ordoer to use `callOutput` below.
exists(Expr arg |
call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and
arg = node.asDefiningArgument()
)
or
call.getUnconvertedResultExpression() = node.asIndirectExpr()
not func.hasDefinition() and
func = call.getStaticCallTarget()
|
func = call.getStaticCallTarget() and
// Case 1: It's a known dataflow or taintflow function with flow to the return value
call.getUnconvertedResultExpression() = node.asIndirectExpr() and
not exists(FunctionOutput output |
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
// variant function's output are now possible non-const sources
pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or
pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output)
|
dataFlowOrTaintFlowFunction(func, output) and
output.isReturnValueDeref(_) and
node = callOutput(call, output)
)
) and
not exists(Call c |
c.getTarget().hasDefinition() and
if node instanceof DataFlow::DefinitionByReferenceNode
then c.getAnArgument() = node.asDefiningArgument()
else c = [node.asExpr(), node.asIndirectExpr()]
or
// Case 2: It's a known dataflow or taintflow function with flow to an output parameter
exists(int i |
call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() =
node.asDefiningArgument() and
not exists(FunctionOutput output |
dataFlowOrTaintFlowFunction(func, output) and
output.isParameterDeref(i, _) and
node = callOutput(call, output)
)
)
)
}
@@ -114,18 +144,29 @@ predicate isNonConst(DataFlow::Node node) {
* `FormattingFunctionCall`.
*/
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
sink.asIndirectExpr() = formatString and
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
}
module NonConstFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isNonConst(source) }
predicate isSource(DataFlow::Node source) {
exists(Type t |
isNonConst(source) and
t = source.getType() and
not cannotContainString(t)
)
}
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
predicate isBarrier(DataFlow::Node node) {
// Ignore tracing non-const through array indices
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr())
or
exists(Type t |
t = node.getType() and
cannotContainString(t)
)
}
}

View File

@@ -2,7 +2,7 @@
* @name Potentially uninitialized local variable
* @description Reading from a local variable that has not been assigned to
* will typically yield garbage.
* @kind problem
* @kind path-problem
* @id cpp/uninitialized-local
* @problem.severity warning
* @security-severity 7.8
@@ -15,6 +15,7 @@
import cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
/**
* Auxiliary predicate: Types that don't require initialization
@@ -89,4 +90,4 @@ where
conf.hasFlowPath(source, sink) and
isSinkImpl(sink.getInstruction(), va) and
v = va.getTarget()
select va, "The variable $@ may not be initialized at this access.", v, v.getName()
select va, source, sink, "The variable $@ may not be initialized at this access.", v, v.getName()

View File

@@ -1,8 +1,9 @@
/**
* @name Boost_asio TLS Settings Misconfiguration
* @name boost::asio TLS settings misconfiguration
* @description Using the TLS or SSLv23 protocol from the boost::asio library, but not disabling deprecated protocols, or disabling minimum-recommended protocols.
* @kind problem
* @problem.severity error
* @precision medium
* @security-severity 7.5
* @id cpp/boost/tls-settings-misconfiguration
* @tags security
@@ -12,34 +13,41 @@
import cpp
import semmle.code.cpp.security.boostorg.asio.protocols
module ExistsAnyFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(BoostorgAsio::SslContextClass c | c.getAContructorCall() = source.asExpr())
}
predicate isSourceImpl(DataFlow::Node source, ConstructorCall cc) {
exists(BoostorgAsio::SslContextClass c | c.getAContructorCall() = cc and cc = source.asExpr())
}
predicate isSink(DataFlow::Node sink) {
exists(BoostorgAsio::SslSetOptionsFunction f, FunctionCall fcSetOptions |
f.getACallToThisFunction() = fcSetOptions and
fcSetOptions.getQualifier() = sink.asExpr()
)
}
predicate isSinkImpl(DataFlow::Node sink, FunctionCall fcSetOptions) {
exists(BoostorgAsio::SslSetOptionsFunction f |
f.getACallToThisFunction() = fcSetOptions and
fcSetOptions.getQualifier() = sink.asIndirectExpr()
)
}
module ExistsAnyFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isSourceImpl(source, _) }
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
}
module ExistsAnyFlow = DataFlow::Global<ExistsAnyFlowConfig>;
bindingset[flag]
predicate isOptionSet(ConstructorCall cc, int flag, FunctionCall fcSetOptions) {
exists(VariableAccess contextSetOptions |
ExistsAnyFlow::flow(DataFlow::exprNode(cc), DataFlow::exprNode(contextSetOptions)) and
exists(BoostorgAsio::SslSetOptionsFunction f | f.getACallToThisFunction() = fcSetOptions |
contextSetOptions = fcSetOptions.getQualifier() and
forall(Expr optionArgument, Expr optionArgumentSource |
optionArgument = fcSetOptions.getArgument(0) and
BoostorgAsio::SslOptionFlow::flow(DataFlow::exprNode(optionArgumentSource),
DataFlow::exprNode(optionArgument))
|
optionArgument.getValue().toInt().bitShiftRight(16).bitAnd(flag) = flag
)
exists(
VariableAccess contextSetOptions, BoostorgAsio::SslSetOptionsFunction f, DataFlow::Node source,
DataFlow::Node sink
|
isSourceImpl(source, cc) and
isSinkImpl(sink, fcSetOptions) and
ExistsAnyFlow::flow(source, sink) and
f.getACallToThisFunction() = fcSetOptions and
contextSetOptions = fcSetOptions.getQualifier() and
forex(Expr optionArgument |
optionArgument = fcSetOptions.getArgument(0) and
BoostorgAsio::SslOptionFlow::flowTo(DataFlow::exprNode(optionArgument))
|
optionArgument.getValue().toInt().bitShiftRight(16).bitAnd(flag) = flag
)
)
}

View File

@@ -1,8 +1,9 @@
/**
* @name boost::asio Use of deprecated hardcoded Protocol
* @name boost::asio use of deprecated hardcoded protocol
* @description Using a deprecated hard-coded protocol using the boost::asio library.
* @kind problem
* @problem.severity error
* @precision medium
* @security-severity 7.5
* @id cpp/boost/use-of-deprecated-hardcoded-security-protocol
* @tags security

View File

@@ -4,6 +4,7 @@
* @kind metric
* @tags summary
* lines-of-code
* debug
* @id cpp/summary/lines-of-user-code
*/

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* The "non-constant format string" query (`cpp/non-constant-format`) has been converted to a `path-problem` query.

View File

@@ -1,4 +0,0 @@
---
category: newQuery
---
* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added models for `GLib` allocation and deallocation functions.

View File

@@ -1,4 +1,6 @@
---
category: minorAnalysis
---
## 0.9.6
### Minor Analysis Improvements
* The "non-constant format string" query (`cpp/non-constant-format`) has been converted to a `path-problem` query.
* The new C/C++ dataflow and taint-tracking libraries (`semmle.code.cpp.dataflow.new.DataFlow` and `semmle.code.cpp.dataflow.new.TaintTracking`) now implicitly assume that dataflow and taint modelled via `DataFlowFunction` and `TaintFunction` always fully overwrite their buffers and thus act as flow barriers. As a result, many dataflow and taint-tracking queries now produce fewer false positives. To remove this assumption and go back to the previous behavior for a given model, one can override the new `isPartialWrite` predicate.

View File

@@ -0,0 +1,3 @@
## 0.9.7
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 0.9.8
No user-facing changes.

View File

@@ -0,0 +1,15 @@
## 0.9.9
### New Queries
* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.
### Query Metadata Changes
* `@precision medium` metadata was added to the `cpp/boost/tls-settings-misconfiguration` and `cpp/boost/use-of-deprecated-hardcoded-security-protocol` queries, and these queries are now included in the security-extended suite. The `@name` metadata of these queries were also updated.
### Minor Analysis Improvements
* The "Missing return-value check for a 'scanf'-like function" query (`cpp/missing-check-scanf`) has been converted to a `path-problem` query.
* The "Potentially uninitialized local variable" query (`cpp/uninitialized-local`) has been converted to a `path-problem` query.
* Added models for `GLib` allocation and deallocation functions.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.9.5
lastReleaseVersion: 0.9.9

View File

@@ -0,0 +1,53 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using an iterator owned by a container after the lifetime of the container has expired can lead to undefined behavior.
This is because the iterator may be invalidated when the container is destroyed, and dereferencing an invalidated iterator is undefined behavior.
These problems can be hard to spot due to C++'s complex rules for temporary object lifetimes and their extensions.
</p>
</overview>
<recommendation>
<p>
Never create an iterator to a temporary container when the iterator is expected to be used after the container's lifetime has expired.
</p>
</recommendation>
<example>
<p>
</p>
<p>
The rules for lifetime extension ensures that the code in <code>lifetime_of_temp_extended</code> is well-defined. This is because the
lifetime of the temporary container returned by <code>get_vector</code> is extended to the end of the loop. However, prior to C++23,
the lifetime extension rules do not ensure that the container returned by <code>get_vector</code> is extended in <code>lifetime_of_temp_not_extended</code>.
This is because the temporary container is not bound to a rvalue reference.
</p>
<sample src="IteratorToExpiredContainerExtendedLifetime.cpp" />
</example>
<references>
<li>CERT C Coding Standard:
<a href="https://wiki.sei.cmu.edu/confluence/display/c/MEM30-C.+Do+not+access+freed+memory">MEM30-C. Do not access freed memory</a>.</li>
<li>
OWASP:
<a href="https://owasp.org/www-community/vulnerabilities/Using_freed_memory">Using freed memory</a>.
</li>
<li>
<a href="https://github.com/isocpp/CppCoreGuidelines/blob/master/docs/Lifetime.pdf">Lifetime safety: Preventing common dangling</a>
</li>
<li>
<a href="https://en.cppreference.com/w/cpp/container">Containers library</a>
</li>
<li>
<a href="https://en.cppreference.com/w/cpp/language/range-for">Range-based for loop (since C++11)</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,103 @@
/**
* @name Iterator to expired container
* @description Using an iterator owned by a container whose lifetime has expired may lead to unexpected behavior.
* @kind problem
* @precision high
* @id cpp/iterator-to-expired-container
* @problem.severity warning
* @tags reliability
* security
* external/cwe/cwe-416
* external/cwe/cwe-664
*/
// IMPORTANT: This query does not currently find anything since it relies on extractor and analysis improvements that hasn't yet been released
import cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.dataflow.new.DataFlow
import semmle.code.cpp.models.implementations.StdContainer
import semmle.code.cpp.models.implementations.StdMap
import semmle.code.cpp.models.implementations.Iterator
/**
* A configuration to track flow from a temporary variable to the qualifier of
* a destructor call
*/
module TempToDestructorConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source.asInstruction().(VariableAddressInstruction).getIRVariable() instanceof IRTempVariable
}
predicate isSink(DataFlow::Node sink) {
sink.asOperand().(ThisArgumentOperand).getCall().getStaticCallTarget() instanceof Destructor
}
}
module TempToDestructorFlow = DataFlow::Global<TempToDestructorConfig>;
/**
* Gets a `DataFlow::Node` that represents a temporary that will be destroyed
* by a call to a destructor, or a `DataFlow::Node` that will transitively be
* destroyed by a call to a destructor.
*
* For the latter case, consider something like:
* ```
* std::vector<std::vector<int>> get_2d_vector();
* auto& v = get_2d_vector()[0];
* ```
* Given the above, this predicate returns the node corresponding
* to `get_2d_vector()[0]` since the temporary `get_2d_vector()` gets
* destroyed by a call to `std::vector<std::vector<int>>::~vector`,
* and thus the result of `get_2d_vector()[0]` is also an invalid reference.
*/
DataFlow::Node getADestroyedNode() {
exists(TempToDestructorFlow::PathNode destroyedTemp | destroyedTemp.isSource() |
result = destroyedTemp.getNode()
or
exists(CallInstruction call |
result.asInstruction() = call and
DataFlow::localFlow(destroyedTemp.getNode(),
DataFlow::operandNode(call.getThisArgumentOperand()))
|
call.getStaticCallTarget() instanceof StdSequenceContainerAt or
call.getStaticCallTarget() instanceof StdMapAt
)
)
}
predicate isSinkImpl(DataFlow::Node sink, FunctionCall fc) {
exists(CallInstruction call |
call = sink.asOperand().(ThisArgumentOperand).getCall() and
fc = call.getUnconvertedResultExpression() and
call.getStaticCallTarget() instanceof BeginOrEndFunction
)
}
/**
* Flow from any destroyed object to the qualifier of a `begin` or `end` call
*/
module DestroyedToBeginConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source = getADestroyedNode() }
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
DataFlow::FlowFeature getAFeature() {
// By blocking argument-to-parameter flow we ensure that we don't enter a
// function body where the temporary outlives anything inside the function.
// This prevents false positives in cases like:
// ```cpp
// void foo(const std::vector<int>& v) {
// for(auto x : v) { ... } // this is fine since v outlives the loop
// }
// ...
// foo(create_temporary())
// ```
result instanceof DataFlow::FeatureHasSinkCallContext
}
}
module DestroyedToBeginFlow = DataFlow::Global<DestroyedToBeginConfig>;
from DataFlow::Node source, DataFlow::Node sink, FunctionCall beginOrEnd
where DestroyedToBeginFlow::flow(source, sink) and isSinkImpl(sink, beginOrEnd)
select source, "This object is destroyed before $@ is called.", beginOrEnd, beginOrEnd.toString()

View File

@@ -0,0 +1,20 @@
#include <vector>
std::vector<int> get_vector();
void use(int);
void lifetime_of_temp_extended() {
for(auto x : get_vector()) {
use(x); // GOOD: The lifetime of the vector returned by `get_vector()` is extended until the end of the loop.
}
}
// Writes the the values of `v` to an external log and returns it unchanged.
const std::vector<int>& log_and_return_argument(const std::vector<int>& v);
void lifetime_of_temp_not_extended() {
for(auto x : log_and_return_argument(get_vector())) {
use(x); // BAD: The lifetime of the vector returned by `get_vector()` is not extended, and the behavior is undefined.
}
}

View File

@@ -1,5 +1,5 @@
name: codeql/cpp-queries
version: 0.9.6-dev
version: 0.9.10-dev
groups:
- cpp
- queries