Merge branch 'main' into cpp/use-flow-state-inout-barriers

This commit is contained in:
Jeroen Ketema
2025-08-25 17:04:30 +02:00
committed by GitHub
29400 changed files with 2771434 additions and 889441 deletions

View File

@@ -21,7 +21,7 @@ string kindstr(Class c) {
or
kind = 2 and result = "Class"
or
kind = 6 and result = "Template class"
kind = [15, 16] and result = "Template class"
)
}
@@ -168,12 +168,7 @@ where
strictcount(string fieldName |
exists(Field f |
f.getDeclaringType() = c and
fieldName = f.getName() and
// IBOutlet's are a way of building GUIs
// automatically out of ObjC properties.
// We don't want to count those for the
// purposes of this query.
not f.getType().getAnAttribute().hasName("iboutlet")
fieldName = f.getName()
)
) and
n > 15 and

View File

@@ -0,0 +1,11 @@
void test()
{
char *foo = malloc(100);
// BAD
if (foo)
free(foo);
// GOOD
free(foo);
}

View File

@@ -0,0 +1,28 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>The <code>free</code> function, which deallocates heap memory, may accept a NULL pointer and take no action. Therefore, it is unnecessary to check the argument for the value of NULL before a function call to <code>free</code>. As such, these guards may hinder performance and readability.</p>
</overview>
<recommendation>
<p>A function call to <code>free</code> should not depend upon the value of its argument. Delete the condition preceding a function call to <code>free</code> when its only purpose is to check the value of the pointer to be freed.</p>
</recommendation>
<example>
<sample src = "GuardedFree.cpp" />
<p>In this example, the condition checking the value of <code>foo</code> can be deleted.</p>
</example>
<references>
<li>
The Open Group Base Specifications Issue 7, 2018 Edition:
<a href="https://pubs.opengroup.org/onlinepubs/9699919799/functions/free.html">free - free allocated memory</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,63 @@
/**
* @name Guarded Free
* @description NULL-condition guards before function calls to the memory-deallocation
* function free(3) are unnecessary, because passing NULL to free(3) is a no-op.
* @kind problem
* @problem.severity recommendation
* @precision very-high
* @id cpp/guarded-free
* @tags maintainability
* readability
*/
import cpp
import semmle.code.cpp.controlflow.Guards
class FreeCall extends FunctionCall {
FreeCall() { this.getTarget().hasGlobalName("free") }
}
predicate blockContainsPreprocessorBranches(BasicBlock bb) {
exists(PreprocessorBranch ppb, Location bbLoc, Location ppbLoc |
bbLoc = bb.(Stmt).getLocation() and ppbLoc = ppb.getLocation()
|
bbLoc.getFile() = ppb.getFile() and
bbLoc.getStartLine() < ppbLoc.getStartLine() and
ppbLoc.getEndLine() < bbLoc.getEndLine()
)
}
/**
* Holds if `gc` ensures that `v` is non-zero when reaching `bb`, and `bb`
* contains a single statement which is `fc`.
*/
pragma[nomagic]
private predicate interesting(GuardCondition gc, FreeCall fc, Variable v, BasicBlock bb) {
gc.ensuresEq(v.getAnAccess(), 0, bb, false) and
fc.getArgument(0) = v.getAnAccess() and
bb = fc.getBasicBlock() and
(
// No block statement: if (x) free(x);
bb = fc.getEnclosingStmt()
or
// Block statement with a single nested statement: if (x) { free(x); }
strictcount(bb.(BlockStmt).getAStmt()) = 1
) and
not fc.isInMacroExpansion() and
not blockContainsPreprocessorBranches(bb) and
not (gc instanceof BinaryOperation and not gc instanceof ComparisonOperation) and
not exists(CommaExpr c | c.getAChild*() = fc)
}
/** Holds if `gc` only guards a single block. */
bindingset[gc]
pragma[inline_late]
private predicate guardConditionGuardsUniqueBlock(GuardCondition gc) {
strictcount(BasicBlock bb | gc.ensuresEq(_, 0, bb, _)) = 1
}
from GuardCondition gc, FreeCall fc, Variable v, BasicBlock bb
where
interesting(gc, fc, v, bb) and
guardConditionGuardsUniqueBlock(gc)
select gc, "unnecessary NULL check before call to $@", fc, "free"

View File

@@ -9,11 +9,16 @@
*/
import cpp
import semmle.code.cpp.ConfigurationTestFile
from GlobalVariable gv
where
gv.getName().length() <= 3 and
not gv.isStatic()
// We will give an alert for the TemplateVariable, so we don't
// need to also give one for each instantiation
not gv instanceof VariableTemplateInstantiation and
not gv.isStatic() and
not gv.getFile() instanceof ConfigurationTestFile // variables in files generated during configuration are likely false positives
select gv,
"Poor global variable name '" + gv.getName() +
"'. Prefer longer, descriptive names for globals (eg. kMyGlobalConstant, not foo)."

View File

@@ -26,7 +26,7 @@ import cpp
*/
class TemplateDependentType extends Type {
TemplateDependentType() {
this instanceof TemplateParameter
this instanceof TypeTemplateParameter
or
exists(TemplateDependentType t |
this.refersToDirectly(t) and
@@ -57,5 +57,5 @@ where
not declarationHasSideEffects(v) and
not exists(AsmStmt s | f = s.getEnclosingFunction()) and
not v.getAnAttribute().getName() = "unused" and
not any(ErrorExpr e).getEnclosingFunction() = f // unextracted expr may use `v`
not f.hasErrors() // Unextracted expressions may use `v`
select v, "Variable " + v.getName() + " is not used."

View File

@@ -1,3 +1,356 @@
## 1.4.6
### Minor Analysis Improvements
* The `cpp/short-global-name` query will no longer give alerts for instantiations of template variables, only for the template itself.
* Fixed a false positive in `cpp/overflow-buffer` when the type of the destination buffer is a reference to a class/struct type.
## 1.4.5
### Minor Analysis Improvements
* The "Initialization code not run" query (`cpp/initialization-not-run`) no longer reports an alert on static global variables that have no dereference.
## 1.4.4
### Minor Analysis Improvements
* Due to changes in the `FunctionWithWrappers` library (`semmle.code.cpp.security.FunctionWithWrappers`) the primary alert location generated by the queries `cpp/path-injection`, `cpp/sql-injection`, `cpp/tainted-format-string`, and `cpp/command-line-injection` may have changed.
* Added flow models for the Win32 API functions `CreateThread`, `CreateRemoteThread`, and `CreateRemoteThreadEx`.
* Improved support for dataflow through function objects and lambda expressions.
* Added flow models for `pthread_create` and `std::thread`.
* The `cpp/incorrect-string-type-conversion` query no longer alerts on incorrect type conversions that occur in unreachable code.
* Added flow models for the GNU C Library.
* Fixed a number of false positives and false negatives in `cpp/global-use-before-init`. Note that this query is not part of any of the default query suites.
* The query `cpp/sql-injection` now can be extended using the `sql-injection` Models as Data (MaD) sink kind.
## 1.4.3
### Minor Analysis Improvements
* Added flow models for the following libraries: `madler/zlib`, `google/brotli`, `libidn/libidn2`, `libssh2/libssh2`, `nghttp2/nghttp2`, `libuv/libuv`, and `curl/curl`. This may result in more alerts when running queries on codebases that use these libraries.
## 1.4.2
No user-facing changes.
## 1.4.1
### Minor Analysis Improvements
* Added flow models for the `SQLite` and `OpenSSL` libraries. This may result in more alerts when running queries on codebases that use these libraries.
## 1.4.0
### Query Metadata Changes
* The tag `external/cwe/cwe-14` has been removed from `cpp/memset-may-be-deleted` and the tag `external/cwe/cwe-014` has been added.
* The tag `external/cwe/cwe-20` has been removed from `cpp/count-untrusted-data-external-api` and the tag `external/cwe/cwe-020` has been added.
* The tag `external/cwe/cwe-20` has been removed from `cpp/count-untrusted-data-external-api-ir` and the tag `external/cwe/cwe-020` has been added.
* The tag `external/cwe/cwe-20` has been removed from `cpp/untrusted-data-to-external-api-ir` and the tag `external/cwe/cwe-020` has been added.
* The tag `external/cwe/cwe-20` has been removed from `cpp/untrusted-data-to-external-api` and the tag `external/cwe/cwe-020` has been added.
* The tag `external/cwe/cwe-20` has been removed from `cpp/late-check-of-function-argument` and the tag `external/cwe/cwe-020` has been added.
## 1.3.9
No user-facing changes.
## 1.3.8
No user-facing changes.
## 1.3.7
### Minor Analysis Improvements
* Fixed a bug in the models for Microsoft's Active Template Library (ATL).
* The query "Use of basic integral type" (`cpp/jpl-c/basic-int-types`) no longer produces alerts for the standard fixed width integer types (`int8_t`, `uint8_t`, etc.), and the `_Bool` and `bool` types.
## 1.3.6
No user-facing changes.
## 1.3.5
### Minor Analysis Improvements
* Due to changes in libraries the query "Static array access may cause overflow" (`cpp/static-buffer-overflow`) will no longer report cases where multiple fields of a struct or class are written with a single `memset` or similar operation.
* The query "Call to memory access function may overflow buffer" (`cpp/overflow-buffer`) has been added to the security-extended query suite. The query detects a range of buffer overflow and underflow issues.
## 1.3.4
No user-facing changes.
## 1.3.3
### Minor Analysis Improvements
* The "Wrong type of arguments to formatting function" query (`cpp/wrong-type-format-argument`) now produces fewer FPs if the formatting function has multiple definitions.
* The "Call to memory access function may overflow buffer" query (`cpp/overflow-buffer`) now produces fewer FPs involving non-static member variables.
## 1.3.2
### Minor Analysis Improvements
* Added dataflow models for `SysAllocString` and related functions.
* The `cpp/badly-bounded-write`, `cpp/equality-on-floats`, `cpp/short-global-name`, `cpp/static-buffer-overflow`, `cpp/too-few-arguments`, `cpp/useless-expression`, `cpp/world-writable-file-creation` queries no longer produce alerts on files created by CMake to test the build configuration.
## 1.3.1
### Minor Analysis Improvements
* The "Returning stack-allocated memory" query (`cpp/return-stack-allocated-memory`) no longer produces results if there is an extraction error in the returned expression.
* The "Badly bounded write" query (`cpp/badly-bounded-write`) no longer produces results if there is an extraction error in the type of the output buffer.
* The "Too few arguments to formatting function" query (`cpp/wrong-number-format-arguments`) no longer produces results if an argument has an extraction error.
* The "Wrong type of arguments to formatting function" query (`cpp/wrong-type-format-argument`) no longer produces results when an argument type has an extraction error.
* Added dataflow models and flow sources for Microsoft's Active Template Library (ATL).
## 1.3.0
### New Queries
* Added a new high-precision quality query, `cpp/guarded-free`, which detects useless NULL pointer checks before calls to `free`. A variation of this query was originally contributed as an [experimental query by @mario-campos](https://github.com/github/codeql/pull/16331).
### Minor Analysis Improvements
* The "Call to function with fewer arguments than declared parameters" query (`cpp/too-few-arguments`) no longer produces results if the function has been implicitly declared.
## 1.2.7
No user-facing changes.
## 1.2.6
### Minor Analysis Improvements
* Remove results from the `cpp/wrong-type-format-argument` ("Wrong type of arguments to formatting function") query if the argument is the return value of an implicitly declared function.
## 1.2.5
### Minor Analysis Improvements
* The `cpp/unclear-array-index-validation` ("Unclear validation of array index") query has been improved to reduce false positives and increase true positives.
* Fixed false positives in the `cpp/uninitialized-local` ("Potentially uninitialized local variable") query if there are extraction errors in the function.
* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to detect byte arrays.
* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to recognize dynamic checks prior to possible dangerous widening.
## 1.2.4
### Minor Analysis Improvements
* Fixed false positives in the `cpp/wrong-number-format-arguments` ("Too few arguments to formatting function") query when the formatting function has been declared implicitly.
## 1.2.3
### Minor Analysis Improvements
* Removed false positives caused by buffer accesses in unreachable code
* Removed false positives caused by inconsistent type checking
* Add modeling of C functions that don't throw, thereby increasing the precision of the `cpp/incorrect-allocation-error-handling` ("Incorrect allocation-error handling") query. The query now produces additional true positives.
## 1.2.2
No user-facing changes.
## 1.2.1
### Minor Analysis Improvements
* The `cpp/uncontrolled-allocation-size` ("Uncontrolled allocation size") query now considers arithmetic operations that might reduce the size of user input as a barrier. The query therefore produces fewer false positive results.
## 1.2.0
### Query Metadata Changes
* The precision of `cpp/unsigned-difference-expression-compared-zero` ("Unsigned difference expression compared to zero") has been increased to `high`. As a result, it will be run by default as part of the Code Scanning suite.
### Minor Analysis Improvements
* Fixed false positives in the `cpp/memory-may-not-be-freed` ("Memory may not be freed") query involving class methods that returned an allocated field of that class being misidentified as allocators.
* The `cpp/incorrectly-checked-scanf` ("Incorrect return-value check for a 'scanf'-like function") query now produces fewer false positive results.
* The `cpp/incorrect-allocation-error-handling` ("Incorrect allocation-error handling") query no longer produces occasional false positive results inside template instantiations.
* The `cpp/suspicious-allocation-size` ("Not enough memory allocated for array of pointer type") query no longer produces false positives on "variable size" `struct`s.
## 1.1.0
### Query Metadata Changes
* The precision of `cpp/iterator-to-expired-container` ("Iterator to expired container") has been increased to `high`. As a result, it will be run by default as part of the Code Scanning suite.
* The precision of `cpp/unsafe-strncat` ("Potentially unsafe call to strncat") has been increased to `high`. As a result, it will be run by default as part of the Code Scanning suite.
### Minor Analysis Improvements
* The `cpp/unsigned-difference-expression-compared-zero` ("Unsigned difference expression compared to zero") query now produces fewer false positives.
## 1.0.3
No user-facing changes.
## 1.0.2
No user-facing changes.
## 1.0.1
### Minor Analysis Improvements
* The `cpp/dangerous-function-overflow` no longer produces a false positive alert when the `gets` function does not have exactly one parameter.
## 1.0.0
### Breaking Changes
* CodeQL package management is now generally available, and all GitHub-produced CodeQL packages have had their version numbers increased to 1.0.0.
### Minor Analysis Improvements
* The "Use of unique pointer after lifetime ends" query (`cpp/use-of-unique-pointer-after-lifetime-ends`) no longer reports an alert when the pointer is converted to a boolean
* The "Variable not initialized before use" query (`cpp/not-initialised`) no longer reports an alert on static variables.
## 0.9.12
### New Queries
* Added a new query, `cpp/iterator-to-expired-container`, to detect the creation of iterators owned by a temporary objects that are about to be destroyed.
## 0.9.11
### Minor Analysis Improvements
* The "Uncontrolled data used in path expression" query (`cpp/path-injection`) query produces fewer near-duplicate results.
* The "Global variable may be used before initialization" query (`cpp/global-use-before-init`) no longer raises an alert on global variables that are initialized when they are declared.
* The "Inconsistent null check of pointer" query (`cpp/inconsistent-nullness-testing`) query no longer raises an alert when the guarded check is in a macro expansion.
## 0.9.10
No user-facing changes.
## 0.9.9
### New Queries
* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.
### Query Metadata Changes
* `@precision medium` metadata was added to the `cpp/boost/tls-settings-misconfiguration` and `cpp/boost/use-of-deprecated-hardcoded-security-protocol` queries, and these queries are now included in the security-extended suite. The `@name` metadata of these queries were also updated.
### Minor Analysis Improvements
* The "Missing return-value check for a 'scanf'-like function" query (`cpp/missing-check-scanf`) has been converted to a `path-problem` query.
* The "Potentially uninitialized local variable" query (`cpp/uninitialized-local`) has been converted to a `path-problem` query.
* Added models for `GLib` allocation and deallocation functions.
## 0.9.8
No user-facing changes.
## 0.9.7
No user-facing changes.
## 0.9.6
### Minor Analysis Improvements
* The "non-constant format string" query (`cpp/non-constant-format`) has been converted to a `path-problem` query.
* The new C/C++ dataflow and taint-tracking libraries (`semmle.code.cpp.dataflow.new.DataFlow` and `semmle.code.cpp.dataflow.new.TaintTracking`) now implicitly assume that dataflow and taint modelled via `DataFlowFunction` and `TaintFunction` always fully overwrite their buffers and thus act as flow barriers. As a result, many dataflow and taint-tracking queries now produce fewer false positives. To remove this assumption and go back to the previous behavior for a given model, one can override the new `isPartialWrite` predicate.
## 0.9.5
### Minor Analysis Improvements
* The "non-constant format string" query (`cpp/non-constant-format`) has been updated to produce fewer false positives.
* Added dataflow models for the `gettext` function variants.
## 0.9.4
### Minor Analysis Improvements
* Corrected 2 false positive with `cpp/incorrect-string-type-conversion`: conversion of byte arrays to wchar and new array allocations converted to wchar.
* The "Incorrect return-value check for a 'scanf'-like function" query (`cpp/incorrectly-checked-scanf`) no longer reports an alert when an explicit check for EOF is added.
* The "Incorrect return-value check for a 'scanf'-like function" query (`cpp/incorrectly-checked-scanf`) now recognizes more EOF checks.
* The "Potentially uninitialized local variable" query (`cpp/uninitialized-local`) no longer reports an alert when the local variable is used as a qualifier to a static member function call.
* The diagnostic query `cpp/diagnostics/successfully-extracted-files` now considers any C/C++ file seen during extraction, even one with some errors, to be extracted / scanned. This affects the Code Scanning UI measure of scanned C/C++ files.
## 0.9.3
### Minor Analysis Improvements
* The `cpp/include-non-header` style query will now ignore the `.def` extension for textual header inclusions.
## 0.9.2
### New Queries
* Added a new query, `cpp/use-of-unique-pointer-after-lifetime-ends`, to detect uses of the contents unique pointers that will be destroyed immediately.
* The `cpp/incorrectly-checked-scanf` query has been added. This finds results where the return value of scanf is not checked correctly. Some of these were previously found by `cpp/missing-check-scanf` and will no longer be reported there.
### Minor Analysis Improvements
* The `cpp/badly-bounded-write` query could report false positives when a pointer was first initialized with a literal and later assigned a dynamically allocated array. These false positives now no longer occur.
## 0.9.1
No user-facing changes.
## 0.9.0
### Breaking Changes
* The `cpp/tainted-format-string-through-global` query has been deleted. This does not lead to a loss of relevant alerts, as the query duplicated a subset of the alerts from `cpp/tainted-format-string`.
### New Queries
* Added a new query, `cpp/use-of-string-after-lifetime-ends`, to detect calls to `c_str` on strings that will be destroyed immediately.
## 0.8.3
### Minor Analysis Improvements
* The `cpp/uninitialized-local` query has been improved to produce fewer false positives.
## 0.8.2
No user-facing changes.
## 0.8.1
### New Queries
* The query `cpp/redundant-null-check-simple` has been promoted to Code Scanning. The query finds cases where a pointer is compared to null after it has already been dereferenced. Such comparisons likely indicate a bug at the place where the pointer is dereferenced, or where the pointer is compared to null.
Note: This query was incorrectly noted as being promoted to Code Scanning in CodeQL version 2.14.6.
## 0.8.0
### Query Metadata Changes
* The `cpp/double-free` query has been further improved to reduce false positives and its precision has been increased from `medium` to `high`.
* The `cpp/use-after-free` query has been further improved to reduce false positives and its precision has been increased from `medium` to `high`.
### Minor Analysis Improvements
* The queries `cpp/double-free` and `cpp/use-after-free` find fewer false positives
in cases where a non-returning function is called.
* The number of duplicated dataflow paths reported by queries has been significantly reduced.
## 0.7.5
No user-facing changes.
## 0.7.4
### New Queries
* Added a new query, `cpp/invalid-pointer-deref`, to detect out-of-bounds pointer reads and writes.
### Minor Analysis Improvements
* The "Comparison where assignment was intended" query (`cpp/compare-where-assign-meant`) no longer reports comparisons that appear in macro expansions.
* Some queries that had repeated results corresponding to different levels of indirection for `argv` now only have a single result.
* The `cpp/non-constant-format` query no longer considers an assignment on the right-hand side of another assignment to be a source of non-constant format strings. As a result, the query may now produce fewer results.
## 0.7.3
No user-facing changes.

View File

@@ -14,13 +14,32 @@ the program, or security vulnerabilities, by allowing an attacker to overwrite a
</overview>
<recommendation>
<p>
Ensure that all execution paths deallocate the allocated memory at most once. If possible, reassign
the pointer to a null value after deallocating it. This will prevent double-free vulnerabilities since
most deallocation functions will perform a null-pointer check before attempting to deallocate the memory.
Ensure that all execution paths deallocate the allocated memory at most once. In complex cases it may
help to reassign a pointer to a null value after deallocating it. This will prevent double-free vulnerabilities
since most deallocation functions will perform a null-pointer check before attempting to deallocate memory.
</p>
</recommendation>
<example><sample src="DoubleFree.cpp" />
<example>
<p>
In the following example, <code>buff</code> is allocated and then freed twice:
</p>
<sample src="DoubleFreeBad.cpp" />
<p>
Reviewing the code above, the issue can be fixed by simply deleting the additional call to
<code>free(buff)</code>.
</p>
<sample src="DoubleFreeGood.cpp" />
<p>
In the next example, <code>task</code> may be deleted twice, if an exception occurs inside the <code>try</code>
block after the first <code>delete</code>:
</p>
<sample src="DoubleFreeBad2.cpp" />
<p>
The problem can be solved by assigning a null value to the pointer after the first <code>delete</code>, as
calling <code>delete</code> a second time on the null pointer is harmless.
</p>
<sample src="DoubleFreeGood2.cpp" />
</example>
<references>

View File

@@ -2,7 +2,7 @@
* @name Potential double free
* @description Freeing a resource more than once can lead to undefined behavior and cause memory corruption.
* @kind path-problem
* @precision medium
* @precision high
* @id cpp/double-free
* @problem.severity warning
* @security-severity 9.3
@@ -13,37 +13,29 @@
import cpp
import semmle.code.cpp.dataflow.new.DataFlow
import FlowAfterFree
import semmle.code.cpp.security.flowafterfree.FlowAfterFree
import DoubleFree::PathGraph
predicate isFree(DataFlow::Node n, Expr e) { isFree(n, e, _) }
/**
* `dealloc1` is a deallocation expression and `e` is an expression such
* that is deallocated by a deallocation expression, and the `(dealloc1, e)` pair
* should be excluded by the `FlowFromFree` library.
*
* Note that `e` is not necessarily the expression deallocated by `dealloc1`. It will
* be bound to the second deallocation as identified by the `FlowFromFree` library.
* Holds if `n` is a dataflow node that represents a pointer going into a
* deallocation function, and `e` is the corresponding expression.
*/
bindingset[dealloc1, e]
predicate isExcludeFreePair(DeallocationExpr dealloc1, Expr e) {
exists(DeallocationExpr dealloc2 | isFree(_, e, dealloc2) |
dealloc1.(FunctionCall).getTarget().hasGlobalName("MmFreePagesFromMdl") and
// From https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-mmfreepagesfrommdl:
// "After calling MmFreePagesFromMdl, the caller must also call ExFreePool
// to release the memory that was allocated for the MDL structure."
isExFreePoolCall(dealloc2, _)
)
predicate isFree(DataFlow::Node n, Expr e) { isFree(_, n, e, _) }
module DoubleFreeParam implements FlowFromFreeParamSig {
predicate isSink = isFree/2;
predicate isExcluded = isExcludedMmFreePageFromMdl/2;
predicate sourceSinkIsRelated = defaultSourceSinkIsRelated/2;
}
module DoubleFree = FlowFromFree<isFree/2, isExcludeFreePair/2>;
module DoubleFree = FlowFromFree<DoubleFreeParam>;
from DoubleFree::PathNode source, DoubleFree::PathNode sink, DeallocationExpr dealloc, Expr e2
where
DoubleFree::flowPath(source, sink) and
isFree(source.getNode(), _, dealloc) and
isFree(source.getNode(), _, _, dealloc) and
isFree(sink.getNode(), e2)
select sink.getNode(), source, sink,
"Memory pointed to by '" + e2.toString() + "' may already have been freed by $@.", dealloc,
dealloc.toString()
select sink.getNode(), source, sink, "Memory pointed to by $@ may already have been freed by $@.",
e2, e2.toString(), dealloc, dealloc.toString()

View File

@@ -0,0 +1,16 @@
void g() {
MyTask *task = nullptr;
try
{
task = new MyTask;
...
delete task;
...
} catch (...) {
delete task; // BAD: potential double-free
}
}

View File

@@ -0,0 +1,7 @@
int* f() {
int *buff = malloc(SIZE*sizeof(int));
do_stuff(buff);
free(buff); // GOOD: buff is only freed once.
int *new_buffer = malloc(SIZE*sizeof(int));
return new_buffer;
}

View File

@@ -0,0 +1,17 @@
void g() {
MyTask *task = nullptr;
try
{
task = new MyTask;
...
delete task;
task = nullptr;
...
} catch (...) {
delete task; // GOOD: harmless if task is NULL
}
}

View File

@@ -1,124 +0,0 @@
import cpp
import semmle.code.cpp.dataflow.new.DataFlow
private import semmle.code.cpp.ir.IR
/**
* Signature for a predicate that holds if `n.asExpr() = e` and `n` is a sink in
* the `FlowFromFreeConfig` module.
*/
private signature predicate isSinkSig(DataFlow::Node n, Expr e);
/**
* Holds if `dealloc` is a deallocation expression and `e` is an expression such
* that `isFree(_, e)` holds for some `isFree` predicate satisfying `isSinkSig`,
* and this source-sink pair should be excluded from the analysis.
*/
bindingset[dealloc, e]
private signature predicate isExcludedSig(DeallocationExpr dealloc, Expr e);
/**
* Holds if `(b1, i1)` strictly post-dominates `(b2, i2)`
*/
bindingset[i1, i2]
predicate strictlyPostDominates(IRBlock b1, int i1, IRBlock b2, int i2) {
b1 = b2 and
i1 > i2
or
b1.strictlyPostDominates(b2)
}
/**
* Holds if `(b1, i1)` strictly dominates `(b2, i2)`
*/
bindingset[i1, i2]
predicate strictlyDominates(IRBlock b1, int i1, IRBlock b2, int i2) {
b1 = b2 and
i1 < i2
or
b1.strictlyDominates(b2)
}
/**
* Constructs a `FlowFromFreeConfig` module that can be used to find flow between
* a pointer being freed by some deallocation function, and a user-specified sink.
*
* In order to reduce false positives, the set of sinks is restricted to only those
* that satisfy at least one of the following two criteria:
* 1. The source dominates the sink, or
* 2. The sink post-dominates the source.
*/
module FlowFromFree<isSinkSig/2 isASink, isExcludedSig/2 isExcluded> {
module FlowFromFreeConfig implements DataFlow::StateConfigSig {
class FlowState instanceof Expr {
FlowState() { isFree(_, this, _) }
string toString() { result = super.toString() }
}
predicate isSource(DataFlow::Node node, FlowState state) { isFree(node, state, _) }
pragma[inline]
predicate isSink(DataFlow::Node sink, FlowState state) {
exists(
Expr e, DataFlow::Node source, IRBlock b1, int i1, IRBlock b2, int i2,
DeallocationExpr dealloc
|
isASink(sink, e) and
isFree(source, state, dealloc) and
e != state and
source.hasIndexInBlock(b1, i1) and
sink.hasIndexInBlock(b2, i2) and
not isExcluded(dealloc, e)
|
strictlyDominates(b1, i1, b2, i2)
or
strictlyPostDominates(b2, i2, b1, i1)
)
}
predicate isBarrierIn(DataFlow::Node n) {
n.asIndirectExpr() = any(AddressOfExpr aoe)
or
n.asIndirectExpr() = any(Call call).getAnArgument()
or
exists(Expr e |
n.asIndirectExpr() = e.(PointerDereferenceExpr).getOperand() or
n.asIndirectExpr() = e.(ArrayExpr).getArrayBase()
|
e = any(StoreInstruction store).getDestinationAddress().getUnconvertedResultExpression()
)
}
}
import DataFlow::GlobalWithState<FlowFromFreeConfig>
}
/**
* Holds if `n` is a dataflow node such that `n.asExpr() = e` and `e`
* is being freed by a deallocation expression `dealloc`.
*/
predicate isFree(DataFlow::Node n, Expr e, DeallocationExpr dealloc) {
exists(Expr conv |
e = conv.getUnconverted() and
conv = dealloc.getFreedExpr().getFullyConverted() and
conv = n.asConvertedExpr()
) and
// Ignore realloc functions
not exists(dealloc.(FunctionCall).getTarget().(AllocationFunction).getReallocPtrArg())
}
/**
* Holds if `fc` is a function call that is the result of expanding
* the `ExFreePool` macro.
*/
predicate isExFreePoolCall(FunctionCall fc, Expr e) {
e = fc.getArgument(0) and
(
exists(MacroInvocation mi |
mi.getMacroName() = "ExFreePool" and
mi.getExpr() = fc
)
or
fc.getTarget().hasGlobalName("ExFreePool")
)
}

View File

@@ -21,27 +21,47 @@ predicate initFunc(GlobalVariable v, Function f) {
)
}
/** Holds if `v` has an initializer in function `f` that dominates `node`. */
predicate dominatingInitInFunc(GlobalVariable v, Function f, ControlFlowNode node) {
exists(VariableAccess initAccess |
v.getAnAccess() = initAccess and
initAccess.isUsedAsLValue() and
initAccess.getEnclosingFunction() = f and
dominates(initAccess, node)
)
}
predicate safeAccess(VariableAccess access) {
// it is safe if the variable access is part of a `sizeof` expression
exists(SizeofExprOperator e | e.getAChild*() = access)
}
predicate useFunc(GlobalVariable v, Function f) {
exists(VariableAccess access |
v.getAnAccess() = access and
access.isRValue() and
access.getEnclosingFunction() = f
) and
not initFunc(v, f)
access.getEnclosingFunction() = f and
not safeAccess(access) and
not dominatingInitInFunc(v, f, access)
)
}
predicate uninitialisedBefore(GlobalVariable v, Function f) {
f.hasGlobalName("main")
f.hasGlobalName("main") and
not initialisedAtDeclaration(v) and
not isStdlibVariable(v)
or
exists(Call call, Function g |
uninitialisedBefore(v, g) and
call.getEnclosingFunction() = g and
(not functionInitialises(f, v) or locallyUninitialisedAt(v, call)) and
(not functionInitialises(g, v) or locallyUninitialisedAt(v, call)) and
resolvedCall(call, f)
)
}
predicate functionInitialises(Function f, GlobalVariable v) {
initFunc(v, f)
or
exists(Call call |
call.getEnclosingFunction() = f and
initialisedBy(v, call)
@@ -58,7 +78,8 @@ predicate locallyUninitialisedAt(GlobalVariable v, Call call) {
exists(Call mid |
locallyUninitialisedAt(v, mid) and not initialisedBy(v, mid) and callPair(mid, call)
)
)
) and
not dominatingInitInFunc(v, call.getEnclosingFunction(), call)
}
predicate initialisedBy(GlobalVariable v, Call call) {
@@ -98,10 +119,15 @@ predicate callReaches(Call call, ControlFlowNode successor) {
)
}
/** Holds if `v` has an initializer. */
predicate initialisedAtDeclaration(GlobalVariable v) { exists(v.getInitializer()) }
/** Holds if `v` is a global variable that does not need to be initialized. */
predicate isStdlibVariable(GlobalVariable v) { v.hasGlobalName(["stdin", "stdout", "stderr"]) }
from GlobalVariable v, Function f
where
uninitialisedBefore(v, f) and
useFunc(v, f)
select f,
"The variable '" + v.getName() +
" is used in this function but may not be initialized when it is called."
select f, "The variable $@ is used in this function but may not be initialized when it is called.",
v, v.getName()

View File

@@ -15,6 +15,8 @@ import cpp
from StackVariable v, ControlFlowNode def, VariableAccess checked, VariableAccess unchecked
where
checked = v.getAnAccess() and
// The check can often be in a macro for handling exception
not checked.isInMacroExpansion() and
dereferenced(checked) and
unchecked = v.getAnAccess() and
dereferenced(unchecked) and

View File

@@ -0,0 +1,30 @@
{
int i, j;
// BAD: The result is only checked against zero
if (scanf("%d %d", &i, &j)) {
use(i);
use(j);
}
// BAD: The result is only checked against zero
if (scanf("%d %d", &i, &j) == 0) {
i = 0;
j = 0;
}
use(i);
use(j);
if (scanf("%d %d", &i, &j) == 2) {
// GOOD: the result is checked against 2
}
// GOOD: the result is compared directly
int r = scanf("%d %d", &i, &j);
if (r < 2) {
return;
}
if (r == 1) {
j = 0;
}
}

View File

@@ -0,0 +1,37 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This query finds calls of <tt>scanf</tt>-like functions with
improper return-value checking. Specifically, it flags uses of <code>scanf</code> where the return value is only checked against zero.
</p>
<p>
Functions in the <tt>scanf</tt> family return either <tt>EOF</tt> (a negative value)
in case of IO failure, or the number of items successfully read from the
input. Consequently, a simple check that the return value is nonzero
is not enough.
</p>
</overview>
<recommendation>
<p>
Ensure that all uses of <tt>scanf</tt> check the return value against the expected number of arguments
rather than just against zero.
</p>
</recommendation>
<example>
<p>The following examples show different ways of guarding a <tt>scanf</tt> output. In the BAD examples, the results are only checked against zero. In the GOOD examples, the results are checked against the expected number of matches instead.</p>
<sample src="IncorrectCheckScanf.cpp" />
</example>
<references>
<li>SEI CERT C++ Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/ERR62-CPP.+Detect+errors+when+converting+a+string+to+a+number">ERR62-CPP. Detect errors when converting a string to a number</a>.</li>
<li>SEI CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/ERR33-C.+Detect+and+handle+standard+library+errors">ERR33-C. Detect and handle standard library errors</a>.</li>
<li>cppreference.com: <a href="https://en.cppreference.com/w/c/io/fscanf">scanf, fscanf, sscanf, scanf_s, fscanf_s, sscanf_s</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Incorrect return-value check for a 'scanf'-like function
* @description Failing to account for EOF in a call to a scanf-like function can lead to
* undefined behavior.
* @kind problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id cpp/incorrectly-checked-scanf
* @tags security
* correctness
* external/cwe/cwe-253
*/
import cpp
import semmle.code.cpp.commons.Scanf
import ScanfChecks
from ScanfFunctionCall call
where incorrectlyCheckedScanf(call)
select call, "The result of scanf is only checked against 0, but it can also return EOF."

View File

@@ -32,9 +32,18 @@ predicate called(Function f) {
exists(FunctionAccess fa | fa.getTarget() = f)
}
predicate staticWithoutDereference(GlobalVariable v) {
v.isStatic() and
not exists(VariableAccess va |
va = v.getAnAccess() and
dereferenced(va)
)
}
from GlobalVariable v
where
global(v) and
not staticWithoutDereference(v) and
not exists(VariableAccess lval |
v.getAnAccess() = lval and
lval.isUsedAsLValue() and

View File

@@ -39,7 +39,7 @@ predicate allocCallOrIndirect(Expr e) {
allocCallOrIndirect(rtn.getExpr())
or
// return variable assigned with alloc
exists(Variable v |
exists(StackVariable v |
v = rtn.getExpr().(VariableAccess).getTarget() and
allocCallOrIndirect(v.getAnAssignedValue()) and
not assignedToFieldOrGlobal(v, _)

View File

@@ -2,7 +2,7 @@
* @name Missing return-value check for a 'scanf'-like function
* @description Failing to check that a call to 'scanf' actually writes to an
* output variable can lead to unexpected behavior at reading time.
* @kind problem
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision medium
@@ -18,15 +18,9 @@ import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
pragma[nomagic]
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
import ScanfChecks
import ScanfToUseFlow::PathGraph
/**
* Holds if `n` represents an uninitialized stack-allocated variable, or a
@@ -37,30 +31,47 @@ predicate isUninitialized(Node n) {
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic]
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isUninitialized(n)
or
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}
/**
* A configuration to track a uninitialized data flowing to a `scanf`-like
* output parameter position.
*
* This is meant to be a simple flow to rule out cases like:
* ```
* int x = 0;
* scanf(..., &x);
* use(x);
* ```
* since `x` is already initialized it's not a security concern that `x` is
* used without checking the return value of `scanf`.
*
* Since this flow is meant to be simple, we disable field flow and require the
* source and the sink to be in the same callable.
*/
module UninitializedToScanfConfig implements ConfigSig {
predicate isSource(Node source) { isUninitialized(source) }
predicate isSink(Node sink) { isSink(_, _, sink, _) }
FlowFeature getAFeature() { result instanceof FeatureEqualSourceSinkCallContext }
int accessPathLimit() { result = 0 }
}
module UninitializedToScanfFlow = Global<UninitializedToScanfConfig>;
/**
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
exists(Node n | UninitializedToScanfFlow::flowTo(n) and isSink(call, index, n, output)) and
// Exclude results from incorrectky checked scanf query
not incorrectlyCheckedScanf(call)
}
/**
@@ -74,31 +85,6 @@ predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
@@ -109,40 +95,37 @@ predicate isSink(Node n, Expr e) {
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
* A configuration to track flow from the output argument of a call to a
* `scanf`-like function, and to a use of the defined variable.
*/
pragma[nomagic]
predicate revFlow(Node n) {
fwdFlow(n) and
(
module ScanfToUseConfig implements ConfigSig {
predicate isSource(Node source) { isSource(_, _, source, _) }
predicate isSink(Node sink) { isSink(sink, _) }
predicate isBarrierOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
}
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
module ScanfToUseFlow = Global<ScanfToUseConfig>;
/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
predicate flowPath(
ScanfToUseFlow::PathNode source, ScanfFunctionCall call, int index, ScanfToUseFlow::PathNode sink,
Expr e
) {
isSource(call, index, source.getNode(), _) and
ScanfToUseFlow::flowPath(source, sink) and
isSink(sink.getNode(), e)
}
/**
@@ -164,39 +147,33 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
predicate hasNonGuardedAccess(
ScanfToUseFlow::PathNode source, ScanfFunctionCall call, ScanfToUseFlow::PathNode sink, Expr e,
int minGuard
) {
exists(int index |
hasFlow(_, call, index, _, e) and
flowPath(source, call, index, sink, e) and
minGuard = getMinimumGuardConstant(call, index)
|
not exists(int value |
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
not exists(GuardCondition guard |
// call == k and k >= minGuard so call >= minGuard
guard
.ensuresEq(globalValueNumber(call).getAnExpr(), any(int k | minGuard <= k),
e.getBasicBlock(), true)
or
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
// call >= k and k >= minGuard so call >= minGuard
guard
.ensuresLt(globalValueNumber(call).getAnExpr(), any(int k | minGuard <= k),
e.getBasicBlock(), false)
)
)
}
/** Returns a block guarded by the assertion of `value op call` */
BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
g.ensuresLt(left, right, 0, result, true) and op = "<"
or
g.ensuresLt(left, right, 1, result, true) and op = "<="
)
}
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
from
ScanfToUseFlow::PathNode source, ScanfToUseFlow::PathNode sink, ScanfFunctionCall call, Expr e,
int minGuard
where hasNonGuardedAccess(source, call, sink, e, minGuard)
select e, source, sink,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()

View File

@@ -54,6 +54,7 @@ predicate undefinedLocalUse(VariableAccess va) {
// it is hard to tell when a struct or array has been initialized, so we
// ignore them
not isAggregateType(lv.getUnderlyingType()) and
not lv.isStatic() and // static variables are initialized to zero or null by default
not lv.getType().hasName("va_list") and
va = lv.getAnAccess() and
noDefPath(lv, va) and
@@ -70,7 +71,8 @@ predicate uninitialisedGlobal(GlobalVariable gv) {
va = gv.getAnAccess() and
va.isRValue() and
not gv.hasInitializer() and
not gv.hasSpecifier("extern")
not gv.hasSpecifier("extern") and
not gv.isStatic() // static variables are initialized to zero or null by default
)
}

View File

@@ -82,6 +82,16 @@ module OverflowDestinationConfig implements DataFlow::ConfigSig {
nodeIsBarrierEqualityCandidate(node, access, checkedVar)
)
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSourceLocation(DataFlow::Node source) { none() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
exists(FunctionCall fc | result = fc.getLocation() |
sourceSized(fc, sink.asIndirectConvertedExpr())
)
}
}
module OverflowDestination = TaintTracking::Global<OverflowDestinationConfig>;

View File

@@ -17,6 +17,7 @@ import cpp
import semmle.code.cpp.commons.Buffer
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
import semmle.code.cpp.ConfigurationTestFile
import LoopBounds
private predicate staticBufferBase(VariableAccess access, Variable v) {
@@ -42,7 +43,9 @@ class BufferAccess extends ArrayExpr {
not exists(Macro m |
m.getName() = "strcmp" and
m.getAnInvocation().getAnExpandedElement() = this
)
) and
//A buffer access must be reachable (not in dead code)
reachable(this)
}
int bufferSize() { staticBuffer(this.getArrayBase(), _, result) }
@@ -146,7 +149,10 @@ predicate outOfBounds(BufferAccess bufaccess, string msg) {
from Element error, string msg
where
overflowOffsetInLoop(error, msg) or
wrongBufferSize(error, msg) or
outOfBounds(error, msg)
(
overflowOffsetInLoop(error, msg) or
wrongBufferSize(error, msg) or
outOfBounds(error, msg)
) and
not error.getFile() instanceof ConfigurationTestFile // elements in files generated during configuration are likely false positives
select error, msg

View File

@@ -0,0 +1,64 @@
private import cpp
private import semmle.code.cpp.commons.Scanf
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.ValueNumbering
private predicate exprInBooleanContext(Expr e) {
exists(IRGuardCondition gc |
exists(Instruction i |
i.getUnconvertedResultExpression() = e and
gc.comparesEq(valueNumber(i).getAUse(), 0, _, _)
)
or
gc.getUnconvertedResultExpression() = e
)
}
private predicate isLinuxKernel() {
// For the purpose of sscanf, we check the header guards for the files that it is defined in (which have changed)
exists(Macro macro | macro.getName() in ["_LINUX_KERNEL_SPRINTF_H_", "_LINUX_KERNEL_H"])
}
/**
* Gets the value of the EOF macro.
*
* This is typically `"-1"`, but this is not guaranteed to be the case on all
* systems.
*/
private string getEofValue() {
exists(MacroInvocation mi |
mi.getMacroName() = "EOF" and
result = unique( | | mi.getExpr().getValue())
)
}
/**
* Holds if the value of `call` has been checked to not equal `EOF`.
*/
private predicate checkedForEof(ScanfFunctionCall call) {
exists(IRGuardCondition gc |
exists(CallInstruction i | i.getUnconvertedResultExpression() = call |
exists(int val | gc.comparesEq(valueNumber(i).getAUse(), val, _, _) |
// call == EOF
val = getEofValue().toInt()
or
// call == [any positive number]
val > 0
)
or
exists(int val | gc.comparesLt(valueNumber(i).getAUse(), val, true, _) |
// call < [any non-negative number] (EOF is guaranteed to be negative)
val >= 0
)
)
)
}
/**
* Holds if `call` is a `scanf`-like call were the result is only checked against 0, but it can also return EOF.
*/
predicate incorrectlyCheckedScanf(ScanfFunctionCall call) {
exprInBooleanContext(call) and
not checkedForEof(call) and
not isLinuxKernel() // scanf in the linux kernel can't return EOF
}

View File

@@ -15,6 +15,7 @@
import cpp
import semmle.code.cpp.models.Models
import semmle.code.cpp.commons.Buffer
predicate baseType(AllocationExpr alloc, Type base) {
exists(PointerType pointer |
@@ -30,7 +31,8 @@ predicate baseType(AllocationExpr alloc, Type base) {
}
predicate decideOnSize(Type t, int size) {
// If the codebase has more than one type with the same name, it can have more than one size.
// If the codebase has more than one type with the same name, it can have more than one size. For
// most purposes in this query, we use the smallest.
size = min(t.getSize())
}
@@ -45,7 +47,8 @@ where
size = 0 or
(allocated / size) * size = allocated
) and
not basesize > allocated // covered by SizeCheck.ql
not basesize > allocated and // covered by SizeCheck.ql
not memberMayBeVarSize(base.getUnspecifiedType(), _) // exclude variable size types
select alloc,
"Allocated memory (" + allocated.toString() + " bytes) is not a multiple of the size of '" +
base.getName() + "' (" + basesize.toString() + " bytes)."

View File

@@ -8,7 +8,7 @@
<p>
This rule finds accesses through a pointer of a memory location that has already been freed (i.e. through a dangling pointer).
Such memory blocks have already been released to the dynamic memory manager, and modifying them can lead to anything
from a segfault to memory corruption that would cause subsequent calls to the dynamic memory manger to behave
from a segfault to memory corruption that would cause subsequent calls to the dynamic memory manager to behave
erratically, to a possible security vulnerability.
</p>

View File

@@ -2,7 +2,7 @@
* @name Potential use after free
* @description An allocated memory block is used after it has been freed. Behavior in such cases is undefined and can cause memory corruption.
* @kind path-problem
* @precision medium
* @precision high
* @id cpp/use-after-free
* @problem.severity warning
* @security-severity 9.3
@@ -14,163 +14,25 @@
import cpp
import semmle.code.cpp.dataflow.new.DataFlow
import semmle.code.cpp.ir.IR
import FlowAfterFree
import UseAfterFree::PathGraph
import semmle.code.cpp.security.flowafterfree.FlowAfterFree
import semmle.code.cpp.security.flowafterfree.UseAfterFree
import UseAfterFreeTrace::PathGraph
/**
* Holds if `call` is a call to a function that obviously
* doesn't dereference its `i`'th argument.
*/
private predicate externalCallNeverDereferences(FormattingFunctionCall call, int arg) {
exists(int formatArg |
pragma[only_bind_out](call.getFormatArgument(formatArg)) =
pragma[only_bind_out](call.getArgument(arg)) and
call.getFormat().(FormatLiteral).getConvSpec(formatArg) != "%s"
)
module UseAfterFreeParam implements FlowFromFreeParamSig {
predicate isSink = isUse/2;
predicate isExcluded = isExcludedMmFreePageFromMdl/2;
predicate sourceSinkIsRelated = defaultSourceSinkIsRelated/2;
}
predicate isUse0(DataFlow::Node n, Expr e) {
e = n.asExpr() and
not isFree(_, e, _) and
(
e = any(PointerDereferenceExpr pde).getOperand()
or
e = any(PointerFieldAccess pfa).getQualifier()
or
e = any(ArrayExpr ae).getArrayBase()
or
e = any(Call call).getQualifier()
or
// Assume any function without a body will dereference the pointer
exists(int i, Call call, Function f |
n.asExpr() = call.getArgument(i) and
f = call.getTarget() and
not f.hasEntryPoint() and
// Exclude known functions we know won't dereference the pointer.
// For example, a call such as `printf("%p", myPointer)`.
not externalCallNeverDereferences(call, i)
)
)
}
import UseAfterFreeParam
module ParameterSinks {
import semmle.code.cpp.ir.ValueNumbering
module UseAfterFreeTrace = FlowFromFree<UseAfterFreeParam>;
predicate flowsToUse(DataFlow::Node n) {
isUse0(n, _)
or
exists(DataFlow::Node succ |
flowsToUse(succ) and
DataFlow::localFlowStep(n, succ)
)
}
private predicate flowsFromParam(DataFlow::Node n) {
flowsToUse(n) and
(
n.asParameter().getUnspecifiedType() instanceof PointerType
or
exists(DataFlow::Node prev |
flowsFromParam(prev) and
DataFlow::localFlowStep(prev, n)
)
)
}
private predicate step(DataFlow::Node n1, DataFlow::Node n2) {
flowsFromParam(n1) and
flowsFromParam(n2) and
DataFlow::localFlowStep(n1, n2)
}
private predicate paramToUse(DataFlow::Node n1, DataFlow::Node n2) = fastTC(step/2)(n1, n2)
private predicate hasFlow(
DataFlow::Node source, InitializeParameterInstruction init, DataFlow::Node sink
) {
pragma[only_bind_out](source.asParameter()) = pragma[only_bind_out](init.getParameter()) and
paramToUse(source, sink) and
isUse0(sink, _)
}
private InitializeParameterInstruction getAnAlwaysDereferencedParameter0() {
exists(DataFlow::Node source, DataFlow::Node sink, IRBlock b1, int i1, IRBlock b2, int i2 |
hasFlow(pragma[only_bind_into](source), result, pragma[only_bind_into](sink)) and
source.hasIndexInBlock(b1, pragma[only_bind_into](i1)) and
sink.hasIndexInBlock(b2, pragma[only_bind_into](i2)) and
strictlyPostDominates(b2, i2, b1, i1)
)
}
private CallInstruction getAnAlwaysReachedCallInstruction(IRFunction f) {
result.getBlock().postDominates(f.getEntryBlock())
}
pragma[nomagic]
predicate callHasTargetAndArgument(Function f, int i, CallInstruction call, Instruction argument) {
call.getStaticCallTarget() = f and
call.getArgument(i) = argument
}
pragma[nomagic]
predicate initializeParameterInFunction(Function f, int i, InitializeParameterInstruction init) {
pragma[only_bind_out](init.getEnclosingFunction()) = f and
init.hasIndex(i)
}
InitializeParameterInstruction getAnAlwaysDereferencedParameter() {
result = getAnAlwaysDereferencedParameter0()
or
exists(
CallInstruction call, int i, InitializeParameterInstruction p, Instruction argument,
Function f
|
callHasTargetAndArgument(f, i, call, argument) and
initializeParameterInFunction(f, i, p) and
p = getAnAlwaysDereferencedParameter() and
result =
pragma[only_bind_out](pragma[only_bind_into](valueNumber(argument)).getAnInstruction()) and
call = getAnAlwaysReachedCallInstruction(_)
)
}
}
module IsUse {
private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon
predicate isUse(DataFlow::Node n, Expr e) {
isUse0(n, e)
or
exists(CallInstruction call, InitializeParameterInstruction init |
n.asOperand().getDef().getUnconvertedResultExpression() = e and
pragma[only_bind_into](init) = ParameterSinks::getAnAlwaysDereferencedParameter() and
viableParamArg(call, DataFlow::instructionNode(init), n) and
pragma[only_bind_out](init.getEnclosingFunction()) =
pragma[only_bind_out](call.getStaticCallTarget())
)
}
}
import IsUse
/**
* `dealloc1` is a deallocation expression, `e` is an expression that dereferences a
* pointer, and the `(dealloc1, e)` pair should be excluded by the `FlowFromFree` library.
*/
bindingset[dealloc1, e]
predicate isExcludeFreeUsePair(DeallocationExpr dealloc1, Expr e) {
// From https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-mmfreepagesfrommdl:
// "After calling MmFreePagesFromMdl, the caller must also call ExFreePool
// to release the memory that was allocated for the MDL structure."
dealloc1.(FunctionCall).getTarget().hasGlobalName("MmFreePagesFromMdl") and
isExFreePoolCall(_, e)
}
module UseAfterFree = FlowFromFree<isUse/2, isExcludeFreeUsePair/2>;
from UseAfterFree::PathNode source, UseAfterFree::PathNode sink, DeallocationExpr dealloc
from UseAfterFreeTrace::PathNode source, UseAfterFreeTrace::PathNode sink, DeallocationExpr dealloc
where
UseAfterFree::flowPath(source, sink) and
isFree(source.getNode(), _, dealloc)
UseAfterFreeTrace::flowPath(source, sink) and
isFree(source.getNode(), _, _, dealloc)
select sink.getNode(), source, sink, "Memory may have been previously freed by $@.", dealloc,
dealloc.toString()

View File

@@ -1,16 +1,13 @@
/**
* @name Successfully extracted files
* @description Lists all files in the source code directory that were extracted without encountering a problem in the file.
* @name Extracted files
* @description Lists all files in the source code directory that were extracted.
* @kind diagnostic
* @id cpp/diagnostics/successfully-extracted-files
* @tags successfully-extracted-files
*/
import cpp
import ExtractionProblems
from File f
where
not exists(ExtractionProblem e | e.getFile() = f) and
exists(f.getRelativePath())
where exists(f.getRelativePath()) and f.fromSource()
select f, "File successfully extracted."

View File

@@ -14,5 +14,5 @@ where
or
warning instanceof ExtractionUnknownProblem
select warning,
"Extraction failed in " + warning.getFile() + " with warning " + warning.getProblemMessage(),
warning.getSeverity()
"Extraction failed in " + warning.getFile() + " with warning " +
warning.getProblemMessage().replaceAll("$", "$$"), warning.getSeverity()

View File

@@ -17,5 +17,6 @@ from ExtractionError error
where
error instanceof ExtractionUnknownError or
exists(error.getFile().getRelativePath())
select error, "Extraction failed in " + error.getFile() + " with error " + error.getErrorMessage(),
error.getSeverity()
select error,
"Extraction failed in " + error.getFile() + " with error " +
error.getErrorMessage().replaceAll("$", "$$"), error.getSeverity()

View File

@@ -12,7 +12,11 @@
import cpp
predicate allowedTypedefs(TypedefType t) {
t.getName() = ["I64", "U64", "I32", "U32", "I16", "U16", "I8", "U8", "F64", "F32"]
t.getName() =
[
"I64", "U64", "I32", "U32", "I16", "U16", "I8", "U8", "F64", "F32", "int64_t", "uint64_t",
"int32_t", "uint32_t", "int16_t", "uint16_t", "int8_t", "uint8_t"
]
}
/**
@@ -46,6 +50,8 @@ from Declaration d, Type usedType
where
usedType = getAUsedType*(getAnImmediateUsedType(d)) and
problematic(usedType) and
// Allow uses of boolean types where defined by the language.
not usedType instanceof BoolType and
// Ignore violations for which we do not have a valid location.
not d.getLocation() instanceof UnknownLocation
select d,

View File

@@ -12,6 +12,7 @@
*/
import cpp
import semmle.code.cpp.ConfigurationTestFile
from EqualityOperation ro, Expr left, Expr right
where
@@ -20,5 +21,6 @@ where
ro.getAnOperand().getExplicitlyConverted().getType().getUnderlyingType() instanceof
FloatingPointType and
not ro.getAnOperand().isConstant() and // comparisons to constants generate too many false positives
not left.(VariableAccess).getTarget() = right.(VariableAccess).getTarget() // skip self comparison
not left.(VariableAccess).getTarget() = right.(VariableAccess).getTarget() and // skip self comparison
not ro.getFile() instanceof ConfigurationTestFile // expressions in files generated during configuration are likely false positives
select ro, "Equality checks on floating point values can yield unexpected results."

View File

@@ -179,6 +179,7 @@ predicate overflows(MulExpr me, Type t) {
from MulExpr me, Type t1, Type t2
where
not any(Compilation c).buildModeNone() and
t1 = me.getType().getUnderlyingType() and
t2 = me.getConversion().getType().getUnderlyingType() and
t1.getSize() < t2.getSize() and

View File

@@ -49,7 +49,7 @@ predicate nanTest(EqualityOperation cmp) {
pointlessSelfComparison(cmp) and
exists(Type t | t = cmp.getLeftOperand().getUnspecifiedType() |
t instanceof FloatingPointType or
t instanceof TemplateParameter
t instanceof TypeTemplateParameter
)
}

View File

@@ -44,6 +44,12 @@ module CastToPointerArithFlowConfig implements DataFlow::StateConfigSig {
) and
getFullyConvertedType(node) = state
}
predicate isBarrierIn(DataFlow::Node node) { isSource(node, _) }
predicate isBarrierOut(DataFlow::Node node) { isSink(node, _) }
predicate observeDiffInformedIncrementalMode() { any() }
}
/**

View File

@@ -42,7 +42,7 @@ in the previous example, one solution is to make the log message a trailing argu
<p>An alternative solution is to allow <code>log_with_timestamp</code> to accept format arguments:</p>
<sample src="NonConstantFormat-2-good.c" />
<p>In this formulation, the non-constant format string to <code>printf</code> has been replaced with
a non-constant format string to <code>vprintf</code>. Semmle will no longer consider the body of
a non-constant format string to <code>vprintf</code>. The analysis will no longer consider the body of
<code>log_with_timestamp</code> to be a problem, and will instead check that every call to
<code>log_with_timestamp</code> passes a constant format string.</p>

View File

@@ -1,10 +1,14 @@
/**
* @name Non-constant format string
* @description Passing a non-constant 'format' string to a printf-like function can lead
* @description Passing a value that is not a string literal 'format' string to a printf-like function can lead
* to a mismatch between the number of arguments defined by the 'format' and the number
* of arguments actually passed to the function. If the format string ultimately stems
* from an untrusted source, this can be used for exploits.
* @kind problem
* This query finds format strings coming from non-literal sources. Note that format strings of
* type `const char*` it is still considered non-constant if the value is not coming from a string
* literal. For example, for a parameter with type `const char*` of an exported function that is
* used as a format string, there is no way to ensure the originating value was a string literal.
* @kind path-problem
* @problem.severity recommendation
* @security-severity 9.3
* @precision high
@@ -17,142 +21,177 @@
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.commons.Printf
import semmle.code.cpp.security.FlowSources
import semmle.code.cpp.ir.dataflow.internal.ModelUtil
import semmle.code.cpp.models.interfaces.DataFlow
import semmle.code.cpp.models.interfaces.Taint
import semmle.code.cpp.ir.IR
import NonConstFlow::PathGraph
// For the following `...gettext` functions, we assume that
// all translations preserve the type and order of `%` specifiers
// (and hence are safe to use as format strings). This
// assumption is hard-coded into the query.
predicate whitelistFunction(Function f, int arg) {
// basic variations of gettext
f.getName() = "_" and arg = 0
or
f.getName() = "gettext" and arg = 0
or
f.getName() = "dgettext" and arg = 1
or
f.getName() = "dcgettext" and arg = 1
or
// plural variations of gettext that take one format string for singular and another for plural form
f.getName() = "ngettext" and
(arg = 0 or arg = 1)
or
f.getName() = "dngettext" and
(arg = 1 or arg = 2)
or
f.getName() = "dcngettext" and
(arg = 1 or arg = 2)
class UncalledFunction extends Function {
UncalledFunction() {
not exists(Call c | c.getTarget() = this) and
// Ignore functions that appear to be function pointers
// function pointers may be seen as uncalled statically
not exists(FunctionAccess fa | fa.getTarget() = this)
}
}
// we assume that ALL uses of the `_` macro
// return constant string literals
predicate underscoreMacro(Expr e) {
exists(MacroInvocation mi |
mi.getMacroName() = "_" and
mi.getExpr() = e
/** The `unsigned short` type. */
class UnsignedShort extends ShortType {
UnsignedShort() { this.isUnsigned() }
}
/**
* Holds if `t` cannot refer to a string. That is, it's a built-in
* or arithmetic type that is not a "`char` like" type.
*/
predicate cannotContainString(Type t) {
exists(Type unspecified |
unspecified = t.getUnspecifiedType() and
not unspecified instanceof UnknownType and
not unspecified instanceof CharType and
not unspecified instanceof WideCharType and
not unspecified instanceof Char8Type and
not unspecified instanceof Char16Type and
not unspecified instanceof Char32Type and
// C often defines `wchar_t` as `unsigned short`
not unspecified instanceof UnsignedShort
|
unspecified instanceof ArithmeticType or
unspecified instanceof BuiltInType
)
}
predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) {
func.(DataFlowFunction).hasDataFlow(_, output) or
func.(TaintFunction).hasTaintFlow(_, output)
}
/**
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
* This is defined as either:
* 1) a `FlowSource`
* 2) a parameter of an 'uncalled' function
* 3) an argument to a function with no definition that is not known to define the output through its input
* 4) an out arg of a function with no definition that is not known to define the output through its input
*
* The latter two cases address identifying standard string manipulation libraries as input sources
* e.g., strcpy. More simply, functions without definitions that are known to manipulate the
* input to produce an output are not sources. Instead the ultimate source of input to these functions
* should be considered as the source.
*
* False Negative Implication: This approach has false negatives (fails to identify non-const sources)
* when the source is a field of a struct or object and the initialization is not observed statically.
* There are 3 general cases where this can occur:
* 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string.
* 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically.
* e.g., an object constructor isn't known statically, or a function sets fields
* of a struct, but the function is not known statically.
* 3) A function meeting cases (3) and (4) above returns (through an out argument or return value)
* a struct or object where a field containing a format string has been initialized.
*
* Note, uninitialized variables used as format strings are never detected by design.
* Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query.
*/
predicate isNonConst(DataFlow::Node node) {
node instanceof FlowSource
or
// Parameters of uncalled functions that aren't const
exists(UncalledFunction f, Parameter p |
f.getAParameter() = p and
// We pick the indirection of the parameter since this query is focused
// on strings.
p = node.asParameter(1) and
// Ignore main's argv parameter as it is already considered a `FlowSource`
// not ignoring it will result in path redundancies
(f.getName() = "main" implies p != f.getParameter(1))
)
or
// Consider as an input any out arg of a function or a function's return where the function is not:
// 1. a function with a known dataflow or taintflow from input to output and the `node` is the output
// 2. a function where there is a known definition
// i.e., functions that with unknown bodies and are not known to define the output through its input
// are considered as possible non-const sources
// The function's output must also not be const to be considered a non-const source
exists(Function func, CallInstruction call |
not func.hasDefinition() and
func = call.getStaticCallTarget()
|
// Case 1: It's a known dataflow or taintflow function with flow to the return value
call.getUnconvertedResultExpression() = node.asIndirectExpr() and
not exists(FunctionOutput output |
dataFlowOrTaintFlowFunction(func, output) and
output.isReturnValueDeref(_) and
node = callOutput(call, output)
)
or
// Case 2: It's a known dataflow or taintflow function with flow to an output parameter
exists(int i |
call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() =
node.asDefiningArgument() and
not exists(FunctionOutput output |
dataFlowOrTaintFlowFunction(func, output) and
output.isParameterDeref(i, _) and
node = callOutput(call, output)
)
)
)
}
/**
* Holds if `t` cannot hold a character array, directly or indirectly.
* Holds if `sink` is a sink is a format string of any
* `FormattingFunctionCall`.
*/
predicate cannotContainString(Type t, boolean isIndirect) {
isIndirect = false and
exists(Type unspecified |
unspecified = t.getUnspecifiedType() and
not unspecified instanceof UnknownType
|
unspecified instanceof BuiltInType or
unspecified instanceof IntegralOrEnumType
)
}
predicate isNonConst(DataFlow::Node node, boolean isIndirect) {
exists(Expr e |
e = node.asExpr() and isIndirect = false
or
e = node.asIndirectExpr() and isIndirect = true
|
exists(FunctionCall fc | fc = e |
not (
whitelistFunction(fc.getTarget(), _) or
fc.getTarget().hasDefinition()
)
)
or
exists(Parameter p | p = e.(VariableAccess).getTarget() |
p.getFunction().getName() = "main" and p.getType() instanceof PointerType
)
or
e instanceof CrementOperation
or
e instanceof AddressOfExpr
or
e instanceof ReferenceToExpr
or
e instanceof AssignPointerAddExpr
or
e instanceof AssignPointerSubExpr
or
e instanceof PointerArithmeticOperation
or
e instanceof FieldAccess
or
e instanceof PointerDereferenceExpr
or
e instanceof AddressOfExpr
or
e instanceof ExprCall
or
e instanceof NewArrayExpr
or
exists(Variable v | v = e.(VariableAccess).getTarget() |
v.getType().(ArrayType).getBaseType() instanceof CharType and
exists(AssignExpr ae |
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
)
)
)
or
node instanceof DataFlow::DefinitionByReferenceNode and
isIndirect = true
}
pragma[noinline]
predicate isBarrierNode(DataFlow::Node node) {
underscoreMacro([node.asExpr(), node.asIndirectExpr()])
or
exists(node.asExpr()) and
cannotContainString(node.getType(), false)
}
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
sink.asIndirectExpr() = formatString and
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
}
module NonConstFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(boolean isIndirect, Type t |
isNonConst(source, isIndirect) and
exists(Type t |
isNonConst(source) and
t = source.getType() and
not cannotContainString(t, isIndirect)
not cannotContainString(t)
)
}
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) }
predicate isBarrier(DataFlow::Node node) {
// Ignore tracing non-const through array indices
exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr())
or
exists(Type t |
t = node.getType() and
cannotContainString(t)
)
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSourceLocation(DataFlow::Node source) { none() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
result = sink.getLocation()
or
exists(FormattingFunctionCall call, Expr formatString | result = call.getLocation() |
isSinkImpl(sink, formatString) and
call.getArgument(call.getFormatParameterIndex()) = formatString
)
}
}
module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;
from FormattingFunctionCall call, Expr formatString
from
FormattingFunctionCall call, Expr formatString, NonConstFlow::PathNode sink,
NonConstFlow::PathNode source
where
isSinkImpl(sink.getNode(), formatString) and
call.getArgument(call.getFormatParameterIndex()) = formatString and
exists(DataFlow::Node sink |
NonConstFlow::flowTo(sink) and
isSinkImpl(sink, formatString)
)
select formatString,
"The format string argument to " + call.getTarget().getName() +
" should be constant to prevent security issues and other potential errors."
NonConstFlow::flowPath(source, sink)
select sink.getNode(), source, sink,
"The format string argument to $@ has a source which cannot be " +
"verified to originate from a string literal.", call, call.getTarget().getName()

View File

@@ -22,10 +22,8 @@ function.
</example>
<references>
<li>cplusplus.com: <a href="http://www.tutorialspoint.com/cplusplus/cpp_functions.htm">C++ Functions</a>.</li>
<li>CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/FIO47-C.+Use+valid+format+strings">FIO47-C. Use valid format strings</a>.</li>
<li>Microsoft C Runtime Library Reference: <a href="https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/printf-printf-l-wprintf-wprintf-l">printf, wprintf</a>.</li>
</references>
</qhelp>

View File

@@ -19,8 +19,8 @@ contents.
</overview>
<recommendation>
<p>Review the format and arguments expected by the highlighted function calls. Update either
the format or the arguments so that the expected number of arguments are passed to the
<p>Review the format and arguments expected by the highlighted function calls. Update either
the format or the arguments so that the expected number of arguments are passed to the
function.
</p>
@@ -30,11 +30,8 @@ function.
</example>
<references>
<li>CERT C Coding
Standard: <a href="https://www.securecoding.cert.org/confluence/display/c/FIO30-C.+Exclude+user+input+from+format+strings">FIO30-C. Exclude user input from format strings</a>.</li>
<li>cplusplus.com: <a href="http://www.tutorialspoint.com/cplusplus/cpp_functions.htm">C++ Functions</a>.</li>
<li>CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/FIO47-C.+Use+valid+format+strings">FIO47-C. Use valid format strings</a>.</li>
<li>Microsoft C Runtime Library Reference: <a href="https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/printf-printf-l-wprintf-wprintf-l">printf, wprintf</a>.</li>
</references>
</qhelp>

View File

@@ -16,6 +16,20 @@
import cpp
class SyntaxError extends CompilerError {
SyntaxError() { this.getTag().matches("exp_%") }
predicate affects(Element e) {
exists(Location l1, Location l2 |
l1 = this.getLocation() and
l2 = e.getLocation()
|
l1.getFile() = l2.getFile() and
l1.getStartLine() = l2.getStartLine()
)
}
}
from FormatLiteral fl, FormattingFunctionCall ffc, int expected, int given, string ffcName
where
ffc = fl.getUse() and
@@ -27,7 +41,11 @@ where
if ffc.isInMacroExpansion()
then ffcName = ffc.getTarget().getName() + " (in a macro expansion)"
else ffcName = ffc.getTarget().getName()
)
) and
// A typical problem is that string literals are concatenated, but if one of the string
// literals is an undefined macro, then this just leads to a syntax error.
not exists(SyntaxError e | e.affects(fl)) and
not ffc.getArgument(_) instanceof ErrorExpr
select ffc,
"Format for " + ffcName + " expects " + expected.toString() + " arguments but given " +
given.toString()

View File

@@ -1,4 +0,0 @@
int main() {
printf("%s\n", 42); //printf will treat 42 as a char*, will most likely segfault
return 0;
}

View File

@@ -4,29 +4,33 @@
<qhelp>
<overview>
<p>Each call to the <code>printf</code> function or a related function should include
the type and sequence of arguments defined by the format. If the function is passed arguments
the type and sequence of arguments defined by the format. If the function is passed arguments
of a different type or in a different sequence then the arguments are reinterpreted to fit the type and sequence expected, resulting in unpredictable behavior.</p>
</overview>
<recommendation>
<p>Review the format and arguments expected by the highlighted function calls. Update either
the format or the arguments so that the expected type and sequence of arguments are passed to
<p>Review the format and arguments expected by the highlighted function calls. Update either
the format or the arguments so that the expected type and sequence of arguments are passed to
the function.
</p>
</recommendation>
<example><sample src="WrongTypeFormatArguments.cpp" />
<example>
<p>In the following example, the wrong format specifier is given for an integer format argument:</p>
<sample src="WrongTypeFormatArgumentsBad.cpp" />
<p>The corrected version uses <code>%i</code> as the format specifier for the integer format argument:</p>
<sample src="WrongTypeFormatArgumentsGood.cpp" />
</example>
<references>
<li>CERT C Coding
Standard: <a href="https://www.securecoding.cert.org/confluence/display/c/FIO30-C.+Exclude+user+input+from+format+strings">FIO30-C. Exclude user input from format strings</a>.</li>
<li>cplusplus.com: <a href="http://www.tutorialspoint.com/cplusplus/cpp_functions.htm">C++ Functions</a>.</li>
<li>CRT Alphabetical Function Reference: <a href="https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/printf-printf-l-wprintf-wprintf-l">printf, _printf_l, wprintf, _wprintf_l</a>.</li>
<li>Microsoft Learn: <a href="https://learn.microsoft.com/en-us/cpp/c-runtime-library/format-specification-syntax-printf-and-wprintf-functions?view=msvc-170">Format specification syntax: printf and wprintf functions</a>.</li>
<li>cplusplus.com:<a href="https://cplusplus.com/reference/cstdio/printf/"></a>printf</li>
<li>CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/FIO47-C.+Use+valid+format+strings">FIO47-C. Use valid format strings</a>.</li>
</references>
</qhelp>

View File

@@ -154,6 +154,7 @@ int sizeof_IntType() { exists(IntType it | result = it.getSize()) }
from FormattingFunctionCall ffc, int n, Expr arg, Type expected, Type actual
where
not any(Compilation c).buildModeNone() and
(
formattingFunctionCallExpectedType(ffc, n, expected) and
formattingFunctionCallActualType(ffc, n, arg, actual) and
@@ -170,7 +171,10 @@ where
) and
not arg.isAffectedByMacro() and
not arg.isFromUninstantiatedTemplate(_) and
not actual.getUnspecifiedType() instanceof ErroneousType
not actual.stripType() instanceof ErroneousType and
not arg.(Call).mayBeFromImplicitlyDeclaredFunction() and
// Make sure that the format function definition is consistent
count(ffc.getTarget().getFormatParameterIndex()) = 1
select arg,
"This argument should be of type '" + expected.getName() + "' but is of type '" +
"This format specifier for type '" + expected.getName() + "' does not match the argument type '" +
actual.getUnspecifiedType().getName() + "'."

View File

@@ -0,0 +1,4 @@
int main() {
printf("%s\n", 42); // BAD: printf will treat 42 as a char*, will most likely segfault
return 0;
}

View File

@@ -0,0 +1,4 @@
int main() {
printf("%i\n", 42); // GOOD: printf will treat 42 as an int
return 0;
}

View File

@@ -205,20 +205,6 @@ class ChecksForLeapYearFunctionCall extends FunctionCall {
ChecksForLeapYearFunctionCall() { this.getTarget() instanceof ChecksForLeapYearFunction }
}
/**
* Data flow configuration for finding a variable access that would flow into
* a function call that includes an operation to check for leap year.
*/
deprecated class LeapYearCheckConfiguration extends DataFlow::Configuration {
LeapYearCheckConfiguration() { this = "LeapYearCheckConfiguration" }
override predicate isSource(DataFlow::Node source) { source.asExpr() instanceof VariableAccess }
override predicate isSink(DataFlow::Node sink) {
exists(ChecksForLeapYearFunctionCall fc | sink.asExpr() = fc.getAnArgument())
}
}
/**
* Data flow configuration for finding a variable access that would flow into
* a function call that includes an operation to check for leap year.
@@ -229,37 +215,14 @@ private module LeapYearCheckConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node sink) {
exists(ChecksForLeapYearFunctionCall fc | sink.asExpr() = fc.getAnArgument())
}
predicate observeDiffInformedIncrementalMode() {
none() // only used negatively in UncheckedLeapYearAfterYearModification.ql
}
}
module LeapYearCheckFlow = DataFlow::Global<LeapYearCheckConfig>;
/**
* Data flow configuration for finding an operation with hardcoded 365 that will flow into
* a `FILEINFO` field.
*/
deprecated class FiletimeYearArithmeticOperationCheckConfiguration extends DataFlow::Configuration {
FiletimeYearArithmeticOperationCheckConfiguration() {
this = "FiletimeYearArithmeticOperationCheckConfiguration"
}
override predicate isSource(DataFlow::Node source) {
exists(Expr e, Operation op | e = source.asExpr() |
op.getAChild*().getValue().toInt() = 365 and
op.getAChild*() = e
)
}
override predicate isSink(DataFlow::Node sink) {
exists(StructLikeClass dds, FieldAccess fa, AssignExpr aexpr, Expr e | e = sink.asExpr() |
dds instanceof PackedTimeType and
fa.getQualifier().getUnderlyingType() = dds and
fa.isModified() and
aexpr.getAChild() = fa and
aexpr.getChild(1).getAChild*() = e
)
}
}
/**
* Data flow configuration for finding an operation with hardcoded 365 that will flow into
* a `FILEINFO` field.
@@ -286,51 +249,6 @@ private module FiletimeYearArithmeticOperationCheckConfig implements DataFlow::C
module FiletimeYearArithmeticOperationCheckFlow =
DataFlow::Global<FiletimeYearArithmeticOperationCheckConfig>;
/**
* Taint configuration for finding an operation with hardcoded 365 that will flow into any known date/time field.
*/
deprecated class PossibleYearArithmeticOperationCheckConfiguration extends TaintTracking::Configuration
{
PossibleYearArithmeticOperationCheckConfiguration() {
this = "PossibleYearArithmeticOperationCheckConfiguration"
}
override predicate isSource(DataFlow::Node source) {
exists(Operation op | op = source.asExpr() |
op.getAChild*().getValue().toInt() = 365 and
(
not op.getParent() instanceof Expr or
op.getParent() instanceof Assignment
)
)
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
// flow from anything on the RHS of an assignment to a time/date structure to that
// assignment.
exists(StructLikeClass dds, FieldAccess fa, Assignment aexpr, Expr e |
e = node1.asExpr() and
fa = node2.asExpr()
|
(dds instanceof PackedTimeType or dds instanceof UnpackedTimeType) and
fa.getQualifier().getUnderlyingType() = dds and
aexpr.getLValue() = fa and
aexpr.getRValue().getAChild*() = e
)
}
override predicate isSink(DataFlow::Node sink) {
exists(StructLikeClass dds, FieldAccess fa, AssignExpr aexpr |
aexpr.getRValue() = sink.asExpr()
|
(dds instanceof PackedTimeType or dds instanceof UnpackedTimeType) and
fa.getQualifier().getUnderlyingType() = dds and
fa.isModified() and
aexpr.getLValue() = fa
)
}
}
/**
* Taint configuration for finding an operation with hardcoded 365 that will flow into any known date/time field.
*/
@@ -345,6 +263,8 @@ private module PossibleYearArithmeticOperationCheckConfig implements DataFlow::C
)
}
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
// flow from anything on the RHS of an assignment to a time/date structure to that
// assignment.
@@ -369,6 +289,14 @@ private module PossibleYearArithmeticOperationCheckConfig implements DataFlow::C
aexpr.getLValue() = fa
)
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSourceLocation(DataFlow::Node source) {
result = source.asExpr().getLocation()
}
Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.asExpr().getLocation() }
}
module PossibleYearArithmeticOperationCheckFlow =

View File

@@ -12,7 +12,8 @@
*/
import cpp
private import semmle.code.cpp.commons.Exclusions
import semmle.code.cpp.commons.Exclusions
import semmle.code.cpp.ConfigurationTestFile
class PureExprInVoidContext extends ExprInVoidContext {
PureExprInVoidContext() { this.isPure() }
@@ -90,6 +91,7 @@ where
not peivc.getType() instanceof UnknownType and
not functionContainsDisabledCodeRecursive(peivc.(FunctionCall).getTarget()) and
not functionDefinedInIfDefRecursive(peivc.(FunctionCall).getTarget()) and
not peivc.getFile() instanceof ConfigurationTestFile and // expressions in files generated during configuration are likely false positives
if peivc instanceof FunctionCall
then
exists(Function target |

View File

@@ -2,19 +2,18 @@
void f_warning(int i)
{
// The usage of the logical not operator in this case is unlikely to be correct
// BAD: the usage of the logical not operator in this case is unlikely to be correct
// as the output is being used as an operator for a bit-wise and operation
if (i & !FLAGS)
if (i & !FLAGS)
{
// code
}
}
void f_fixed(int i)
{
if (i & ~FLAGS) // Changing the logical not operator for the bit-wise not operator would fix this logic
if (i & ~FLAGS) // GOOD: Changing the logical not operator for the bit-wise not operator would fix this logic
{
// code
}
}
}

View File

@@ -16,7 +16,13 @@
<p>Carefully inspect the flagged expressions. Consider the intent in the code logic, and decide whether it is necessary to change the not operator.</p>
</recommendation>
<example><sample src="IncorrectNotOperatorUsage.cpp" /></example>
<example>
<p>Here is an example of this issue and how it can be fixed:</p>
<sample src="IncorrectNotOperatorUsage.cpp" />
<p>In other cases, particularly when the expressions have <code>bool</code> type, the fix may instead be of the form <code>a &amp;&amp; !b</code>.</p>
</example>
<references>
<li>

View File

@@ -37,6 +37,19 @@ class AllocaCall extends FunctionCall {
}
}
/**
* Gets an expression associated with a dataflow node.
*/
private Expr getExpr(DataFlow::Node node) {
result = node.asInstruction().getAst()
or
result = node.asOperand().getUse().getAst()
or
result = node.(DataFlow::RawIndirectInstruction).getInstruction().getAst()
or
result = node.(DataFlow::RawIndirectOperand).getOperand().getUse().getAst()
}
/**
* A loop that contains an `alloca` call.
*/
@@ -185,19 +198,6 @@ class LoopWithAlloca extends Stmt {
not this.conditionReachesWithoutUpdate(var, this.(Loop).getCondition())
}
/**
* Gets an expression associated with a dataflow node.
*/
private Expr getExpr(DataFlow::Node node) {
result = node.asInstruction().getAst()
or
result = node.asOperand().getUse().getAst()
or
result = node.(DataFlow::RawIndirectInstruction).getInstruction().getAst()
or
result = node.(DataFlow::RawIndirectOperand).getOperand().getUse().getAst()
}
/**
* Gets a definition that may be the most recent definition of the
* controlling variable `var` before this loop.
@@ -208,9 +208,9 @@ class LoopWithAlloca extends Stmt {
this.conditionRequiresInequality(va, _, _) and
DataFlow::localFlow(result, DataFlow::exprNode(va)) and
// Phi nodes will be preceded by nodes that represent actual definitions
not result instanceof DataFlow::SsaPhiNode and
not result instanceof DataFlow::SsaSynthNode and
// A source is outside the loop if it's not inside the loop
not exists(Expr e | e = this.getExpr(result) | this = getAnEnclosingLoopOfExpr(e))
not exists(Expr e | e = getExpr(result) | this = getAnEnclosingLoopOfExpr(e))
)
}
@@ -221,9 +221,9 @@ class LoopWithAlloca extends Stmt {
private int getAControllingVarInitialValue(Variable var, DataFlow::Node source) {
source = this.getAPrecedingDef(var) and
(
result = this.getExpr(source).getValue().toInt()
result = getExpr(source).getValue().toInt()
or
result = this.getExpr(source).(Assignment).getRValue().getValue().toInt()
result = getExpr(source).(Assignment).getRValue().getValue().toInt()
)
}

View File

@@ -107,7 +107,7 @@ class SnprintfSizeExpr extends BufferAccess, FunctionCall {
}
class MemcmpSizeExpr extends BufferAccess, FunctionCall {
MemcmpSizeExpr() { this.getTarget().hasName("Memcmp") }
MemcmpSizeExpr() { this.getTarget().hasName("memcmp") }
override Expr getPointer() {
result = this.getArgument(0) or
@@ -129,24 +129,6 @@ class NetworkFunctionCall extends FunctionCall {
NetworkFunctionCall() { this.getTarget().hasName(["ntohd", "ntohf", "ntohl", "ntohll", "ntohs"]) }
}
deprecated class NetworkToBufferSizeConfiguration extends DataFlow::Configuration {
NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" }
override predicate isSource(DataFlow::Node node) { node.asExpr() instanceof NetworkFunctionCall }
override predicate isSink(DataFlow::Node node) {
node.asExpr() = any(BufferAccess ba).getAccessedLength()
}
override predicate isBarrier(DataFlow::Node node) {
exists(GuardCondition gc, GVN gvn |
gc.getAChild*() = gvn.getAnExpr() and
globalValueNumber(node.asExpr()) = gvn and
gc.controls(node.asExpr().getBasicBlock(), _)
)
}
}
private module NetworkToBufferSizeConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node.asExpr() instanceof NetworkFunctionCall }
@@ -159,6 +141,8 @@ private module NetworkToBufferSizeConfig implements DataFlow::ConfigSig {
gc.controls(node.asExpr().getBasicBlock(), _)
)
}
predicate observeDiffInformedIncrementalMode() { any() }
}
module NetworkToBufferSizeFlow = DataFlow::Global<NetworkToBufferSizeConfig>;

View File

@@ -1,7 +0,0 @@
Record* fixRecord(Record* r) {
Record myRecord = *r;
delete r;
myRecord.fix();
return &myRecord; //returns reference to myRecord, which is a stack-allocated object
}

View File

@@ -5,22 +5,23 @@
<overview>
<p>This rule finds return statements that return pointers to an object allocated on the stack.
The lifetime of a stack allocated memory location only lasts until the function returns, and
the contents of that memory become undefined after that. Clearly, using a pointer to stack
<p>This rule finds return statements that return pointers to an object allocated on the stack.
The lifetime of a stack allocated memory location only lasts until the function returns, and
the contents of that memory become undefined after that. Clearly, using a pointer to stack
memory after the function has already returned will have undefined results. </p>
</overview>
<recommendation>
<p>Use the functions of the <tt>malloc</tt> family to dynamically allocate memory on the heap for data that is used across function calls.</p>
<p>Use the functions of the <tt>malloc</tt> family, or <tt>new</tt>, to dynamically allocate memory on the heap for data that is used across function calls.</p>
</recommendation>
<example><sample src="ReturnStackAllocatedMemory.cpp" />
<example>
<p>The following example allocates an object on the stack and returns a pointer to it. This is incorrect because the object is deallocated
when the function returns, and the pointer becomes invalid.</p>
<sample src="ReturnStackAllocatedMemoryBad.cpp" />
<p>To fix this, allocate the object on the heap using <tt>new</tt> and return a pointer to the heap-allocated object.</p>
<sample src="ReturnStackAllocatedMemoryGood.cpp" />
</example>
<references>

View File

@@ -27,16 +27,26 @@ class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
ReturnStackAllocatedMemoryConfig() { this = "ReturnStackAllocatedMemoryConfig" }
override predicate isSource(Instruction source) {
// Holds if `source` is a node that represents the use of a stack variable
exists(VariableAddressInstruction var, Function func |
var = source and
func = source.getEnclosingFunction() and
var.getAstVariable() instanceof StackVariable and
// Pointer-to-member types aren't properly handled in the dbscheme.
not var.getResultType() instanceof PointerToMemberType and
exists(Function func |
// Rule out FPs caused by extraction errors.
not any(ErrorExpr e).getEnclosingFunction() = func and
not intentionallyReturnsStackPointer(func)
not func.hasErrors() and
not intentionallyReturnsStackPointer(func) and
func = source.getEnclosingFunction()
|
// `source` is an instruction that represents the use of a stack variable
exists(VariableAddressInstruction var |
var = source and
var.getAstVariable() instanceof StackVariable and
// Pointer-to-member types aren't properly handled in the dbscheme.
not var.getResultType() instanceof PointerToMemberType
)
or
// `source` is an instruction that represents the return value of a
// function that is known to return stack-allocated memory.
exists(Call call |
call.getTarget().hasGlobalName(["alloca", "strdupa", "strndupa", "_alloca", "_malloca"]) and
source.getUnconvertedResultExpression() = call
)
)
}
@@ -82,13 +92,15 @@ class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
or
node2.(PointerOffsetInstruction).getLeftOperand() = node1
}
override predicate isBarrier(Instruction n) { n.getResultType() instanceof ErroneousType }
}
from
MustFlowPathNode source, MustFlowPathNode sink, VariableAddressInstruction var,
MustFlowPathNode source, MustFlowPathNode sink, Instruction instr,
ReturnStackAllocatedMemoryConfig conf
where
conf.hasFlowPath(pragma[only_bind_into](source), pragma[only_bind_into](sink)) and
source.getInstruction() = var
source.getInstruction() = instr
select sink.getInstruction(), source, sink, "May return stack-allocated memory from $@.",
var.getAst(), var.getAst().toString()
instr.getAst(), instr.getAst().toString()

View File

@@ -0,0 +1,5 @@
Record *mkRecord(int value) {
Record myRecord(value);
return &myRecord; // BAD: returns a pointer to `myRecord`, which is a stack-allocated object.
}

View File

@@ -0,0 +1,5 @@
Record *mkRecord(int value) {
Record *myRecord = new Record(value);
return myRecord; // GOOD: returns a pointer to a `myRecord`, which is a heap-allocated object.
}

View File

@@ -1,2 +0,0 @@
strncpy(dest, src, sizeof(src)); //wrong: size of dest should be used
strncpy(dest, src, strlen(src)); //wrong: size of dest should be used

View File

@@ -3,7 +3,7 @@
"qhelp.dtd">
<qhelp>
<overview>
<p>The standard library function <code>strncpy</code> copies a source string to a destination buffer. The third argument defines the maximum number of characters to copy and should be less than
<p>The standard library function <code>strncpy</code> copies a source string to a destination buffer. The third argument defines the maximum number of characters to copy and should be less than
or equal to the size of the destination buffer. Calls of the form <code>strncpy(dest, src, strlen(src))</code> or <code>strncpy(dest, src, sizeof(src))</code> incorrectly set the third argument to the size of the source buffer. Executing a call of this type may cause a buffer overflow. Buffer overflows can lead to anything from a segmentation fault to a security vulnerability.</p>
</overview>
@@ -12,14 +12,20 @@ or equal to the size of the destination buffer. Calls of the form <code>strncpy(
not the source buffer.</p>
</recommendation>
<example><sample src="StrncpyFlippedArgs.cpp" />
<example>
<p>In the following examples, the size of the source buffer is incorrectly used as a parameter to <code>strncpy</code>:</p>
<sample src="StrncpyFlippedArgsBad.cpp" />
<p>The corrected version uses the size of the destination buffer, or a variable containing the size of the destination buffer as the size parameter to <code>strncpy</code>:</p>
<sample src="StrncpyFlippedArgsGood.cpp" />
</example>
<references>
<li>cplusplus.com: <a href="http://www.cplusplus.com/reference/clibrary/cstring/strncpy/">strncpy</a>.</li>
<li>cplusplus.com: <a href="https://cplusplus.com/reference/cstring/strncpy/">strncpy</a>.</li>
<li>
I. Gerg. <em>An Overview and Example of the Buffer-Overflow Exploit</em>. IANewsletter vol 7 no 4. 2005.
</li>

View File

@@ -0,0 +1,9 @@
char src[256];
char dest1[128];
...
strncpy(dest1, src, sizeof(src)); // wrong: size of dest should be used
char *dest2 = (char *)malloc(sz1 + sz2 + sz3);
strncpy(dest2, src, strlen(src)); // wrong: size of dest should be used

View File

@@ -0,0 +1,10 @@
char src[256];
char dest1[128];
...
strncpy(dest1, src, sizeof(dest1)); // correct
size_t destSize = sz1 + sz2 + sz3;
char *dest2 = (char *)malloc(destSize);
strncpy(dest2, src, destSize); // correct

View File

@@ -47,11 +47,17 @@ Type stripType(Type t) {
or
result = stripType(t.(Decltype).getBaseType())
or
result = stripType(t.(TypeofType).getBaseType())
or
result = stripType(t.(IntrinsicTransformedType).getBaseType())
or
not t instanceof TypedefType and
not t instanceof ArrayType and
not t instanceof ReferenceType and
not t instanceof SpecifiedType and
not t instanceof Decltype and
not t instanceof TypeofType and
not t instanceof IntrinsicTransformedType and
result = t
}

View File

@@ -4,7 +4,7 @@
* @kind problem
* @problem.severity warning
* @security-severity 9.3
* @precision medium
* @precision high
* @id cpp/unsafe-strncat
* @tags reliability
* correctness

View File

@@ -2,7 +2,7 @@
* @name Potentially uninitialized local variable
* @description Reading from a local variable that has not been assigned to
* will typically yield garbage.
* @kind problem
* @kind path-problem
* @id cpp/uninitialized-local
* @problem.severity warning
* @security-severity 7.8
@@ -13,7 +13,9 @@
*/
import cpp
import semmle.code.cpp.controlflow.StackVariableReachability
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
/**
* Auxiliary predicate: Types that don't require initialization
@@ -33,31 +35,6 @@ predicate allocatedType(Type t) {
allocatedType(t.getUnspecifiedType())
}
/**
* A declaration of a local variable that leaves the
* variable uninitialized.
*/
DeclStmt declWithNoInit(LocalVariable v) {
result.getADeclaration() = v and
not exists(v.getInitializer()) and
/* The type of the variable is not stack-allocated. */
exists(Type t | t = v.getType() | not allocatedType(t))
}
class UninitialisedLocalReachability extends StackVariableReachability {
UninitialisedLocalReachability() { this = "UninitialisedLocal" }
override predicate isSource(ControlFlowNode node, StackVariable v) { node = declWithNoInit(v) }
override predicate isSink(ControlFlowNode node, StackVariable v) { useOfVarActual(v, node) }
override predicate isBarrier(ControlFlowNode node, StackVariable v) {
// only report the _first_ possibly uninitialized use
useOfVarActual(v, node) or
definitionBarrier(v, node)
}
}
pragma[noinline]
predicate containsInlineAssembly(Function f) { exists(AsmStmt s | s.getEnclosingFunction() = f) }
@@ -80,10 +57,38 @@ VariableAccess commonException() {
// Finally, exclude functions that contain assembly blocks. It's
// anyone's guess what happens in those.
containsInlineAssembly(result.getEnclosingFunction())
or
exists(Call c | c.getQualifier() = result | c.getTarget().isStatic())
}
from UninitialisedLocalReachability r, LocalVariable v, VariableAccess va
predicate isSinkImpl(Instruction sink, VariableAccess va) {
exists(LoadInstruction load |
va = load.getUnconvertedResultExpression() and
not va = commonException() and
not va.getTarget().(LocalVariable).getFunction().hasErrors() and
sink = load.getSourceValue()
)
}
class MustFlow extends MustFlowConfiguration {
MustFlow() { this = "MustFlow" }
override predicate isSource(Instruction source) {
source instanceof UninitializedInstruction and
exists(Type t | t = source.getResultType() | not allocatedType(t))
}
override predicate isSink(Operand sink) { isSinkImpl(sink.getDef(), _) }
override predicate allowInterproceduralFlow() { none() }
override predicate isBarrier(Instruction instr) { instr instanceof ChiInstruction }
}
from
VariableAccess va, LocalVariable v, MustFlow conf, MustFlowPathNode source, MustFlowPathNode sink
where
r.reaches(_, v, va) and
not va = commonException()
select va, "The variable $@ may not be initialized at this access.", v, v.getName()
conf.hasFlowPath(source, sink) and
isSinkImpl(sink.getInstruction(), va) and
v = va.getTarget()
select va, source, sink, "The variable $@ may not be initialized at this access.", v, v.getName()

View File

@@ -24,7 +24,7 @@ predicate instructionHasVariable(VariableAddressInstruction vai, StackVariable v
// Pointer-to-member types aren't properly handled in the dbscheme.
not vai.getResultType() instanceof PointerToMemberType and
// Rule out FPs caused by extraction errors.
not any(ErrorExpr e).getEnclosingFunction() = f
not f.hasErrors()
}
/**

View File

@@ -1,8 +1,9 @@
/**
* @name Boost_asio TLS Settings Misconfiguration
* @name boost::asio TLS settings misconfiguration
* @description Using the TLS or SSLv23 protocol from the boost::asio library, but not disabling deprecated protocols, or disabling minimum-recommended protocols.
* @kind problem
* @problem.severity error
* @precision medium
* @security-severity 7.5
* @id cpp/boost/tls-settings-misconfiguration
* @tags security
@@ -12,34 +13,41 @@
import cpp
import semmle.code.cpp.security.boostorg.asio.protocols
module ExistsAnyFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(BoostorgAsio::SslContextClass c | c.getAContructorCall() = source.asExpr())
}
predicate isSourceImpl(DataFlow::Node source, ConstructorCall cc) {
exists(BoostorgAsio::SslContextClass c | c.getAContructorCall() = cc and cc = source.asExpr())
}
predicate isSink(DataFlow::Node sink) {
exists(BoostorgAsio::SslSetOptionsFunction f, FunctionCall fcSetOptions |
f.getACallToThisFunction() = fcSetOptions and
fcSetOptions.getQualifier() = sink.asExpr()
)
}
predicate isSinkImpl(DataFlow::Node sink, FunctionCall fcSetOptions) {
exists(BoostorgAsio::SslSetOptionsFunction f |
f.getACallToThisFunction() = fcSetOptions and
fcSetOptions.getQualifier() = sink.asIndirectExpr()
)
}
module ExistsAnyFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isSourceImpl(source, _) }
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
}
module ExistsAnyFlow = DataFlow::Global<ExistsAnyFlowConfig>;
bindingset[flag]
predicate isOptionSet(ConstructorCall cc, int flag, FunctionCall fcSetOptions) {
exists(VariableAccess contextSetOptions |
ExistsAnyFlow::flow(DataFlow::exprNode(cc), DataFlow::exprNode(contextSetOptions)) and
exists(BoostorgAsio::SslSetOptionsFunction f | f.getACallToThisFunction() = fcSetOptions |
contextSetOptions = fcSetOptions.getQualifier() and
forall(Expr optionArgument, Expr optionArgumentSource |
optionArgument = fcSetOptions.getArgument(0) and
BoostorgAsio::SslOptionFlow::flow(DataFlow::exprNode(optionArgumentSource),
DataFlow::exprNode(optionArgument))
|
optionArgument.getValue().toInt().bitShiftRight(16).bitAnd(flag) = flag
)
exists(
VariableAccess contextSetOptions, BoostorgAsio::SslSetOptionsFunction f, DataFlow::Node source,
DataFlow::Node sink
|
isSourceImpl(source, cc) and
isSinkImpl(sink, fcSetOptions) and
ExistsAnyFlow::flow(source, sink) and
f.getACallToThisFunction() = fcSetOptions and
contextSetOptions = fcSetOptions.getQualifier() and
forex(Expr optionArgument |
optionArgument = fcSetOptions.getArgument(0) and
BoostorgAsio::SslOptionFlow::flowTo(DataFlow::exprNode(optionArgument))
|
optionArgument.getValue().toInt().bitShiftRight(16).bitAnd(flag) = flag
)
)
}

View File

@@ -1,8 +1,9 @@
/**
* @name boost::asio Use of deprecated hardcoded Protocol
* @name boost::asio use of deprecated hardcoded protocol
* @description Using a deprecated hard-coded protocol using the boost::asio library.
* @kind problem
* @problem.severity error
* @precision medium
* @security-severity 7.5
* @id cpp/boost/use-of-deprecated-hardcoded-security-protocol
* @tags security

View File

@@ -5,10 +5,12 @@
* it should be moved before the dereference.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id cpp/redundant-null-check-simple
* @tags reliability
* correctness
* security
* external/cwe/cwe-476
*/

View File

@@ -2,7 +2,7 @@ import cpp
private predicate mightHaveConstMethods(Type t) {
t instanceof Class or
t instanceof TemplateParameter
t instanceof TypeTemplateParameter
}
predicate hasSuperfluousConstReturn(Function f) {

View File

@@ -38,6 +38,7 @@ predicate isCompiledAsC(File f) {
from FunctionDeclarationEntry fdeIm, FunctionCall fc
where
not any(Compilation c).buildModeNone() and
isCompiledAsC(fdeIm.getFile()) and
not isFromMacroDefinition(fc) and
fdeIm.isImplicit() and

View File

@@ -19,7 +19,10 @@
import cpp
import TooFewArguments
import semmle.code.cpp.ConfigurationTestFile
from FunctionCall fc, Function f
where tooFewArguments(fc, f)
where
tooFewArguments(fc, f) and
not fc.getFile() instanceof ConfigurationTestFile // calls in files generated during configuration are likely false positives
select fc, "This call has fewer arguments than required by $@.", f, f.toString()

View File

@@ -51,5 +51,7 @@ predicate tooFewArguments(FunctionCall fc, Function f) {
hasDefiniteNumberOfParameters(fde)
|
fde.getNumberOfParameters() > fc.getNumberOfArguments()
)
) and
// Don't report on implicit function declarations, as these are likely extraction errors.
not f.getADeclarationEntry().isImplicit()
}

View File

@@ -49,21 +49,16 @@ need to be part of the class. (A classic example of this is the
observes, there are at least two key problems with this approach:
<ul>
<li>
It may be possible to generalize some of the utility functions beyond the
<i>1. It may be possible to generalize some of the utility functions beyond the
narrow context of the class in question -- by bundling them with the class,
the class author reduces the scope for functionality reuse.
</li>
<li>
It's usually impossible for the class author to know every possible
2. It's usually impossible for the class author to know every possible
operation that the user might want to perform on the class, so the public
interface will inherently be incomplete. New utility functions will end up
having a different syntax to the privileged public functions in the class,
negatively impacting on code consistency.
</li>
</ul>
</i>
To refactor a class like this, simply move its utility functions elsewhere,
paring its public interface down to the bare minimum.

View File

@@ -46,21 +46,17 @@ need to be part of the class. (A classic example of this is the
<code>std::string</code> class in the C++ Standard Library.) As [Sutter]
observes, there are at least two key problems with this approach:
<ul>
<li>
It may be possible to generalize some of the utility functions beyond the
<i>
1. It may be possible to generalize some of the utility functions beyond the
narrow context of the class in question -- by bundling them with the class,
the class author reduces the scope for functionality reuse.
</li>
<li>
It's usually impossible for the class author to know every possible
2. It's usually impossible for the class author to know every possible
operation that the user might want to perform on the class, so the public
interface will inherently be incomplete. New utility functions will end up
having a different syntax to the privileged public functions in the class,
negatively impacting on code consistency.
</li>
</ul>
</i>
To refactor a class like this, simply move its utility functions elsewhere,
paring its public interface down to the bare minimum.

View File

@@ -26,12 +26,6 @@ private newtype LibraryT =
LibraryTElement(LibraryElement lib, string name, string version) {
lib.getName() = name and
lib.getVersion() = version
} or
LibraryTExternalPackage(@external_package ep, string name, string version) {
exists(string package_name |
external_packages(ep, _, package_name, version) and
name = package_name
)
}
/**
@@ -41,10 +35,7 @@ class Library extends LibraryT {
string name;
string version;
Library() {
this = LibraryTElement(_, name, version) or
this = LibraryTExternalPackage(_, name, version)
}
Library() { this = LibraryTElement(_, name, version) }
string getName() { result = name }
@@ -63,11 +54,6 @@ class Library extends LibraryT {
this = LibraryTElement(lib, _, _) and
result = lib.getAFile()
)
or
exists(@external_package ep |
this = LibraryTExternalPackage(ep, _, _) and
header_to_external_package(unresolveElement(result), ep)
)
}
}

View File

@@ -14,5 +14,10 @@ predicate hasDuplicateFunctionEntryPointLocation(Function func) {
predicate hasDuplicateFunctionEntryPoint(Function func) { count(func.getEntryPoint()) > 1 }
select count(Function f | hasDuplicateFunctionEntryPoint(f) | f) as duplicateFunctionEntryPoint,
count(Function f | hasDuplicateFunctionEntryPointLocation(f) | f) as duplicateFunctionEntryPointLocation
predicate hasDuplicateDeclarationEntry(DeclStmt stmt, int i) {
strictcount(stmt.getDeclarationEntry(i)) > 1
}
select count(Function f | hasDuplicateFunctionEntryPoint(f)) as duplicateFunctionEntryPoint,
count(Function f | hasDuplicateFunctionEntryPointLocation(f)) as duplicateFunctionEntryPointLocation,
count(DeclStmt stmt, int i | hasDuplicateDeclarationEntry(stmt, i)) as duplicateDeclarationEntry

View File

@@ -40,4 +40,5 @@ select count(Instruction i | IRConsistency::missingOperand(i, _, _, _) | i) as m
count(Instruction i | IRConsistency::nonUniqueEnclosingIRFunction(i, _, _, _) | i) as nonUniqueEnclosingIRFunction,
count(FieldAddressInstruction i | IRConsistency::fieldAddressOnNonPointer(i, _, _, _) | i) as fieldAddressOnNonPointer,
count(Instruction i | IRConsistency::thisArgumentIsNonPointer(i, _, _, _) | i) as thisArgumentIsNonPointer,
count(Instruction i | IRConsistency::nonUniqueIRVariable(i, _, _, _) | i) as nonUniqueIRVariable
count(Instruction i | IRConsistency::nonUniqueIRVariable(i, _, _, _) | i) as nonUniqueIRVariable,
count(Instruction i | IRConsistency::nonBooleanOperand(i, _, _, _) | i) as nonBooleanOperand

View File

@@ -0,0 +1,20 @@
/**
* @name Include file resolution status
* @description Counts unresolved and resolved #includes.
* This query is for internal use only and may change without notice.
* @kind table
* @id cpp/include-resolution-status
*/
import cpp
/**
* A cannot open file error.
*
* Typically this is due to a missing include.
*/
class CannotOpenFileError extends CompilerError {
CannotOpenFileError() { this.hasTag(["cannot_open_file", "cannot_open_file_reason"]) }
}
select count(CannotOpenFileError e) as failed_includes, count(Include i) as successful_includes

View File

@@ -22,9 +22,6 @@ class SalMacro extends Macro {
}
}
/** DEPRECATED: Alias for SalMacro */
deprecated class SALMacro = SalMacro;
pragma[noinline]
private predicate isTopLevelMacroAccess(MacroAccess ma) { not exists(ma.getParentInvocation()) }
@@ -50,9 +47,6 @@ class SalAnnotation extends MacroInvocation {
}
}
/** DEPRECATED: Alias for SalAnnotation */
deprecated class SALAnnotation = SalAnnotation;
/**
* A SAL macro indicating that the return value of a function should always be
* checked.
@@ -63,9 +57,6 @@ class SalCheckReturn extends SalAnnotation {
}
}
/** DEPRECATED: Alias for SalCheckReturn */
deprecated class SALCheckReturn = SalCheckReturn;
/**
* A SAL macro indicating that a pointer variable or return value should not be
* `NULL`.
@@ -89,9 +80,6 @@ class SalNotNull extends SalAnnotation {
}
}
/** DEPRECATED: Alias for SalNotNull */
deprecated class SALNotNull = SalNotNull;
/**
* A SAL macro indicating that a value may be `NULL`.
*/
@@ -105,9 +93,6 @@ class SalMaybeNull extends SalAnnotation {
}
}
/** DEPRECATED: Alias for SalMaybeNull */
deprecated class SALMaybeNull = SalMaybeNull;
/**
* A parameter annotated by one or more SAL annotations.
*/
@@ -124,9 +109,6 @@ class SalParameter extends Parameter {
predicate isInOut() { a.getMacroName().toLowerCase().matches("%\\_inout%") }
}
/** DEPRECATED: Alias for SalParameter */
deprecated class SALParameter = SalParameter;
///////////////////////////////////////////////////////////////////////////////
// Implementation details
/**
@@ -161,7 +143,7 @@ private predicate annotatesAtPosition(SalPosition pos, DeclarationEntry d, File
* A SAL element, that is, a SAL annotation or a declaration entry
* that may have SAL annotations.
*/
library class SalElement extends Element {
class SalElement extends Element {
SalElement() {
containsSalAnnotation(this.(DeclarationEntry).getFile()) or
this instanceof SalAnnotation
@@ -199,9 +181,6 @@ library class SalElement extends Element {
}
}
/** DEPRECATED: Alias for SalElement */
deprecated class SALElement = SalElement;
/** Holds if `file` contains a SAL annotation. */
pragma[noinline]
private predicate containsSalAnnotation(File file) { any(SalAnnotation a).getFile() = file }

View File

@@ -10,11 +10,12 @@ contains sensitive data that could somehow be retrieved by an attacker.</p>
</overview>
<recommendation>
<p>Use alternative platform-supplied functions that will not get optimized away. Examples of such
functions include <code>memset_s</code>, <code>SecureZeroMemory</code>, and <code>bzero_explicit</code>.
Alternatively, passing the <code>-fno-builtin-memset</code> option to the GCC/Clang compiler usually
also prevents the optimization. Finally, you can use the public-domain <code>secure_memzero</code> function
(see references below). This function, however, is not guaranteed to work on all platforms and compilers.</p>
<p>Use <code>memset_s</code> (from C11) instead of <code>memset</code>, as <code>memset_s</code> will not
get optimized away. Alternatively use platform-supplied functions such as <code>SecureZeroMemory</code> or
<code>bzero_explicit</code> that make the same guarantee. Passing the <code>-fno-builtin-memset</code>
option to the GCC/Clang compiler usually also prevents the optimization. Finally, you can use the
public-domain <code>secure_memzero</code> function (see references below). This function, however, is not
guaranteed to work on all platforms and compilers.</p>
</recommendation>
<example>

View File

@@ -8,7 +8,7 @@
* @security-severity 7.8
* @precision high
* @tags security
* external/cwe/cwe-14
* external/cwe/cwe-014
*/
import cpp

View File

@@ -5,7 +5,7 @@
* to it.
* @id cpp/count-untrusted-data-external-api
* @kind table
* @tags security external/cwe/cwe-20
* @tags security external/cwe/cwe-020
*/
import cpp

View File

@@ -41,20 +41,6 @@ class ExternalApiDataNode extends DataFlow::Node {
string getFunctionDescription() { result = this.getExternalFunction().toString() }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
deprecated class UntrustedDataToExternalApiConfig extends TaintTracking::Configuration {
UntrustedDataToExternalApiConfig() { this = "UntrustedDataToExternalAPIConfig" }
override predicate isSource(DataFlow::Node source) {
exists(RemoteFlowSourceFunction remoteFlow |
remoteFlow = source.asExpr().(Call).getTarget() and
remoteFlow.hasRemoteFlowSource(_, _)
)
}
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalApiDataNode }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
private module UntrustedDataToExternalApiConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {

View File

@@ -5,7 +5,7 @@
* to it.
* @id cpp/count-untrusted-data-external-api-ir
* @kind table
* @tags security external/cwe/cwe-20
* @tags security external/cwe/cwe-020
*/
import cpp

View File

@@ -6,7 +6,7 @@
* @precision low
* @problem.severity error
* @security-severity 7.8
* @tags security external/cwe/cwe-20
* @tags security external/cwe/cwe-020
*/
import cpp

View File

@@ -6,7 +6,7 @@
* @precision low
* @problem.severity error
* @security-severity 7.8
* @tags security external/cwe/cwe-20
* @tags security external/cwe/cwe-020
*/
import cpp

View File

@@ -41,15 +41,6 @@ class ExternalApiDataNode extends DataFlow::Node {
string getFunctionDescription() { result = this.getExternalFunction().toString() }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
deprecated class UntrustedDataToExternalApiConfig extends TaintTracking::Configuration {
UntrustedDataToExternalApiConfig() { this = "UntrustedDataToExternalAPIConfigIR" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalApiDataNode }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
private module UntrustedDataToExternalApiConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

View File

@@ -1,22 +0,0 @@
int main(int argc, char** argv) {
char *userAndFile = argv[2];
{
char fileBuffer[FILENAME_MAX] = "/home/";
char *fileName = fileBuffer;
size_t len = strlen(fileName);
strncat(fileName+len, userAndFile, FILENAME_MAX-len-1);
// BAD: a string from the user is used in a filename
fopen(fileName, "wb+");
}
{
char fileBuffer[FILENAME_MAX] = "/home/";
char *fileName = fileBuffer;
size_t len = strlen(fileName);
// GOOD: use a fixed file
char* fixed = "jim/file.txt";
strncat(fileName+len, fixed, FILENAME_MAX-len-1);
fopen(fileName, "wb+");
}
}

View File

@@ -3,36 +3,57 @@
"qhelp.dtd">
<qhelp>
<overview>
<p>Accessing paths controlled by users can allow an attacker to access unexpected resources. This
<p>Accessing paths controlled by users can allow an attacker to access unexpected resources. This
can result in sensitive information being revealed or deleted, or an attacker being able to influence
behavior by modifying unexpected files.</p>
<p>Paths that are naively constructed from data controlled by a user may contain unexpected special characters,
such as "..". Such a path may potentially point to any directory on the filesystem.</p>
<p>Paths that are naively constructed from data controlled by a user may be absolute paths, or may contain
unexpected special characters such as "..". Such a path could point anywhere on the file system.</p>
</overview>
<recommendation>
<p>Validate user input before using it to construct a filepath. Ideally, follow these rules:</p>
<p>Validate user input before using it to construct a file path.</p>
<ul>
<li>Do not allow more than a single "." character.</li>
<li>Do not allow directory separators such as "/" or "\" (depending on the filesystem).</li>
<li>Do not rely on simply replacing problematic sequences such as "../". For example, after applying this filter to
".../...//" the resulting string would still be "../".</li>
<li>Ideally use a whitelist of known good patterns.</li>
</ul>
<p>Common validation methods include checking that the normalized path is relative and does not contain
any ".." components, or checking that the path is contained within a safe folder. The method you should use depends
on how the path is used in the application, and whether the path should be a single path component.
</p>
<p>If the path should be a single path component (such as a file name), you can check for the existence
of any path separators ("/" or "\"), or ".." sequences in the input, and reject the input if any are found.
</p>
<p>
Note that removing "../" sequences is <i>not</i> sufficient, since the input could still contain a path separator
followed by "..". For example, the input ".../...//" would still result in the string "../" if only "../" sequences
are removed.
</p>
<p>Finally, the simplest (but most restrictive) option is to use an allow list of safe patterns and make sure that
the user input matches one of these patterns.</p>
</recommendation>
<example>
<p>In this example, a username and file are read from the arguments to main and then used to access a file in the
user's home directory. However, a malicious user could enter a filename which contains special
characters. For example, the string "../../etc/passwd" will result in the code reading the file located at
"/home/[user]/../../etc/passwd", which is the system's password file. This could potentially allow them to
access all the system's passwords.</p>
<p>In this example, a file name is read from a user and then used to access a file.
However, a malicious user could enter a file name anywhere on the file system,
such as "/etc/passwd" or "../../../etc/passwd".</p>
<sample src="TaintedPath.c" />
<sample src="examples/TaintedPath.c" />
<p>
If the input should only be a file name, you can check that it doesn't contain any path separators or ".." sequences.
</p>
<sample src="examples/TaintedPathNormalize.c" />
<p>
If the input should be within a specific directory, you can check that the resolved path
is still contained within that directory.
</p>
<sample src="examples/TaintedPathFolder.c" />
</example>
<references>
@@ -41,6 +62,7 @@ access all the system's passwords.</p>
OWASP:
<a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.
</li>
<li>Linux man pages: <a href="https://man7.org/linux/man-pages/man3/realpath.3.html">realpath(3)</a>.</li>
</references>
</qhelp>

View File

@@ -88,6 +88,17 @@ module TaintedPathConfig implements DataFlow::ConfigSig {
hasUpperBoundsCheck(checkedVar)
)
}
predicate isBarrierOut(DataFlow::Node node) {
// make sinks barriers so that we only report the closest instance
isSink(node)
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
result = sink.asIndirectArgument().getLocation()
}
}
module TaintedPath = TaintTracking::Global<TaintedPathConfig>;

View File

@@ -0,0 +1,10 @@
int main(int argc, char** argv) {
char *userAndFile = argv[2];
{
char fileBuffer[PATH_MAX];
snprintf(fileBuffer, sizeof(fileBuffer), "/home/%s", userAndFile);
// BAD: a string from the user is used in a filename
fopen(fileBuffer, "wb+");
}
}

View File

@@ -0,0 +1,28 @@
#include <stdio.h>
#include <string.h>
int main(int argc, char** argv) {
char *userAndFile = argv[2];
const char *baseDir = "/home/user/public/";
char fullPath[PATH_MAX];
// Attempt to concatenate the base directory and the user-supplied path
snprintf(fullPath, sizeof(fullPath), "%s%s", baseDir, userAndFile);
// Resolve the absolute path, normalizing any ".." or "."
char *resolvedPath = realpath(fullPath, NULL);
if (resolvedPath == NULL) {
perror("Error resolving path");
return 1;
}
// Check if the resolved path starts with the base directory
if (strncmp(baseDir, resolvedPath, strlen(baseDir)) != 0) {
free(resolvedPath);
return 1;
}
// GOOD: Path is within the intended directory
FILE *file = fopen(resolvedPath, "wb+");
free(resolvedPath);
}

View File

@@ -0,0 +1,16 @@
#include <stdio.h>
#include <string.h>
int main(int argc, char** argv) {
char *fileName = argv[2];
// Check for invalid sequences in the user input
if (strstr(fileName , "..") || strchr(fileName , '/') || strchr(fileName , '\\')) {
printf("Invalid filename.\n");
return 1;
}
char fileBuffer[PATH_MAX];
snprintf(fileBuffer, sizeof(fileBuffer), "/home/user/files/%s", fileName);
// GOOD: We know that the filename is safe and stays within the public folder
FILE *file = fopen(fileBuffer, "wb+");
}

View File

@@ -19,7 +19,6 @@ import semmle.code.cpp.security.Security
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.ir.dataflow.TaintTracking2
import semmle.code.cpp.security.FlowSources
import semmle.code.cpp.models.implementations.Strcat
import ExecTaint::PathGraph
@@ -50,11 +49,17 @@ predicate interestingConcatenation(DataFlow::Node incoming, DataFlow::Node outgo
call.getTarget() = op and
op.hasQualifiedName("std", "operator+") and
op.getType().(UserType).hasQualifiedName("std", "basic_string") and
incoming.asIndirectArgument() = call.getArgument(1) and // left operand
incoming.asIndirectArgument() = call.getArgument(1) and // right operand
call = outgoing.asInstruction().getUnconvertedResultExpression()
)
}
/**
* A state will represent the most recent concatenation that occurred in the data flow.
* - `TConcatState` if the concetenation has not yet occurred.
* - `TExecState(incoming, outgoing)`, representing the concatenation of data from `incoming`
* into result `outgoing`.
*/
newtype TState =
TConcatState() or
TExecState(DataFlow::Node incoming, DataFlow::Node outgoing) {
@@ -75,7 +80,9 @@ class ExecState extends TExecState {
DataFlow::Node getOutgoingNode() { result = outgoing }
/** Holds if this is a possible `ExecState` for `sink`. */
/**
* Holds if this is a possible `ExecState` at `sink`, that is, if `outgoing` flows to `sink`.
*/
predicate isFeasibleForSink(DataFlow::Node sink) { ExecState::flow(outgoing, sink) }
string toString() { result = "ExecState" }
@@ -111,6 +118,12 @@ module ExecStateConfig implements DataFlow::ConfigSig {
module ExecState = TaintTracking::Global<ExecStateConfig>;
/**
* A full `TaintTracking` configuration from source to concatenation to sink, using a flow
* state to remember the concatenation. It's important that we track flow to the sink even though
* as soon as we reach the concatenation we know it will get there (due to the check of
* `isFeasibleForSink`), because this way we get a complete flow path.
*/
module ExecTaintConfig implements DataFlow::StateConfigSig {
class FlowState = TState;
@@ -137,6 +150,17 @@ module ExecTaintConfig implements DataFlow::StateConfigSig {
predicate isBarrierOut(DataFlow::Node node, FlowState state) {
isSink(node, state) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
exists(DataFlow::Node concatResult, Expr command, ExecState state |
result = [concatResult.getLocation(), command.getLocation()] and
isSink(sink, state) and
isSinkImpl(sink, command, _) and
concatResult = state.getOutgoingNode()
)
}
}
module ExecTaint = TaintTracking::GlobalWithState<ExecTaintConfig>;

View File

@@ -13,15 +13,13 @@
import cpp
import semmle.code.cpp.commons.Environment
import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl
import TaintedWithPath
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.ir.IR
import Flow::PathGraph
/** A call that prints its arguments to `stdout`. */
class PrintStdoutCall extends FunctionCall {
PrintStdoutCall() {
this.getTarget().hasGlobalOrStdName("puts") or
this.getTarget().hasGlobalOrStdName("printf")
}
PrintStdoutCall() { this.getTarget().hasGlobalOrStdName(["puts", "printf"]) }
}
/** A read of the QUERY_STRING environment variable */
@@ -29,19 +27,31 @@ class QueryString extends EnvironmentRead {
QueryString() { this.getEnvironmentVariable() = "QUERY_STRING" }
}
class Configuration extends TaintTrackingConfiguration {
override predicate isSource(Expr source) { source instanceof QueryString }
module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node.asIndirectExpr() instanceof QueryString }
override predicate isSink(Element tainted) {
exists(PrintStdoutCall call | call.getAnArgument() = tainted)
predicate isSink(DataFlow::Node node) {
exists(PrintStdoutCall call | call.getAnArgument() = [node.asIndirectExpr(), node.asExpr()])
}
override predicate isBarrier(Expr e) {
super.isBarrier(e) or e.getUnspecifiedType() instanceof IntegralType
predicate isBarrier(DataFlow::Node node) {
isSink(node) and node.asExpr().getUnspecifiedType() instanceof ArithmeticType
or
node.asCertainDefinition().getUnspecifiedType() instanceof ArithmeticType
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSourceLocation(DataFlow::Node source) {
exists(QueryString query | result = query.getLocation() | query = source.asIndirectExpr())
}
}
from QueryString query, Element printedArg, PathNode sourceNode, PathNode sinkNode
where taintedWithPath(query, printedArg, sourceNode, sinkNode)
select printedArg, sourceNode, sinkNode, "Cross-site scripting vulnerability due to $@.", query,
"this query data"
module Flow = TaintTracking::Global<Config>;
from QueryString query, Flow::PathNode sourceNode, Flow::PathNode sinkNode
where
Flow::flowPath(sourceNode, sinkNode) and
query = sourceNode.getNode().asIndirectExpr()
select sinkNode.getNode(), sourceNode, sinkNode, "Cross-site scripting vulnerability due to $@.",
query, "this query data"

View File

@@ -38,6 +38,9 @@ module SqlTaintedConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node node) {
exists(SqlLikeFunction runSql | runSql.outermostWrapperFunctionCall(asSinkExpr(node), _))
or
// sink defined using models-as-data
sinkNode(node, "sql-injection")
}
predicate isBarrier(DataFlow::Node node) {
@@ -51,18 +54,32 @@ module SqlTaintedConfig implements DataFlow::ConfigSig {
sql.barrierSqlArgument(input, _)
)
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
exists(Expr taintedArg | result = taintedArg.getLocation() | taintedArg = asSinkExpr(sink))
}
}
module SqlTainted = TaintTracking::Global<SqlTaintedConfig>;
from
SqlLikeFunction runSql, Expr taintedArg, FlowSource taintSource, SqlTainted::PathNode sourceNode,
SqlTainted::PathNode sinkNode, string callChain
Expr taintedArg, FlowSource taintSource, SqlTainted::PathNode sourceNode,
SqlTainted::PathNode sinkNode, string extraText
where
runSql.outermostWrapperFunctionCall(taintedArg, callChain) and
(
exists(SqlLikeFunction runSql, string callChain |
runSql.outermostWrapperFunctionCall(taintedArg, callChain) and
extraText = " and then passed to " + callChain
)
or
sinkNode(sinkNode.getNode(), "sql-injection") and
extraText = ""
) and
SqlTainted::flowPath(sourceNode, sinkNode) and
taintedArg = asSinkExpr(sinkNode.getNode()) and
taintSource = sourceNode.getNode()
select taintedArg, sourceNode, sinkNode,
"This argument to a SQL query function is derived from $@ and then passed to " + callChain + ".",
taintSource, "user input (" + taintSource.getSourceType() + ")"
"This argument to a SQL query function is derived from $@" + extraText + ".", taintSource,
"user input (" + taintSource.getSourceType() + ")"

View File

@@ -14,25 +14,46 @@
import cpp
import semmle.code.cpp.security.Security
import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl
import TaintedWithPath
import semmle.code.cpp.security.FlowSources
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.ir.IR
import Flow::PathGraph
predicate isProcessOperationExplanation(Expr arg, string processOperation) {
predicate isProcessOperationExplanation(DataFlow::Node arg, string processOperation) {
exists(int processOperationArg, FunctionCall call |
isProcessOperationArgument(processOperation, processOperationArg) and
call.getTarget().getName() = processOperation and
call.getArgument(processOperationArg) = arg
call.getArgument(processOperationArg) = arg.asIndirectExpr()
)
}
class Configuration extends TaintTrackingConfiguration {
override predicate isSink(Element arg) { isProcessOperationExplanation(arg, _) }
predicate isSource(FlowSource source, string sourceType) { sourceType = source.getSourceType() }
module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { isSource(node, _) }
predicate isSink(DataFlow::Node node) { isProcessOperationExplanation(node, _) }
predicate isBarrier(DataFlow::Node node) {
isSink(node) and node.asExpr().getUnspecifiedType() instanceof ArithmeticType
or
node.asCertainDefinition().getUnspecifiedType() instanceof ArithmeticType
}
predicate observeDiffInformedIncrementalMode() { any() }
}
from string processOperation, Expr arg, Expr source, PathNode sourceNode, PathNode sinkNode
module Flow = TaintTracking::Global<Config>;
from
string processOperation, string sourceType, DataFlow::Node source, DataFlow::Node sink,
Flow::PathNode sourceNode, Flow::PathNode sinkNode
where
isProcessOperationExplanation(arg, processOperation) and
taintedWithPath(source, arg, sourceNode, sinkNode)
select arg, sourceNode, sinkNode,
source = sourceNode.getNode() and
sink = sinkNode.getNode() and
isSource(source, sourceType) and
isProcessOperationExplanation(sink, processOperation) and
Flow::flowPath(sourceNode, sinkNode)
select sink, sourceNode, sinkNode,
"The value of this argument may come from $@ and is being passed to " + processOperation + ".",
source, source.toString()
source, sourceType

View File

@@ -5,8 +5,9 @@
* buffer.
* @kind problem
* @id cpp/overflow-buffer
* @problem.severity recommendation
* @problem.severity warning
* @security-severity 9.3
* @precision medium
* @tags security
* external/cwe/cwe-119
* external/cwe/cwe-121
@@ -26,6 +27,7 @@ from
BufferAccess ba, string bufferDesc, int accessSize, int accessType, Element bufferAlloc,
int bufferSize, string message
where
accessType != 4 and
accessSize = ba.getSize() and
bufferSize = getBufferSize(ba.getBuffer(bufferDesc, accessType), bufferAlloc) and
(

View File

@@ -20,6 +20,7 @@ import semmle.code.cpp.models.interfaces.Allocation
import semmle.code.cpp.models.interfaces.ArrayFunction
import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.RangeAnalysis
import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticExprSpecific
import semmle.code.cpp.security.ProductFlowUtils.ProductFlowUtils
import semmle.code.cpp.rangeanalysis.new.RangeAnalysisUtil
import StringSizeFlow::PathGraph1
import codeql.util.Unit
@@ -43,20 +44,28 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
)
}
predicate isSinkPairImpl(
CallInstruction c, DataFlow::Node bufSink, DataFlow::Node sizeSink, int delta, Expr eBuf
/**
* Holds if `c` a call to an `ArrayFunction` with buffer argument `bufSink`,
* and a size argument `sizeInstr` which satisfies `sizeInstr <= sizeBound + delta`.
*
* Furthermore, the `sizeSink` node is the dataflow node corresponding to
* `sizeBound`, and the expression `eBuf` is the expression corresponding
* to `bufInstr`.
*/
predicate isSinkPairImpl0(
CallInstruction c, DataFlow::Node bufSink, DataFlow::Node sizeSink, int delta, Expr eBuf,
Instruction sizeBound, Instruction sizeInstr
) {
exists(
int bufIndex, int sizeIndex, Instruction sizeInstr, Instruction bufInstr, ArrayFunction func
|
exists(int bufIndex, int sizeIndex, Instruction bufInstr, ArrayFunction func |
bufInstr = bufSink.asInstruction() and
c.getArgument(bufIndex) = bufInstr and
sizeInstr = sizeSink.asInstruction() and
sizeBound = sizeSink.asInstruction() and
c.getArgument(sizeIndex) = sizeInstr and
c.getStaticCallTarget() = func and
pragma[only_bind_into](func)
.hasArrayWithVariableSize(pragma[only_bind_into](bufIndex),
pragma[only_bind_into](sizeIndex)) and
bounded(c.getArgument(sizeIndex), sizeInstr, delta) and
bounded(sizeInstr, sizeBound, delta) and
eBuf = bufInstr.getUnconvertedResultExpression()
)
}
@@ -82,119 +91,43 @@ module ValidState {
* library will perform, and visit all the places where the size argument is modified.
* 2. Once that dataflow traversal is done, we accumulate the offsets added at each places
* where the offset is modified (see `validStateImpl`).
*
* Because we want to guarantee that each place where we modify the offset has a `PathNode`
* we "flip" a boolean flow state in each `isAdditionalFlowStep`. This ensures that the node
* has a corresponding `PathNode`.
*/
private module ValidStateConfig implements DataFlow::StateConfigSig {
class FlowState = boolean;
private module ValidStateConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { hasSize(_, source, _) }
predicate isSource(DataFlow::Node source, FlowState state) {
hasSize(_, source, _) and
state = false
}
predicate isSink(DataFlow::Node sink) { isSinkPairImpl0(_, _, sink, _, _, _, _) }
predicate isSink(DataFlow::Node sink, FlowState state) {
isSinkPairImpl(_, _, sink, _, _) and
state = [false, true]
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
isAdditionalFlowStep2(node1, node2, _) and
state1 = [false, true] and
state2 = state1.booleanNot()
}
predicate includeHiddenNodes() { any() }
predicate isBarrierOut(DataFlow::Node node) { DataFlow::flowsToBackEdge(node) }
}
private import DataFlow::GlobalWithState<ValidStateConfig>
private import DataFlow::Global<ValidStateConfig>
private predicate inLoop(PathNode n) { n.getASuccessor+() = n }
/**
* Holds if `value` is a possible offset for `n`.
*
* To ensure termination, we limit `value` to be in the
* range `[-2, 2]` if the node is part of a loop. Without
* this restriction we wouldn't terminate on an example like:
* ```cpp
* while(unknown()) { size++; }
* ```
*/
private predicate validStateImpl(PathNode n, int value) {
// If the dataflow node depends recursively on itself we restrict the range.
(inLoop(n) implies value = [-2 .. 2]) and
(
// For the dataflow source we have an allocation such as `malloc(size + k)`,
// and the value of the flow-state is then `k`.
hasSize(_, n.getNode(), value)
or
// For a dataflow sink any `value` that is strictly smaller than the delta
// needs to be a valid flow-state. That is, for a snippet like:
// ```
// p = b ? new char[size] : new char[size + 1];
// memset(p, 0, size + 2);
// ```
// the valid flow-states at the `memset` must include the set `{0, 1}` since the
// flow-state at `new char[size]` is `0`, and the flow-state at `new char[size + 1]`
// is `1`.
//
// So we find a valid flow-state at the sink's predecessor, and use the definition
// of our sink predicate to compute the valid flow-states at the sink.
exists(int delta, PathNode n0 |
n0.getASuccessor() = n and
validStateImpl(n0, value) and
isSinkPairImpl(_, _, n.getNode(), delta, _) and
delta > value
)
or
// For a non-source and non-sink node there is two cases to consider.
// 1. A node where we have to update the flow-state, or
// 2. A node that doesn't update the flow-state.
//
// For case 1, we compute the new flow-state by adding the constant operand of the
// `AddInstruction` to the flow-state of any predecessor node.
// For case 2 we simply propagate the valid flow-states from the predecessor node to
// the next one.
exists(PathNode n0, DataFlow::Node node0, DataFlow::Node node, int value0 |
n0.getASuccessor() = n and
validStateImpl(n0, value0) and
node = n.getNode() and
node0 = n0.getNode()
|
exists(int delta |
isAdditionalFlowStep2(node0, node, delta) and
value0 = value + delta
)
or
not isAdditionalFlowStep2(node0, node, _) and
value = value0
)
)
}
predicate validState(DataFlow::Node n, int value) {
validStateImpl(any(PathNode pn | pn.getNode() = n), value)
predicate validState(DataFlow::Node source, DataFlow::Node sink, int value) {
hasSize(_, source, value) and
flow(source, sink)
}
}
import ValidState
/**
* Holds if `node2` is a dataflow node that represents an addition of two operands `op1`
* and `op2` such that:
* 1. `node1` is the dataflow node that represents `op1`, and
* 2. the value of `op2` can be upper bounded by `delta.`
*/
predicate isAdditionalFlowStep2(DataFlow::Node node1, DataFlow::Node node2, int delta) {
exists(AddInstruction add, Operand op |
add.hasOperands(node1.asOperand(), op) and
semBounded(getSemanticExpr(op.getDef()), any(SemZeroBound zero), delta, true, _) and
node2.asInstruction() = add
module SizeBarrierInput implements SizeBarrierInputSig {
int fieldFlowBranchLimit() { result = 2 }
predicate isSource(DataFlow::Node source) {
exists(int state |
hasSize(_, source, state) and
validState(source, _, state)
)
}
}
predicate isSinkPairImpl(
CallInstruction c, DataFlow::Node bufSink, DataFlow::Node sizeSink, int delta, Expr eBuf
) {
exists(Instruction sizeBound, Instruction sizeInstr |
isSinkPairImpl0(c, bufSink, sizeSink, delta, eBuf, sizeBound, sizeInstr) and
not sizeBound = SizeBarrier<SizeBarrierInput>::getABarrierInstruction(delta) and
not sizeInstr = SizeBarrier<SizeBarrierInput>::getABarrierInstruction(delta)
)
}
@@ -214,32 +147,24 @@ module StringSizeConfig implements ProductFlow::StateConfigSig {
// to the size of the allocation. This state is then checked in `isSinkPair`.
exists(state1) and
hasSize(bufSource.asExpr(), sizeSource, state2) and
validState(sizeSource, state2)
validState(sizeSource, _, state2)
}
predicate isSinkPair(
DataFlow::Node bufSink, FlowState1 state1, DataFlow::Node sizeSink, FlowState2 state2
) {
exists(state1) and
validState(sizeSink, state2) and
validState(_, sizeSink, state2) and
exists(int delta |
isSinkPairImpl(_, bufSink, sizeSink, delta, _) and
delta > state2
)
}
predicate isBarrierOut2(DataFlow::Node node) {
node = any(DataFlow::SsaPhiNode phi).getAnInput(true)
}
predicate isBarrierOut2(DataFlow::Node node) { DataFlow::flowsToBackEdge(node) }
predicate isAdditionalFlowStep2(
DataFlow::Node node1, FlowState2 state1, DataFlow::Node node2, FlowState2 state2
) {
validState(node2, state2) and
exists(int delta |
isAdditionalFlowStep2(node1, node2, delta) and
state1 = state2 + delta
)
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
node = SizeBarrier<SizeBarrierInput>::getABarrierNode(state)
}
}

Some files were not shown because too many files have changed in this diff Show More