Merge remote-tracking branch 'upstream/master' into SimpleRangeAnalysis-use-after-cast

This commit is contained in:
Jonas Jensen
2019-04-01 09:10:57 +02:00
87 changed files with 1770 additions and 821 deletions

View File

@@ -12,6 +12,7 @@
* readability
*/
import cpp
private import semmle.code.cpp.commons.Exclusions
private import semmle.code.cpp.rangeanalysis.PointlessComparison
private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
import UnsignedGEZero
@@ -31,6 +32,7 @@ from
where
not cmp.isInMacroExpansion() and
not cmp.isFromTemplateInstantiation(_) and
not functionContainsDisabledCode(cmp.getEnclosingFunction()) and
reachablePointlessComparison(cmp, left, right, value, ss) and
// a comparison between an enum and zero is always valid because whether

View File

@@ -11,6 +11,7 @@
* external/cwe/cwe-561
*/
import cpp
private import semmle.code.cpp.commons.Exclusions
class PureExprInVoidContext extends ExprInVoidContext {
PureExprInVoidContext() { this.isPure() }
@@ -23,71 +24,29 @@ predicate accessInInitOfForStmt(Expr e) {
s.getExpr() = e)
}
/**
 * Relates the preprocessor branch directive `pbd` to the file `file` and the
 * line `pbdStartLine` at which its location starts.
 */
predicate pbdLocation(PreprocessorBranchDirective pbd, string file, int pbdStartLine) {
  exists(Location directiveLoc |
    directiveLoc = pbd.getLocation() and
    directiveLoc.hasLocationInfo(file, pbdStartLine, _, _, _)
  )
}
/**
 * Relates the function `f` to the file `file` and to the first and last lines
 * (`fBlockStartLine`, `fBlockEndLine`) of the location of its body block.
 */
predicate functionLocation(Function f, string file, int fBlockStartLine, int fBlockEndLine) {
  exists(Location bodyLoc |
    bodyLoc = f.getBlock().getLocation() and
    bodyLoc.hasLocationInfo(file, fBlockStartLine, _, fBlockEndLine, _)
  )
}
/**
* Holds if the function `f`, or a function called by it, contains
* code excluded by the preprocessor.
*/
predicate containsDisabledCode(Function f) {
// `f` contains a preprocessor branch that was not taken
exists(PreprocessorBranchDirective pbd, string file, int pbdStartLine, int fBlockStartLine, int fBlockEndLine |
functionLocation(f, file, fBlockStartLine, fBlockEndLine) and
pbdLocation(pbd, file, pbdStartLine) and
pbdStartLine <= fBlockEndLine and
pbdStartLine >= fBlockStartLine and
(
pbd.(PreprocessorBranch).wasNotTaken() or
// an else either was not taken, or its corresponding branch
// was not taken.
pbd instanceof PreprocessorElse
)
) or
predicate functionContainsDisabledCodeRecursive(Function f) {
functionContainsDisabledCode(f) or
// recurse into function calls
exists(FunctionCall fc |
fc.getEnclosingFunction() = f and
containsDisabledCode(fc.getTarget())
functionContainsDisabledCodeRecursive(fc.getTarget())
)
}
/**
* Holds if the function `f`, or a function called by it, is inside a
* preprocessor branch that may have code in another arm
*/
predicate definedInIfDef(Function f) {
exists(PreprocessorBranchDirective pbd, string file, int pbdStartLine, int pbdEndLine, int fBlockStartLine, int fBlockEndLine |
functionLocation(f, file, fBlockStartLine, fBlockEndLine) and
pbdLocation(pbd, file, pbdStartLine) and
pbdLocation(pbd.getNext(), file, pbdEndLine) and
pbdStartLine <= fBlockStartLine and
pbdEndLine >= fBlockEndLine and
// pbd is a preprocessor branch where multiple branches exist
(
pbd.getNext() instanceof PreprocessorElse or
pbd instanceof PreprocessorElse or
pbd.getNext() instanceof PreprocessorElif or
pbd instanceof PreprocessorElif
)
) or
predicate functionDefinedInIfDefRecursive(Function f) {
functionDefinedInIfDef(f) or
// recurse into function calls
exists(FunctionCall fc |
fc.getEnclosingFunction() = f and
definedInIfDef(fc.getTarget())
functionDefinedInIfDefRecursive(fc.getTarget())
)
}
@@ -121,8 +80,8 @@ where // EQExprs are covered by CompareWhereAssignMeant.ql
not parent instanceof PureExprInVoidContext and
not peivc.getEnclosingFunction().isCompilerGenerated() and
not peivc.getType() instanceof UnknownType and
not containsDisabledCode(peivc.(FunctionCall).getTarget()) and
not definedInIfDef(peivc.(FunctionCall).getTarget()) and
not functionContainsDisabledCodeRecursive(peivc.(FunctionCall).getTarget()) and
not functionDefinedInIfDefRecursive(peivc.(FunctionCall).getTarget()) and
if peivc instanceof FunctionCall then
exists(Function target |
target = peivc.(FunctionCall).getTarget() and

View File

@@ -14,15 +14,20 @@
import cpp
import semmle.code.cpp.security.TaintTracking
// Reports a `*` operation that has both a `sizeof` operand and an operand
// for which `tainted(source, tainted)` holds with `source` being user input.
from Expr source, Expr tainted, BinaryArithmeticOperation oper,
SizeofOperator sizeof, string taintCause
where tainted(source, tainted)
and oper.getAnOperand() = tainted
// restrict the arithmetic operation to multiplications
and oper.getOperator() = "*"
and oper.getAnOperand() = sizeof
// the operation itself must be distinct from the tainted expression
and oper != tainted
// only consider `sizeof` operands whose value is greater than 1
and sizeof.getValue().toInt() > 1
// `taintCause` is the description reported alongside `source` in the message
and isUserInput(source, taintCause)
select
oper, "This allocation size is derived from $@ and might overflow",
source, "user input (" + taintCause + ")"
/**
 * Holds if `e` is either an expression accepted by `isAllocationExpr` or a
 * `MulExpr` with a `SizeofOperator` child, and some child of `e` has an
 * integral type and satisfies `tainted(source, child)`, where `source` is
 * user input described by `taintCause`.
 */
predicate taintedAllocSize(Expr e, Expr source, string taintCause) {
  isUserInput(source, taintCause) and
  exists(Expr operand |
    operand = e.getAChild() and
    tainted(source, operand) and
    operand.getType().getUnspecifiedType() instanceof IntegralType
  ) and
  (
    e.(MulExpr).getAChild() instanceof SizeofOperator
    or
    isAllocationExpr(e)
  )
}

from Expr e, Expr source, string taintCause
where taintedAllocSize(e, source, taintCause)
select e, "This allocation size is derived from $@ and might overflow", source,
  "user input (" + taintCause + ")"

View File

@@ -68,6 +68,9 @@ some are after the final <code>#endif</code>. All three of these things must be
<li>
<a href="http://www.cplusplus.com/forum/articles/10627/">Headers and Includes: Why and How</a>
</li>
<li>
<a href="https://gcc.gnu.org/onlinedocs/cppinternals/Guard-Macros.html">The Multiple-Include Optimization</a>
</li>
</references>

View File

@@ -302,7 +302,13 @@ class File extends Container, @file {
predicate compiledAsMicrosoft() {
exists(Compilation c |
c.getAFileCompiled() = this and
c.getAnArgument() = "--microsoft"
(
c.getAnArgument() = "--microsoft" or
c.getAnArgument().toLowerCase().replaceAll("\\", "/").matches("%/cl.exe")
)
) or exists(File parent |
parent.compiledAsMicrosoft() and
parent.getAnIncludedFile() = this
)
}

View File

@@ -39,7 +39,16 @@ predicate allocationFunction(Function f)
name = "MmAllocateNodePagesForMdlEx" or
name = "MmMapLockedPagesWithReservedMapping" or
name = "MmMapLockedPages" or
name = "MmMapLockedPagesSpecifyCache"
name = "MmMapLockedPagesSpecifyCache" or
name = "LocalAlloc" or
name = "LocalReAlloc" or
name = "GlobalAlloc" or
name = "GlobalReAlloc" or
name = "HeapAlloc" or
name = "HeapReAlloc" or
name = "VirtualAlloc" or
name = "CoTaskMemAlloc" or
name = "CoTaskMemRealloc"
)
)
}
@@ -81,7 +90,17 @@ predicate freeFunction(Function f, int argNum)
(name = "MmFreeMappingAddress" and argNum = 0) or
(name = "MmFreePagesFromMdl" and argNum = 0) or
(name = "MmUnmapReservedMapping" and argNum = 0) or
(name = "MmUnmapLockedPages" and argNum = 0)
(name = "MmUnmapLockedPages" and argNum = 0) or
(name = "LocalFree" and argNum = 0) or
(name = "GlobalFree" and argNum = 0) or
(name = "HeapFree" and argNum = 2) or
(name = "VirtualFree" and argNum = 0) or
(name = "CoTaskMemFree" and argNum = 0) or
(name = "SysFreeString" and argNum = 0) or
(name = "LocalReAlloc" and argNum = 0) or
(name = "GlobalReAlloc" and argNum = 0) or
(name = "HeapReAlloc" and argNum = 2) or
(name = "CoTaskMemRealloc" and argNum = 0)
)
)
}

View File

@@ -0,0 +1,60 @@
/**
* Common predicates used to exclude results from a query based on heuristics.
*/
import cpp
/**
 * Relates the preprocessor branch directive `pbd` to the file `file` and the
 * line `pbdStartLine` at which its location starts.
 */
private predicate pbdLocation(PreprocessorBranchDirective pbd, string file, int pbdStartLine) {
  exists(Location directiveLoc |
    directiveLoc = pbd.getLocation() and
    directiveLoc.hasLocationInfo(file, pbdStartLine, _, _, _)
  )
}
/**
 * Relates the function `f` to the file `file` and to the first and last lines
 * (`fBlockStartLine`, `fBlockEndLine`) of the location of its body block.
 */
private predicate functionLocation(Function f, string file, int fBlockStartLine, int fBlockEndLine) {
  exists(Location bodyLoc |
    bodyLoc = f.getBlock().getLocation() and
    bodyLoc.hasLocationInfo(file, fBlockStartLine, _, fBlockEndLine, _)
  )
}
/**
 * Holds if the body of `f` lies, by line number and within one file, between
 * a preprocessor branch directive and the directive returned by its
 * `getNext()`, and at least one of those two directives is a
 * `PreprocessorElse` or `PreprocessorElif`. In other words, `f` is defined in
 * one arm of a conditional that may have code in another arm.
 */
predicate functionDefinedInIfDef(Function f) {
  exists(
    PreprocessorBranchDirective arm, string path, int armFirstLine, int armLastLine,
    int bodyFirstLine, int bodyLastLine
  |
    functionLocation(f, path, bodyFirstLine, bodyLastLine) and
    pbdLocation(arm, path, armFirstLine) and
    pbdLocation(arm.getNext(), path, armLastLine) and
    // the function body is enclosed by the two directives
    armFirstLine <= bodyFirstLine and
    bodyLastLine <= armLastLine and
    // one end of the arm is an alternative branch, so other arms exist
    (
      arm instanceof PreprocessorElse or
      arm instanceof PreprocessorElif or
      arm.getNext() instanceof PreprocessorElse or
      arm.getNext() instanceof PreprocessorElif
    )
  )
}
/**
 * Holds if a preprocessor branch directive starts on a line within the body
 * of `f` (same file, between the body's first and last lines) and that
 * directive indicates code excluded by the preprocessor.
 */
predicate functionContainsDisabledCode(Function f) {
  exists(
    PreprocessorBranchDirective directive, string path, int directiveLine, int bodyFirstLine,
    int bodyLastLine
  |
    pbdLocation(directive, path, directiveLine) and
    functionLocation(f, path, bodyFirstLine, bodyLastLine) and
    bodyFirstLine <= directiveLine and
    directiveLine <= bodyLastLine
  |
    // for an else, either the else itself or its corresponding branch
    // was not taken, so it always counts.
    directive instanceof PreprocessorElse
    or
    // any other branch counts only if it was not taken
    directive.(PreprocessorBranch).wasNotTaken()
  )
}

View File

@@ -32,19 +32,19 @@ class AttributeFormattingFunction extends FormattingFunction {
* A standard function such as `vprintf` that has a format parameter
* and a variable argument list of type `va_list`.
*/
predicate primitiveVariadicFormatter(TopLevelFunction f, int formatParamIndex, boolean wide) {
predicate primitiveVariadicFormatter(TopLevelFunction f, int formatParamIndex) {
f.getName().regexpMatch("_?_?va?[fs]?n?w?printf(_s)?(_p)?(_l)?")
and (
if f.getName().matches("%\\_l")
then formatParamIndex = f.getNumberOfParameters() - 3
else formatParamIndex = f.getNumberOfParameters() - 2
) and if f.getName().matches("%w%") then wide = true else wide = false
)
}
private
predicate callsVariadicFormatter(Function f, int formatParamIndex, boolean wide) {
predicate callsVariadicFormatter(Function f, int formatParamIndex) {
exists(FunctionCall fc, int i |
variadicFormatter(fc.getTarget(), i, wide)
variadicFormatter(fc.getTarget(), i)
and fc.getEnclosingFunction() = f
and fc.getArgument(i) = f.getParameter(formatParamIndex).getAnAccess()
)
@@ -54,11 +54,11 @@ predicate callsVariadicFormatter(Function f, int formatParamIndex, boolean wide)
* Holds if `f` is a function such as `vprintf` that has a format parameter
* (at `formatParamIndex`) and a variable argument list of type `va_list`.
*/
predicate variadicFormatter(Function f, int formatParamIndex, boolean wide) {
primitiveVariadicFormatter(f, formatParamIndex, wide)
predicate variadicFormatter(Function f, int formatParamIndex) {
primitiveVariadicFormatter(f, formatParamIndex)
or (
not f.isVarargs()
and callsVariadicFormatter(f, formatParamIndex, wide)
and callsVariadicFormatter(f, formatParamIndex)
)
}
@@ -68,12 +68,10 @@ predicate variadicFormatter(Function f, int formatParamIndex, boolean wide) {
*/
class UserDefinedFormattingFunction extends FormattingFunction {
UserDefinedFormattingFunction() {
isVarargs() and callsVariadicFormatter(this, _, _)
isVarargs() and callsVariadicFormatter(this, _)
}
override int getFormatParameterIndex() { callsVariadicFormatter(this, result, _) }
override predicate isWideCharDefault() { callsVariadicFormatter(this, _, true) }
override int getFormatParameterIndex() { callsVariadicFormatter(this, result) }
}
/**
@@ -674,8 +672,8 @@ class FormatLiteral extends Literal {
/**
* Gets the char type required by the nth conversion specifier.
* - in the base case this is the default for the formatting function
* (e.g. `char` for `printf`, `wchar_t` for `wprintf`).
* - the `%S` format character reverses wideness.
* (e.g. `char` for `printf`, `char` or `wchar_t` for `wprintf`).
* - the `%C` format character reverses wideness.
* - the size prefixes 'l'/'w' and 'h' override the type character
* to wide or single-byte characters respectively.
*/
@@ -721,8 +719,8 @@ class FormatLiteral extends Literal {
/**
* Gets the string type required by the nth conversion specifier.
* - in the base case this is the default for the formatting function
* (e.g. `char` for `printf`, `wchar_t` for `wprintf`).
* - the `%S` format character reverses wideness.
* (e.g. `char *` for `printf`, `char *` or `wchar_t *` for `wprintf`).
* - the `%S` format character reverses wideness on some platforms.
* - the size prefixes 'l'/'w' and 'h' override the type character
* to wide or single-byte characters respectively.
*/

View File

@@ -22,7 +22,7 @@ private Type stripTopLevelSpecifiersOnly(Type t) {
*/
Type getAFormatterWideType() {
exists(FormattingFunction ff |
result = stripTopLevelSpecifiersOnly(ff.getDefaultCharType()) and
result = stripTopLevelSpecifiersOnly(ff.getFormatCharType()) and
result.getSize() != 1
)
}
@@ -46,6 +46,14 @@ abstract class FormattingFunction extends Function {
/** Gets the position at which the format parameter occurs. */
abstract int getFormatParameterIndex();
/**
 * Holds if the file containing this `FormattingFunction` was compiled as
 * Microsoft code, i.e. in a context that supports Microsoft rules and
 * extensions.
 */
predicate isMicrosoft() {
  exists(File definingFile | definingFile = this.getFile() |
    definingFile.compiledAsMicrosoft()
  )
}
/**
* Holds if the default meaning of `%s` is a `wchar_t *`, rather than
* a `char *` (either way, `%S` will have the opposite meaning).
@@ -55,11 +63,10 @@ abstract class FormattingFunction extends Function {
deprecated predicate isWideCharDefault() { none() }
/**
* Gets the default character type expected for `%s` by this function. Typically
* `char` or `wchar_t`.
* Gets the character type used in the format string for this function.
*/
Type getDefaultCharType() {
result =
Type getFormatCharType() {
result =
stripTopLevelSpecifiersOnly(
stripTopLevelSpecifiersOnly(
getParameter(getFormatParameterIndex()).getType().getUnderlyingType()
@@ -67,19 +74,33 @@ abstract class FormattingFunction extends Function {
)
}
/**
 * Gets the character type that `%s` expects by default for this function,
 * typically `char` or `wchar_t`. When `isMicrosoft()` holds this is the
 * character type of the format string; otherwise it is a plain `char` type.
 */
Type getDefaultCharType() {
  if this.isMicrosoft()
  then result = this.getFormatCharType()
  else result instanceof PlainCharType
}
/**
* Gets the non-default character type expected for `%S` by this function. Typically
* `wchar_t` or `char`. On some snapshots there may be multiple results where we can't tell
* which is correct for a particular function.
*/
Type getNonDefaultCharType() {
(
getDefaultCharType().getSize() = 1 and
result = getAFormatterWideTypeOrDefault()
) or (
getDefaultCharType().getSize() > 1 and
result instanceof PlainCharType
)
(
getDefaultCharType().getSize() = 1 and
result = getWideCharType()
) or (
not getDefaultCharType().getSize() = 1 and
result instanceof PlainCharType
)
}
/**
@@ -89,10 +110,12 @@ abstract class FormattingFunction extends Function {
*/
Type getWideCharType() {
(
result = getDefaultCharType() or
result = getNonDefaultCharType()
) and
result.getSize() > 1
result = getFormatCharType() and
result.getSize() > 1
) or (
not getFormatCharType().getSize() > 1 and
result = getAFormatterWideTypeOrDefault() // may have more than one result
)
}
/**

View File

@@ -0,0 +1,41 @@
import cpp
private import semmle.code.cpp.rangeanalysis.RangeSSA
/**
 * Holds if the comparison `guard` cannot evaluate to `polarity` when one of
 * its operands is NaN: `!=` cannot be `false`, and `==`, `<`, `<=`, `>`, `>=`
 * cannot be `true`.
 */
predicate nanExcludingComparison(ComparisonOperation guard, boolean polarity) {
  guard instanceof NEExpr and
  polarity = false
  or
  (
    guard instanceof EQExpr or
    guard instanceof GEExpr or
    guard instanceof GTExpr or
    guard instanceof LEExpr or
    guard instanceof LTExpr
  ) and
  polarity = true
}
/**
 * Holds if `def` is a guard phi definition whose guard rules out NaN on the
 * guarded branch, and `v` is a use of `def` for the local-scope variable that
 * the guard tests. Such a `v` therefore cannot be NaN.
 */
private predicate excludesNan(RangeSsaDefinition def, VariableAccess v) {
  exists(ComparisonOperation cmp, VariableAccess guarded, boolean outcome |
    def.isGuardPhi(guarded, cmp, outcome) and
    nanExcludingComparison(cmp, outcome) and
    cmp.getAnOperand() = guarded
  |
    exists(LocalScopeVariable var |
      var = guarded.getTarget() and
      v = def.getAUse(var)
    )
  )
}
/**
 * A variable access that is known not to be NaN, because it is a use of some
 * SSA definition for which `excludesNan` holds.
 */
class NonNanVariableAccess extends VariableAccess {
  NonNanVariableAccess() {
    exists(RangeSsaDefinition guardDef | excludesNan(guardDef, this))
  }
}

View File

@@ -45,6 +45,7 @@ import cpp
private import RangeAnalysisUtils
import RangeSSA
import SimpleRangeAnalysisCached
private import NanAnalysis
/**
* This fixed set of lower bounds is used when the lower bounds of an
@@ -993,6 +994,25 @@ predicate unanalyzableDefBounds(
ub = varMaxVal(v)
}
/**
 * Holds if, in the `branch` branch of the guard `guard` involving `v`, `v` is
 * known not to be NaN, so that range inferences about `v` are safe there.
 */
bindingset[guard, v, branch]
predicate nonNanGuardedVariable(ComparisonOperation guard, VariableAccess v, boolean branch) {
  // The guard itself rules out NaN on this branch. For example, given
  // `if (x > 5) { ...then... } else { ...else... }`
  // it is fine to conclude `x > 5` in the `then` part even with no prior
  // knowledge that `x` is not NaN (concluding `x <= 5` in the `else` part
  // would not be safe).
  nanExcludingComparison(guard, branch)
  or
  exists(Type accessType | accessType = v.getType().getUnspecifiedType() |
    // integral values are never NaN
    accessType instanceof IntegralType
    or
    // floating-point values need a previously established non-NaN fact
    accessType instanceof FloatingPointType and
    v instanceof NonNanVariableAccess
  )
}
/**
* If the guard is a comparison of the form `p*v + q <CMP> r`, then this
* predicate uses the bounds information for `r` to compute a lower bound
@@ -1004,10 +1024,12 @@ predicate lowerBoundFromGuard(
) {
exists (float childLB, RelationStrictness strictness
| boundFromGuard(guard, v, childLB, true, strictness, branch)
| if (strictness = Nonstrict() or
not (v.getType().getUnspecifiedType() instanceof IntegralType))
then lb = childLB
else lb = childLB+1)
| if nonNanGuardedVariable(guard, v, branch)
then (if (strictness = Nonstrict() or
not (v.getType().getUnspecifiedType() instanceof IntegralType))
then lb = childLB
else lb = childLB+1)
else lb = varMinVal(v.getTarget()))
}
/**
@@ -1021,10 +1043,12 @@ predicate upperBoundFromGuard(
) {
exists (float childUB, RelationStrictness strictness
| boundFromGuard(guard, v, childUB, false, strictness, branch)
| if (strictness = Nonstrict() or
not (v.getType().getUnspecifiedType() instanceof IntegralType))
then ub = childUB
else ub = childUB-1)
| if nonNanGuardedVariable(guard, v, branch)
then (if (strictness = Nonstrict() or
not (v.getType().getUnspecifiedType() instanceof IntegralType))
then ub = childUB
else ub = childUB-1)
else ub = varMaxVal(v.getTarget()))
}
/**

View File

@@ -4,7 +4,7 @@ import external.ExternalArtifact
predicate printfLikeFunction(Function func, int formatArg) {
(formatArg = func.(FormattingFunction).getFormatParameterIndex() and not func instanceof UserDefinedFormattingFunction)
or
primitiveVariadicFormatter(func, formatArg, _)
primitiveVariadicFormatter(func, formatArg)
or
exists(ExternalData data |
// TODO Do this \ to / conversion in the toolchain?

View File

@@ -245,9 +245,14 @@ predicate insideFunctionValueMoveTo(Element src, Element dest)
and format.getConversionChar(arg - formattingSend.getTarget().getNumberOfParameters()) = argFormat
and (argFormat = "s" or argFormat = "S" or argFormat = "@"))
// Expressions computed from tainted data are also tainted
or (exists (FunctionCall call | dest = call and isPureFunction(call.getTarget().getName()) |
call.getAnArgument() = src
and forall(Expr arg | arg = call.getAnArgument() | arg = src or predictable(arg))))
or exists(FunctionCall call | dest = call and isPureFunction(call.getTarget().getName()) |
call.getAnArgument() = src and
forall(Expr arg | arg = call.getAnArgument() | arg = src or predictable(arg)) and
// flow through `strlen` tends to cause dubious results, if the length is
// bounded.
not call.getTarget().getName() = "strlen"
)
or exists(Element a, Element b |
moveToDependingOnSide(a, b) and
if insideValueSource(a) then