[CPP-370] Tentative implementation of NonConstantFormat.ql using the global

DataFlow library.  This is intended solely for further discussion.
This commit is contained in:
Ziemowit Laski
2019-04-22 17:42:58 -07:00
parent 21eb00a5df
commit 0c86d4c112
4 changed files with 82 additions and 218 deletions

View File

@@ -13,206 +13,62 @@
* security
* external/cwe/cwe-134
*/
import cpp
/**
* Holds if `t` is a character array or pointer, where `charType` is the type of
* characters in `t`.
*/
predicate stringType(Type t, Type charType) {
(
( charType = t.(PointerType).getBaseType() or
charType = t.(ArrayType).getBaseType()
) and (
charType.getUnspecifiedType() instanceof CharType or
charType.getUnspecifiedType() instanceof Wchar_t
)
)
or
stringType(t.getUnderlyingType(), charType)
or
stringType(t.(SpecifiedType).getBaseType(), charType)
}
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.commons.Printf
predicate gettextFunction(Function f, int arg) {
predicate whitelistFunction(Function f, int arg) {
// basic variations of gettext
(f.getName() = "_" and arg = 0) or
(f.getName() = "gettext" and arg = 0) or
(f.getName() = "dgettext" and arg = 1) or
(f.getName() = "dcgettext" and arg = 1) or
f.getName() = "_" and arg = 0
or
f.getName() = "gettext" and arg = 0
or
f.getName() = "dgettext" and arg = 1
or
f.getName() = "dcgettext" and arg = 1
or
// plural variations of gettext that take one format string for singular and another for plural form
(f.getName() = "ngettext" and (arg = 0 or arg = 1)) or
(f.getName() = "dngettext" and (arg = 1 or arg = 2)) or
(f.getName() = "dcngettext" and (arg = 1 or arg = 2))
}
predicate stringArray(Variable arr, AggregateLiteral init) {
arr.getInitializer().getExpr() = init and
stringType(arr.getType().getUnspecifiedType().(ArrayType).getBaseType(), _)
// Ideally, this predicate should also check that no item of `arr` is ever
// reassigned, but such an analysis could get fairly complicated. Instead, we
// just hope that nobody would initialize an array of constants and then
// overwrite some of them with untrusted data.
}
predicate underscoreMacro(Expr e) {
exists(MacroInvocation mi |
mi.getMacroName() = "_" and
mi.getExpr() = e
)
}
/**
* Holds if a value of character pointer type may flow _directly_ from `src` to
* `dst`.
*/
predicate stringFlowStep(Expr src, Expr dst) {
not underscoreMacro(dst)
and
stringType(dst.getType(), _)
and
(
src = dst.(VariableAccess).getTarget().getAnAssignedValue()
or
src = dst.(ConditionalExpr).getThen()
or
src = dst.(ConditionalExpr).getElse()
or
src = dst.(AssignExpr).getRValue()
or
src = dst.(CommaExpr).getRightOperand()
or
src = dst.(UnaryPlusExpr).getOperand()
or
stringArray(dst.(ArrayExpr).getArrayBase().(VariableAccess).getTarget(),
src.getParent())
or
// Follow a parameter to its arguments in all callers.
exists(Parameter p | p = dst.(VariableAccess).getTarget() |
src = p.getFunction().getACallToThisFunction().getArgument(p.getIndex())
)
or
// Follow a call to a gettext function without looking at its body even if
// the body is known. This ensures that we report the error in the relevant
// location rather than inside the body of some `_` function.
exists(Function gettext, FunctionCall call, int idx |
gettextFunction(gettext, idx) and
dst = call and
gettext = call.getTarget()
| src = call.getArgument(idx)
)
or
// Follow a call to a non-gettext function through its return statements.
exists(Function f, ReturnStmt retStmt |
f = dst.(FunctionCall).getTarget() and
f = retStmt.getEnclosingFunction() and
not gettextFunction(f, _)
| src = retStmt.getExpr()
)
)
}
/** Holds if `v` may be written to, other than through `AssignExpr`. */
predicate nonConstVariable(Variable v) {
exists(Type charType |
stringType(v.getType(), charType) and not charType.isConst()
)
f.getName() = "ngettext" and
(arg = 0 or arg = 1)
or
exists(AssignPointerAddExpr e |
e.getLValue().(VariableAccess).getTarget() = v
)
f.getName() = "dngettext" and
(arg = 1 or arg = 2)
or
exists(AssignPointerSubExpr e |
e.getLValue().(VariableAccess).getTarget() = v
)
or
exists(CrementOperation e | e.getOperand().(VariableAccess).getTarget() = v)
or
exists(AddressOfExpr e | e.getOperand().(VariableAccess).getTarget() = v)
or
exists(ReferenceToExpr e | e.getExpr().(VariableAccess).getTarget() = v)
}
/**
* Holds if this expression is _directly_ considered non-constant for the
* purpose of this query.
*
* Each case of `Expr` that may have string type should be listed either in
* `nonConst` or `stringFlowStep`. Omitting a case leads to false negatives.
* Having a case in both places leads to unnecessary computation.
*/
predicate nonConst(Expr e) {
nonConstVariable(e.(VariableAccess).getTarget())
or
e instanceof CrementOperation
or
e instanceof AssignPointerAddExpr
or
e instanceof AssignPointerSubExpr
or
e instanceof PointerArithmeticOperation
or
e instanceof FieldAccess
or
e instanceof PointerDereferenceExpr
or
e instanceof AddressOfExpr
or
e instanceof ExprCall
or
exists(ArrayExpr ae | ae = e |
not stringArray(ae.getArrayBase().(VariableAccess).getTarget(), _)
)
or
e instanceof NewArrayExpr
or
// e is a call to a function whose definition we cannot see. We assume it is
// not constant.
exists(Function f | f = e.(FunctionCall).getTarget() |
not f.hasDefinition()
)
or
// e is a parameter of a function that's never called. If it were called, we
// would instead have followed the call graph in `stringFlowStep`.
exists(Function f
| f = e.(VariableAccess).getTarget().(Parameter).getFunction()
| not exists(f.getACallToThisFunction())
)
}
predicate formattingFunctionArgument(
FormattingFunction ff, FormattingFunctionCall fc, Expr arg)
{
fc.getTarget() = ff and
fc.getArgument(ff.getFormatParameterIndex()) = arg and
// Don't look for errors inside functions that are themselves formatting
// functions. We expect that the interesting errors will be in their callers.
not fc.getEnclosingFunction() instanceof FormattingFunction
}
// Reflexive-transitive closure of `stringFlow`, restricting the base case to
// only consider destination expressions that are arguments to formatting
// functions.
predicate stringFlow(Expr src, Expr dst) {
formattingFunctionArgument(_, _, dst) and src = dst
or
exists(Expr mid | stringFlowStep(src, mid) and stringFlow(mid, dst))
f.getName() = "dcngettext" and
(arg = 1 or arg = 2)
}
predicate whitelisted(Expr e) {
gettextFunction(e.(FunctionCall).getTarget(), _)
exists(FunctionCall fc, int arg | fc = e.(FunctionCall) |
whitelistFunction(fc.getTarget(), arg) and
isConst(fc.getArgument(arg))
)
}
predicate isConst(Expr e) {
e instanceof StringLiteral
or
underscoreMacro(e)
whitelisted(e)
}
predicate flowFromNonConst(Expr src, Expr dst) {
stringFlow(src, dst) and
nonConst(src) and
not whitelisted(src)
class ConstFlow extends DataFlow::Configuration {
ConstFlow() { this = "ConstFlow" }
override predicate isSource(DataFlow::Node source) {
isConst(source.asExpr())
}
override predicate isSink(DataFlow::Node sink) {
exists(FormattingFunctionCall fc |
sink.asExpr() = fc.getArgument(fc.getFormatParameterIndex())
)
}
}
from FormattingFunctionCall fc, FormattingFunction ff, Expr format
where formattingFunctionArgument(ff, fc, format) and
flowFromNonConst(_, format) and
not fc.isInMacroExpansion()
select format, "The format string argument to " + ff.getName() + " should be constant to prevent security issues and other potential errors."
from FormattingFunctionCall call, Expr formatString
where call.getArgument(call.getFormatParameterIndex()) = formatString
and not exists(ConstFlow cf, DataFlow::Node source, DataFlow::Node sink |
cf.hasFlow(source, sink) and
sink.asExpr() = formatString
)
select call, "The format string argument to " + call.getTarget().getQualifiedName() + " should be constant to prevent security issues and other potential errors."

View File

@@ -1,5 +1,9 @@
extern int printf(const char *fmt, ...);
// For the following `...gettext` functions, we assume that
// all translations preserve the type and order of `%` specifiers
// (and hence are safe to use as format strings). This is
// assumption is hard-coded into the query.
extern char *gettext (const char *__msgid);
@@ -8,7 +12,6 @@ extern char *dgettext (const char *__domainname, const char *__msgid);
extern char *dcgettext (const char *__domainname,
const char *__msgid, int __category);
extern char *ngettext (const char *__msgid1, const char *__msgid2,
unsigned long int __n);
@@ -23,7 +26,9 @@ extern char *dcngettext (const char *__domainname, const char *__msgid1,
extern char *any_random_function(const char *);
#define NULL ((void*)0)
#define _(X) any_random_function((X))
// The following is the recommended use for the `_` macro.
#define _(X) gettext(X)
int main(int argc, char **argv) {
if(argc > 1)
@@ -40,10 +45,10 @@ int main(int argc, char **argv) {
printf(gettext("%d arguments\n"), argc-1); // ok
printf(any_random_function("%d arguments\n"), argc-1); // not ok
// Our query can't look inside the argument to a macro, so it fails to
// flag this call.
// Our query also supports looking for `_` as a function.
#undef _
printf(_(any_random_function("%d arguments\n")),
argc-1); // not ok [NOT REPORTED]
argc-1); // not ok
return 0;
}

View File

@@ -1,23 +1,22 @@
| NonConstantFormat.c:30:10:30:16 | access to array | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| NonConstantFormat.c:41:9:41:27 | call to any_random_function | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:45:10:45:21 | call to make_message | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:50:12:50:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:51:12:51:12 | call to _ | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:52:12:52:18 | call to gettext | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:53:12:53:21 | call to const_wash | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:54:12:54:26 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:55:12:55:17 | + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:56:12:56:18 | * ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:57:12:57:18 | & ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:58:12:58:39 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:60:10:60:35 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:63:12:63:20 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:69:12:69:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:75:12:75:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:81:12:81:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:86:12:86:18 | ++ ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:93:12:93:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:100:12:100:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:103:12:103:24 | new[] | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:108:12:108:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:117:10:117:19 | call to const_wash | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| NonConstantFormat.c:35:3:35:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| NonConstantFormat.c:46:2:46:7 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| NonConstantFormat.c:50:2:50:7 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:45:3:45:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:46:3:46:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:47:3:47:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:48:3:48:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:54:5:54:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:56:5:56:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:57:5:57:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:58:5:58:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:59:5:59:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:60:5:60:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:61:5:61:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:62:5:62:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:64:3:64:8 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:67:5:67:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:73:5:73:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:79:5:79:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:85:5:85:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:90:5:90:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |
| test.cpp:107:5:107:10 | call to printf | The format string argument to printf should be constant to prevent security issues and other potential errors. |

View File

@@ -23,7 +23,7 @@ const char *choose_message(unsigned int n) {
const char *make_message(unsigned int n) {
static char buf[64];
sprintf(buf, "%d tasks left\n", n);
sprintf(buf, "%d tasks left\n", n); // ok
return buf;
}
@@ -41,8 +41,12 @@ const char *const_wash(char *str) {
}
int main(int argc, char **argv) {
const char *message = messages[2];
printf(choose_message(argc - 1), argc - 1); // OK
printf(messages[1]); // OK
printf(message); // OK
printf(make_message(argc - 1)); // NOT OK
printf("Hello, World\n"); // OK
printf(_("Hello, World\n")); // OK
{
char hello[] = "hello, World\n";