mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Initial working draft of non-const source refactor.
This commit is contained in:
@@ -18,115 +18,80 @@
|
||||
import semmle.code.cpp.ir.dataflow.TaintTracking
|
||||
import semmle.code.cpp.commons.Printf
|
||||
import semmle.code.cpp.security.FlowSources
|
||||
import semmle.code.cpp.ir.dataflow.internal.ModelUtil
|
||||
import semmle.code.cpp.models.interfaces.DataFlow
|
||||
import semmle.code.cpp.models.interfaces.Taint
|
||||
import semmle.code.cpp.ir.implementation.raw.Instruction
|
||||
|
||||
class UncalledFunction extends Function {
|
||||
UncalledFunction() { not exists(Call c | c.getTarget() = this) }
|
||||
UncalledFunction() {
|
||||
not exists(Call c | c.getTarget() = this) and
|
||||
not this.(MemberFunction).overrides(_)
|
||||
}
|
||||
}
|
||||
|
||||
// For the following `...gettext` functions, we assume that
|
||||
// all translations preserve the type and order of `%` specifiers
|
||||
// (and hence are safe to use as format strings). This
|
||||
// assumption is hard-coded into the query.
|
||||
predicate whitelistFunction(Function f, int arg) {
|
||||
// basic variations of gettext
|
||||
f.getName() = "_" and arg = 0
|
||||
/**
|
||||
* Holds if `node` is a non-constant source of data flow.
|
||||
* This is defined as either:
|
||||
* 1) a `FlowSource`
|
||||
* 2) a parameter of an 'uncalled' function (i.e., a function that is not called in the program)
|
||||
* 3) an argument to a function with no definition that is not known to define the output through its input
|
||||
* 4) an out arg of a function with no definition that is not known to define the output through its input
|
||||
*
|
||||
* The latter two cases address identifying standard string manipulation libraries as input sources
|
||||
* e.g., strcpy, but it will identify unknown function calls as possible non-constant source
|
||||
* since it cannot be determined if the out argument or return is constant.
|
||||
*/
|
||||
predicate isNonConst(DataFlow::Node node) {
|
||||
node instanceof FlowSource
|
||||
or
|
||||
f.getName() = "gettext" and arg = 0
|
||||
exists(UncalledFunction f | f.getAParameter() = node.asParameter())
|
||||
or
|
||||
f.getName() = "dgettext" and arg = 1
|
||||
or
|
||||
f.getName() = "dcgettext" and arg = 1
|
||||
or
|
||||
// plural variations of gettext that take one format string for singular and another for plural form
|
||||
f.getName() = "ngettext" and
|
||||
(arg = 0 or arg = 1)
|
||||
or
|
||||
f.getName() = "dngettext" and
|
||||
(arg = 1 or arg = 2)
|
||||
or
|
||||
f.getName() = "dcngettext" and
|
||||
(arg = 1 or arg = 2)
|
||||
}
|
||||
|
||||
// we assume that ALL uses of the `_` macro
|
||||
// return constant string literals
|
||||
predicate underscoreMacro(Expr e) {
|
||||
exists(MacroInvocation mi |
|
||||
mi.getMacroName() = "_" and
|
||||
mi.getExpr() = e
|
||||
// Consider as an input any out arg of a function or a function's return where the function is not:
|
||||
// 1. a function with a known dataflow or taintflow from input to output and the `node` is the output
|
||||
// 2. a function where there is a known definition
|
||||
// i.e., functions that with unknown bodies and are not known to define the output through its input
|
||||
// are considered as possible non-const sources
|
||||
(
|
||||
node instanceof DataFlow::DefinitionByReferenceNode or
|
||||
node.asIndirectExpr() instanceof Call
|
||||
) and
|
||||
not exists(Function func, FunctionInput input, FunctionOutput output, CallInstruction call |
|
||||
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
|
||||
// variant function's output are now possible non-const sources
|
||||
(
|
||||
func.(DataFlowFunction).hasDataFlow(input, output) or
|
||||
func.(TaintFunction).hasTaintFlow(input, output)
|
||||
) and
|
||||
node = callOutput(call, output) and
|
||||
call.getStaticCallTarget() = func
|
||||
) and
|
||||
not exists(Call c |
|
||||
c.getTarget().hasDefinition() and
|
||||
if node instanceof DataFlow::DefinitionByReferenceNode
|
||||
then c.getAnArgument() = node.asDefiningArgument()
|
||||
else c = [node.asExpr(), node.asIndirectExpr()]
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `t` cannot hold a character array, directly or indirectly.
|
||||
* Holds if `sink` is a sink is a format string of any
|
||||
* `FormattingFunctionCall`.
|
||||
*/
|
||||
predicate cannotContainString(Type t, boolean isIndirect) {
|
||||
isIndirect = false and
|
||||
exists(Type unspecified |
|
||||
unspecified = t.getUnspecifiedType() and
|
||||
not unspecified instanceof UnknownType
|
||||
|
|
||||
unspecified instanceof BuiltInType or
|
||||
unspecified instanceof IntegralOrEnumType
|
||||
)
|
||||
}
|
||||
|
||||
predicate isNonConst(DataFlow::Node node, boolean isIndirect) {
|
||||
exists(Expr e |
|
||||
e = node.asExpr() and isIndirect = false
|
||||
or
|
||||
e = node.asIndirectExpr() and isIndirect = true
|
||||
|
|
||||
exists(FunctionCall fc | fc = e |
|
||||
not (
|
||||
whitelistFunction(fc.getTarget(), _) or
|
||||
fc.getTarget().hasDefinition()
|
||||
)
|
||||
)
|
||||
or
|
||||
exists(Variable v | v = e.(VariableAccess).getTarget() |
|
||||
v.getType().(ArrayType).getBaseType() instanceof CharType and
|
||||
exists(AssignExpr ae |
|
||||
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
|
||||
)
|
||||
)
|
||||
or
|
||||
exists(UncalledFunction f, Parameter p | f.getAParameter() = p |
|
||||
p = e.(VariableAccess).getTarget()
|
||||
)
|
||||
or
|
||||
node instanceof FlowSource
|
||||
or
|
||||
node instanceof DataFlow::DefinitionByReferenceNode and
|
||||
not exists(FormattingFunctionCall fc | node.asDefiningArgument() = fc.getOutputArgument(_)) and
|
||||
not exists(Call c |
|
||||
c.getAnArgument() = node.asDefiningArgument() and c.getTarget().hasDefinition()
|
||||
)
|
||||
)
|
||||
or
|
||||
node instanceof DataFlow::DefinitionByReferenceNode and
|
||||
isIndirect = true
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
predicate isBarrierNode(DataFlow::Node node) {
|
||||
underscoreMacro([node.asExpr(), node.asIndirectExpr()])
|
||||
or
|
||||
exists(node.asExpr()) and
|
||||
cannotContainString(node.getType(), false)
|
||||
}
|
||||
|
||||
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
|
||||
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
|
||||
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
|
||||
}
|
||||
|
||||
module NonConstFlowConfig implements DataFlow::ConfigSig {
|
||||
predicate isSource(DataFlow::Node source) { isNonConst(source, _) }
|
||||
predicate isSource(DataFlow::Node source) { isNonConst(source) }
|
||||
|
||||
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
|
||||
|
||||
predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) }
|
||||
predicate isBarrier(DataFlow::Node node) {
|
||||
// Ignore tracing non-const through array indices
|
||||
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
|
||||
}
|
||||
}
|
||||
|
||||
module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;
|
||||
|
||||
@@ -23,7 +23,7 @@ extern char *dcngettext (const char *__domainname, const char *__msgid1,
|
||||
extern char *any_random_function(const char *);
|
||||
|
||||
#define NULL ((void*)0)
|
||||
#define _(X) any_random_function((X))
|
||||
#define _(X) gettext(X)
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if(argc > 1)
|
||||
@@ -40,10 +40,9 @@ int main(int argc, char **argv) {
|
||||
printf(gettext("%d arguments\n"), argc-1); // GOOD
|
||||
printf(any_random_function("%d arguments\n"), argc-1); // BAD
|
||||
|
||||
// Even though `_` is mapped to `some_random_function` above,
|
||||
// the following call should not be flagged.
|
||||
printf(_(any_random_function("%d arguments\n")),
|
||||
argc-1); // GOOD
|
||||
|
||||
|
||||
printf(_(any_random_function("%d arguments\n")), argc-1); // BAD
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ extern "C" int snprintf ( char * s, int n, const char * format, ... );
|
||||
struct A {
|
||||
void do_print(const char *fmt0) {
|
||||
char buf[32];
|
||||
snprintf(buf, 32, fmt0); // GOOD [FALSE POSITIVE]
|
||||
snprintf(buf, 32, fmt0); // BAD through call from c.do_some_printing(c.ext_fmt_str())
|
||||
}
|
||||
};
|
||||
|
||||
@@ -39,7 +39,7 @@ struct C {
|
||||
|
||||
void foo(void) {
|
||||
C c;
|
||||
c.do_some_printing(c.ext_fmt_str()); // BAD [NOT DETECTED]
|
||||
c.do_some_printing(c.ext_fmt_str());
|
||||
}
|
||||
|
||||
struct some_class {
|
||||
|
||||
@@ -54,66 +54,66 @@ int main(int argc, char **argv) {
|
||||
{
|
||||
char hello[] = "hello, World\n";
|
||||
hello[0] = 'H';
|
||||
printf(hello); // BAD
|
||||
printf(hello); // GOOD
|
||||
printf(_(hello)); // GOOD
|
||||
printf(gettext(hello)); // GOOD
|
||||
printf(const_wash(hello)); // BAD
|
||||
printf((hello + 1) + 1); // BAD
|
||||
printf(+hello); // BAD
|
||||
printf(*&hello); // BAD
|
||||
printf(&*hello); // BAD
|
||||
printf((char*)(void*)+(hello+1) + 1); // BAD
|
||||
printf(const_wash(hello)); // GOOD
|
||||
printf((hello + 1) + 1); // GOOD
|
||||
printf(+hello); // GOOD
|
||||
printf(*&hello); // GOOD
|
||||
printf(&*hello); // GOOD
|
||||
printf((char*)(void*)+(hello+1) + 1); // GOOD
|
||||
}
|
||||
printf(("Hello, World\n" + 1) + 1); // BAD
|
||||
printf(("Hello, World\n" + 1) + 1); // GOOD
|
||||
{
|
||||
const char *hello = "Hello, World\n";
|
||||
printf(hello + 1); // BAD
|
||||
printf(hello + 1); // GOOD
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
{
|
||||
const char *hello = "Hello, World\n";
|
||||
hello += 1;
|
||||
printf(hello); // BAD
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "x = x + 1" syntax
|
||||
const char *hello = "Hello, World\n";
|
||||
hello = hello + 1;
|
||||
printf(hello); // BAD
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "x++" syntax
|
||||
const char *hello = "Hello, World\n";
|
||||
hello++;
|
||||
printf(hello); // BAD
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "++x" as subexpression
|
||||
const char *hello = "Hello, World\n";
|
||||
printf(++hello); // BAD
|
||||
printf(++hello); // GOOD
|
||||
}
|
||||
{
|
||||
// Same as above block but through a pointer
|
||||
const char *hello = "Hello, World\n";
|
||||
const char **p = &hello;
|
||||
(*p)++;
|
||||
printf(hello); // BAD
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
{
|
||||
// Same as above block but through a C++ reference
|
||||
const char *hello = "Hello, World\n";
|
||||
const char *&p = hello;
|
||||
p++;
|
||||
printf(hello); // BAD [NOT DETECTED]
|
||||
printf(hello); // GOOD
|
||||
}
|
||||
if (gettext_debug) {
|
||||
printf(new char[100]); // BAD
|
||||
printf(new char[100]); // BAD [FALSE NEGATIVE]
|
||||
}
|
||||
{
|
||||
const char *hello = "Hello, World\n";
|
||||
const char *const *p = &hello; // harmless reference to const pointer
|
||||
printf(hello); // GOOD [FALSE POSITIVE]
|
||||
hello++; // modification comes after use and so does no harm
|
||||
const char *const *p = &hello;
|
||||
printf(hello); // GOOD
|
||||
hello++;
|
||||
}
|
||||
printf(argc > 2 ? "More than one\n" : _("Only one\n")); // GOOD
|
||||
|
||||
@@ -154,7 +154,7 @@ void print_ith_message() {
|
||||
|
||||
void fmt_via_strcpy(char *data) {
|
||||
strcpy(data, "some string");
|
||||
printf(data); // BAD
|
||||
printf(data); // GOOD [FALSE POSITIVE: Due to inaccurate dataflow killers]
|
||||
}
|
||||
|
||||
void fmt_with_assignment() {
|
||||
@@ -163,3 +163,87 @@ void fmt_with_assignment() {
|
||||
x = y = "a";
|
||||
printf(y); // GOOD
|
||||
}
|
||||
|
||||
void fmt_via_strcpy_bad(char *data) {
|
||||
char res[100];
|
||||
strcpy(res, data);
|
||||
printf(res); // BAD
|
||||
}
|
||||
|
||||
|
||||
int wprintf(const wchar_t *format,...);
|
||||
typedef wchar_t *STRSAFE_LPWSTR;
|
||||
typedef const wchar_t *STRSAFE_LPCWSTR;
|
||||
typedef unsigned int size_t;
|
||||
|
||||
void StringCchPrintfW(
|
||||
STRSAFE_LPWSTR pszDest,
|
||||
size_t cchDest,
|
||||
STRSAFE_LPCWSTR pszFormat,
|
||||
...
|
||||
);
|
||||
|
||||
void wchar_t_test_good(){
|
||||
wchar_t wstr[100];
|
||||
StringCchPrintfW(wstr, 100, L"STRING"); // GOOD
|
||||
|
||||
wprintf(wstr); // GOOD
|
||||
}
|
||||
|
||||
void wchar_t_test_bad(wchar_t* str){
|
||||
wchar_t wstr[100];
|
||||
StringCchPrintfW(wstr, 100, str); // BAD
|
||||
|
||||
wprintf(wstr); // BAD
|
||||
}
|
||||
|
||||
const char* get_string();
|
||||
|
||||
void pointer_arithmetic_test_on_bad_string(){
|
||||
{
|
||||
const char *hello = get_string();
|
||||
printf(hello + 1); // BAD
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
const char *hello = get_string();
|
||||
hello += 1;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "x = x + 1" syntax
|
||||
const char *hello = get_string();
|
||||
hello = hello + 1;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "x++" syntax
|
||||
const char *hello = get_string();
|
||||
hello++;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
// Same as above block but using "++x" as subexpression
|
||||
const char *hello = get_string();
|
||||
printf(++hello); // BAD
|
||||
}
|
||||
{
|
||||
// Same as above block but through a pointer
|
||||
const char *hello = get_string();
|
||||
const char **p = &hello;
|
||||
(*p)++;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
// Same as above block but through a C++ reference
|
||||
const char *hello = get_string();
|
||||
const char *&p = hello;
|
||||
p++;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
{
|
||||
const char *hello = get_string();
|
||||
const char *const *p = &hello;
|
||||
printf(hello); // BAD
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user