Merge pull request #15513 from microsoft/50-model-gettext-family-of-string-operations

Added model for gettext variants.
This commit is contained in:
Mathias Vorreiter Pedersen
2024-02-07 15:48:16 +00:00
committed by GitHub
6 changed files with 153 additions and 16 deletions

View File

@@ -3,6 +3,7 @@ private import implementations.Deallocation
private import implementations.Fread
private import implementations.Getenv
private import implementations.Gets
private import implementations.GetText
private import implementations.IdentityFunction
private import implementations.Inet
private import implementations.Iterator

View File

@@ -0,0 +1,33 @@
import semmle.code.cpp.models.interfaces.DataFlow
/**
 * Gets the index of a parameter of the `gettext`-family function `f` that
 * holds message text to be translated.
 *
 * The plural variants take two such parameters (one for the singular form
 * and one for the plural form), so this predicate can have more than one
 * result for the same `f`.
 */
private int getTextArg(Function f) {
  // basic variations of gettext: exactly one message parameter
  f.hasGlobalOrStdName("gettext") and result = 0
  or
  f.hasGlobalOrStdName(["dgettext", "dcgettext"]) and result = 1
  or
  // plural variations of gettext that take one format string for the
  // singular form and another for the plural form
  f.hasGlobalOrStdName("ngettext") and result = [0, 1]
  or
  f.hasGlobalOrStdName(["dngettext", "dcngettext"]) and result = [1, 2]
}
/**
 * A function from the `gettext` family, as recognized by `getTextArg`.
 *
 * Data flows from the pointee of each message-text parameter to the pointee
 * of the return value.
 */
class GetTextFunction extends DataFlowFunction {
  /** An index of a message-text parameter of this function. */
  int argInd;

  GetTextFunction() { getTextArg(this) = argInd }

  override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
    output.isReturnValueDeref() and
    input.isParameterDeref(argInd)
  }
}

View File

@@ -16,6 +16,7 @@
*/
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.models.implementations.GetText
import semmle.code.cpp.commons.Printf
// For the following `...gettext` functions, we assume that
@@ -26,30 +27,21 @@ predicate whitelistFunction(Function f, int arg) {
// basic variations of gettext
f.getName() = "_" and arg = 0
or
f.getName() = "gettext" and arg = 0
or
f.getName() = "dgettext" and arg = 1
or
f.getName() = "dcgettext" and arg = 1
or
// plural variations of gettext that take one format string for singular and another for plural form
f.getName() = "ngettext" and
(arg = 0 or arg = 1)
or
f.getName() = "dngettext" and
(arg = 1 or arg = 2)
or
f.getName() = "dcngettext" and
(arg = 1 or arg = 2)
exists(FunctionInput input |
f.(GetTextFunction).hasDataFlow(input, _) and
input.isParameterDeref(arg)
)
}
// we assume that ALL uses of the `_` macro
// we assume that ALL uses of the `_` macro (and calls to `gettext`)
// return constant string literals
predicate underscoreMacro(Expr e) {
  // the expression generated by an invocation of the `_` macro
  e = any(MacroInvocation mi | mi.getMacroName() = "_").getExpr()
  or
  // a call to any modeled `gettext`-family function
  exists(GetTextFunction gettext | e = gettext.getACallToThisFunction())
}
/**

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added dataflow models for the `gettext` function variants.

View File

@@ -303,6 +303,12 @@ irFlow
| test.cpp:914:46:914:53 | source | test.cpp:919:10:919:30 | global_pointer_static |
| test.cpp:915:57:915:76 | *indirect_source(1) | test.cpp:921:19:921:50 | *global_pointer_static_indirect_1 |
| test.cpp:932:23:932:28 | call to source | test.cpp:937:10:937:24 | * ... |
| test.cpp:958:18:958:32 | *call to indirect_source | test.cpp:961:19:961:28 | *translated |
| test.cpp:973:18:973:32 | *call to indirect_source | test.cpp:977:19:977:28 | *translated |
| test.cpp:994:18:994:32 | *call to indirect_source | test.cpp:999:19:999:28 | *translated |
| test.cpp:994:18:994:32 | *call to indirect_source | test.cpp:1003:19:1003:28 | *translated |
| test.cpp:1021:18:1021:32 | *call to indirect_source | test.cpp:1027:19:1027:28 | *translated |
| test.cpp:1021:18:1021:32 | *call to indirect_source | test.cpp:1031:19:1031:28 | *translated |
| true_upon_entry.cpp:9:11:9:16 | call to source | true_upon_entry.cpp:13:8:13:8 | x |
| true_upon_entry.cpp:17:11:17:16 | call to source | true_upon_entry.cpp:21:8:21:8 | x |
| true_upon_entry.cpp:27:9:27:14 | call to source | true_upon_entry.cpp:29:8:29:8 | x |

View File

@@ -936,4 +936,105 @@ namespace global_variable_conflation_test {
sink(global_pointer); // clean
sink(*global_pointer); // $ ir MISSING: ast
}
}
char* gettext(const char*); // (message)
char* dgettext(const char*, const char*); // (domain, message)
char* ngettext(const char*, const char*, unsigned long int); // (singular, plural, count)
char* dngettext (const char*, const char *, const char *, unsigned long int); // (domain, singular, plural, count)
namespace test_gettext { // gettext-family flow tests: only a tainted pointee (indirect_source) passed as a message argument is expected at indirect_sink
  char* source();
  char* indirect_source();

  void test_gettext() {
    char* data = source();
    char* translated = gettext(data);
    sink(translated); // clean
    indirect_sink(translated); // clean
  }

  void indirect_test_gettext() { // exercises gettext with a tainted pointee
    char* data = indirect_source();
    char* translated = gettext(data);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast
  }

  void test_dgettext() {
    char* data = source();
    char* domain = source(); // Should not trace from this source
    char* translated = dgettext(domain, data);
    sink(translated); // clean
    indirect_sink(translated); // clean
  }

  void indirect_test_dgettext() { // exercises dgettext with a tainted pointee
    char* data = indirect_source();
    char* domain = indirect_source(); // Should not trace from this source
    char* translated = dgettext(domain, data);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast
  }

  void test_ngettext() {
    char* data = source();
    char* np = nullptr; // Doesn't count as a source

    char* translated = ngettext(data, np, 0);
    sink(translated); // clean
    indirect_sink(translated); // clean

    translated = ngettext(np, data, 0);
    sink(translated); // clean
    indirect_sink(translated); // clean
  }

  void indirect_test_ngettext() {
    char* data = indirect_source();
    char* np = nullptr; // Doesn't count as a source

    char* translated = ngettext(data, np, 0);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast

    translated = ngettext(np, data, 0);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast
  }

  void test_dngettext() {
    char* data = source();
    char* np = nullptr; // Doesn't count as a source
    char* domain = source(); // Should not trace from this source

    char* translated = dngettext(domain, data, np, 0);
    sink(translated); // clean
    indirect_sink(translated); // clean

    translated = dngettext(domain, np, data, 0);
    sink(translated); // clean
    indirect_sink(translated); // clean
  }

  void indirect_test_dngettext() {
    char* data = indirect_source();
    char* np = nullptr; // Doesn't count as a source
    char* domain = indirect_source(); // Should not trace from this source

    char* translated = dngettext(domain, data, np, 0);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast

    translated = dngettext(domain, np, data, 0);
    sink(translated); // clean
    indirect_sink(translated); // $ ir MISSING: ast
  }

  void indirect_test_gettext_no_flow_from_domain() {
    char* domain = source(); // Should not trace from this source
    char* translated = dgettext(domain, nullptr);
    sink(translated); // clean
    indirect_sink(translated); // clean
  }
}