Merge pull request #6184 from github/rdmarsh2/improve-exec-tainted

C++: Refactor ExecTainted.ql to only report results after string concatenation
This commit is contained in:
Jonas Jensen
2021-09-29 19:21:13 +02:00
committed by GitHub
18 changed files with 793 additions and 103 deletions

View File

@@ -0,0 +1,97 @@
#if !defined(CODEQL_ITERATOR_H)
#define CODEQL_ITERATOR_H
typedef unsigned long size_t;
#include "type_traits.h"
namespace std {
struct ptrdiff_t;
template<class I> struct iterator_traits;
template <class Category,
class value_type,
class difference_type = ptrdiff_t,
class pointer_type = value_type*,
class reference_type = value_type&>
struct iterator {
typedef Category iterator_category;
iterator();
iterator(iterator<Category, remove_const_t<value_type> > const &other); // non-const -> const conversion constructor
iterator &operator++();
iterator operator++(int);
iterator &operator--();
iterator operator--(int);
bool operator==(iterator other) const;
bool operator!=(iterator other) const;
reference_type operator*() const;
pointer_type operator->() const;
iterator operator+(int);
iterator operator-(int);
iterator &operator+=(int);
iterator &operator-=(int);
int operator-(iterator);
reference_type operator[](int);
};
struct input_iterator_tag {};
struct forward_iterator_tag : public input_iterator_tag {};
struct bidirectional_iterator_tag : public forward_iterator_tag {};
struct random_access_iterator_tag : public bidirectional_iterator_tag {};
struct output_iterator_tag {};
template<class Container>
class back_insert_iterator {
protected:
Container* container = nullptr;
public:
using iterator_category = output_iterator_tag;
using value_type = void;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = void;
using container_type = Container;
constexpr back_insert_iterator() noexcept = default;
constexpr explicit back_insert_iterator(Container& x);
back_insert_iterator& operator=(const typename Container::value_type& value);
back_insert_iterator& operator=(typename Container::value_type&& value);
back_insert_iterator& operator*();
back_insert_iterator& operator++();
back_insert_iterator operator++(int);
};
template<class Container>
constexpr back_insert_iterator<Container> back_inserter(Container& x) {
return back_insert_iterator<Container>(x);
}
template<class Container>
class front_insert_iterator {
protected:
Container* container = nullptr;
public:
using iterator_category = output_iterator_tag;
using value_type = void;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = void;
using container_type = Container;
constexpr front_insert_iterator() noexcept = default;
constexpr explicit front_insert_iterator(Container& x);
constexpr front_insert_iterator& operator=(const typename Container::value_type& value);
constexpr front_insert_iterator& operator=(typename Container::value_type&& value);
constexpr front_insert_iterator& operator*();
constexpr front_insert_iterator& operator++();
constexpr front_insert_iterator operator++(int);
};
template<class Container>
constexpr front_insert_iterator<Container> front_inserter(Container& x) {
return front_insert_iterator<Container>(x);
}
}
#endif

View File

@@ -0,0 +1,85 @@
#if !defined(CODEQL_STRING_H)
#define CODEQL_STRING_H
#include "iterator.h"
namespace std
{
template<class charT> struct char_traits;
typedef size_t streamsize;
template <class T> class allocator {
public:
allocator() throw();
typedef size_t size_type;
};
template<class charT, class traits = char_traits<charT>, class Allocator = allocator<charT> >
class basic_string {
public:
using value_type = charT;
using reference = value_type&;
using const_reference = const value_type&;
typedef typename Allocator::size_type size_type;
static const size_type npos = -1;
explicit basic_string(const Allocator& a = Allocator());
basic_string(const charT* s, const Allocator& a = Allocator());
template<class InputIterator> basic_string(InputIterator begin, InputIterator end, const Allocator& a = Allocator());
const charT* c_str() const;
charT* data() noexcept;
size_t length() const;
typedef std::iterator<random_access_iterator_tag, charT> iterator;
typedef std::iterator<random_access_iterator_tag, const charT> const_iterator;
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
const_iterator cbegin() const;
const_iterator cend() const;
void push_back(charT c);
const charT& front() const;
charT& front();
const charT& back() const;
charT& back();
const_reference operator[](size_type pos) const;
reference operator[](size_type pos);
const_reference at(size_type n) const;
reference at(size_type n);
template<class T> basic_string& operator+=(const T& t);
basic_string& operator+=(const charT* s);
basic_string& append(const basic_string& str);
basic_string& append(const charT* s);
basic_string& append(size_type n, charT c);
template<class InputIterator> basic_string& append(InputIterator first, InputIterator last);
basic_string& assign(const basic_string& str);
basic_string& assign(size_type n, charT c);
template<class InputIterator> basic_string& assign(InputIterator first, InputIterator last);
basic_string& insert(size_type pos, const basic_string& str);
basic_string& insert(size_type pos, size_type n, charT c);
basic_string& insert(size_type pos, const charT* s);
iterator insert(const_iterator p, size_type n, charT c);
template<class InputIterator> iterator insert(const_iterator p, InputIterator first, InputIterator last);
basic_string& replace(size_type pos1, size_type n1, const basic_string& str);
basic_string& replace(size_type pos1, size_type n1, size_type n2, charT c);
size_type copy(charT* s, size_type n, size_type pos = 0) const;
void clear() noexcept;
basic_string substr(size_type pos = 0, size_type n = npos) const;
void swap(basic_string& s) noexcept/*(allocator_traits<Allocator>::propagate_on_container_swap::value || allocator_traits<Allocator>::is_always_equal::value)*/;
};
template<class charT, class traits, class Allocator> basic_string<charT, traits, Allocator> operator+(const basic_string<charT, traits, Allocator>& lhs, const basic_string<charT, traits, Allocator>& rhs);
template<class charT, class traits, class Allocator> basic_string<charT, traits, Allocator> operator+(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs);
template<class charT, class traits, class Allocator> basic_string<charT, traits, Allocator> operator+(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs);
typedef basic_string<char> string;
}
#endif

View File

@@ -1,21 +1,44 @@
#if !defined(CODEQL_TYPE_TRAITS_H)
#define CODEQL_TYPE_TRAITS_H
typedef unsigned long size_t;
namespace std {
template<typename T>
struct remove_reference {
template<class T>
struct remove_const { typedef T type; };
template<class T>
struct remove_const<const T> { typedef T type; };
// `remove_const_t<T>` removes any `const` specifier from `T`
template<class T>
using remove_const_t = typename remove_const<T>::type;
template<class T>
struct remove_reference { typedef T type; };
template<class T>
struct remove_reference<T &> { typedef T type; };
template<class T>
struct remove_reference<T &&> { typedef T type; };
// `remove_reference_t<T>` removes any `&` from `T`
template<class T>
using remove_reference_t = typename remove_reference<T>::type;
template<class T>
struct decay_impl {
typedef T type;
};
template<typename T>
struct remove_reference<T&> {
typedef T type;
template<class T, size_t t_size>
struct decay_impl<T[t_size]> {
typedef T* type;
};
template<typename T>
struct remove_reference<T&&> {
typedef T type;
};
template<class T>
using decay_t = typename decay_impl<remove_reference_t<T>>::type;
}
#endif

View File

@@ -1 +1,17 @@
| tests.cpp:53:16:53:19 | data | This argument to an OS command is derived from $@ and then passed to system(string) | tests.cpp:33:34:33:39 | call to getenv | user input (getenv) |
edges
| tests.cpp:33:34:33:39 | call to getenv | tests.cpp:38:39:38:49 | environment indirection |
| tests.cpp:38:25:38:36 | strncat output argument | tests.cpp:42:5:42:16 | Phi |
| tests.cpp:38:39:38:49 | environment indirection | tests.cpp:38:25:38:36 | strncat output argument |
| tests.cpp:38:39:38:49 | environment indirection | tests.cpp:38:25:38:36 | strncat output argument |
| tests.cpp:42:5:42:16 | Phi | tests.cpp:51:22:51:25 | badSource output argument |
| tests.cpp:51:22:51:25 | badSource output argument | tests.cpp:53:16:53:19 | data indirection |
nodes
| tests.cpp:33:34:33:39 | call to getenv | semmle.label | call to getenv |
| tests.cpp:38:25:38:36 | strncat output argument | semmle.label | strncat output argument |
| tests.cpp:38:39:38:49 | environment indirection | semmle.label | environment indirection |
| tests.cpp:42:5:42:16 | Phi | semmle.label | Phi |
| tests.cpp:51:22:51:25 | badSource output argument | semmle.label | badSource output argument |
| tests.cpp:53:16:53:19 | data indirection | semmle.label | data indirection |
subpaths
#select
| tests.cpp:53:16:53:19 | data | tests.cpp:33:34:33:39 | call to getenv | tests.cpp:53:16:53:19 | data indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | tests.cpp:33:34:33:39 | call to getenv | user input (an environment variable) | tests.cpp:38:25:38:36 | strncat output argument | strncat output argument |

View File

@@ -1 +1,67 @@
| test.c:21:12:21:19 | command1 | This argument to an OS command is derived from $@ and then passed to system(string) | test.c:14:20:14:23 | argv | user input (argv) |
edges
| test.cpp:16:20:16:23 | argv | test.cpp:22:45:22:52 | userName indirection |
| test.cpp:22:13:22:20 | sprintf output argument | test.cpp:23:12:23:19 | command1 indirection |
| test.cpp:22:45:22:52 | userName indirection | test.cpp:22:13:22:20 | sprintf output argument |
| test.cpp:22:45:22:52 | userName indirection | test.cpp:22:13:22:20 | sprintf output argument |
| test.cpp:47:21:47:26 | call to getenv | test.cpp:50:35:50:43 | envCflags indirection |
| test.cpp:50:11:50:17 | sprintf output argument | test.cpp:51:10:51:16 | command indirection |
| test.cpp:50:35:50:43 | envCflags indirection | test.cpp:50:11:50:17 | sprintf output argument |
| test.cpp:50:35:50:43 | envCflags indirection | test.cpp:50:11:50:17 | sprintf output argument |
| test.cpp:62:9:62:16 | fread output argument | test.cpp:64:20:64:27 | filename indirection |
| test.cpp:64:11:64:17 | strncat output argument | test.cpp:65:10:65:16 | command indirection |
| test.cpp:64:20:64:27 | filename indirection | test.cpp:64:11:64:17 | strncat output argument |
| test.cpp:64:20:64:27 | filename indirection | test.cpp:64:11:64:17 | strncat output argument |
| test.cpp:82:9:82:16 | fread output argument | test.cpp:84:20:84:27 | filename indirection |
| test.cpp:84:11:84:17 | strncat output argument | test.cpp:85:32:85:38 | command indirection |
| test.cpp:84:20:84:27 | filename indirection | test.cpp:84:11:84:17 | strncat output argument |
| test.cpp:84:20:84:27 | filename indirection | test.cpp:84:11:84:17 | strncat output argument |
| test.cpp:91:9:91:16 | fread output argument | test.cpp:93:17:93:24 | filename indirection |
| test.cpp:93:11:93:14 | strncat output argument | test.cpp:94:45:94:48 | path indirection |
| test.cpp:93:17:93:24 | filename indirection | test.cpp:93:11:93:14 | strncat output argument |
| test.cpp:93:17:93:24 | filename indirection | test.cpp:93:11:93:14 | strncat output argument |
| test.cpp:106:20:106:25 | call to getenv | test.cpp:107:33:107:36 | path indirection |
| test.cpp:113:20:113:25 | call to getenv | test.cpp:114:19:114:22 | path indirection |
| test.cpp:119:20:119:25 | call to getenv | test.cpp:120:19:120:22 | path indirection |
| test.cpp:140:9:140:11 | fread output argument | test.cpp:142:31:142:33 | str indirection |
| test.cpp:142:11:142:17 | sprintf output argument | test.cpp:143:10:143:16 | command indirection |
| test.cpp:142:31:142:33 | str indirection | test.cpp:142:11:142:17 | sprintf output argument |
| test.cpp:142:31:142:33 | str indirection | test.cpp:142:11:142:17 | sprintf output argument |
nodes
| test.cpp:16:20:16:23 | argv | semmle.label | argv |
| test.cpp:22:13:22:20 | sprintf output argument | semmle.label | sprintf output argument |
| test.cpp:22:45:22:52 | userName indirection | semmle.label | userName indirection |
| test.cpp:23:12:23:19 | command1 indirection | semmle.label | command1 indirection |
| test.cpp:47:21:47:26 | call to getenv | semmle.label | call to getenv |
| test.cpp:50:11:50:17 | sprintf output argument | semmle.label | sprintf output argument |
| test.cpp:50:35:50:43 | envCflags indirection | semmle.label | envCflags indirection |
| test.cpp:51:10:51:16 | command indirection | semmle.label | command indirection |
| test.cpp:62:9:62:16 | fread output argument | semmle.label | fread output argument |
| test.cpp:64:11:64:17 | strncat output argument | semmle.label | strncat output argument |
| test.cpp:64:20:64:27 | filename indirection | semmle.label | filename indirection |
| test.cpp:65:10:65:16 | command indirection | semmle.label | command indirection |
| test.cpp:82:9:82:16 | fread output argument | semmle.label | fread output argument |
| test.cpp:84:11:84:17 | strncat output argument | semmle.label | strncat output argument |
| test.cpp:84:20:84:27 | filename indirection | semmle.label | filename indirection |
| test.cpp:85:32:85:38 | command indirection | semmle.label | command indirection |
| test.cpp:91:9:91:16 | fread output argument | semmle.label | fread output argument |
| test.cpp:93:11:93:14 | strncat output argument | semmle.label | strncat output argument |
| test.cpp:93:17:93:24 | filename indirection | semmle.label | filename indirection |
| test.cpp:94:45:94:48 | path indirection | semmle.label | path indirection |
| test.cpp:106:20:106:25 | call to getenv | semmle.label | call to getenv |
| test.cpp:107:33:107:36 | path indirection | semmle.label | path indirection |
| test.cpp:113:20:113:25 | call to getenv | semmle.label | call to getenv |
| test.cpp:114:19:114:22 | path indirection | semmle.label | path indirection |
| test.cpp:119:20:119:25 | call to getenv | semmle.label | call to getenv |
| test.cpp:120:19:120:22 | path indirection | semmle.label | path indirection |
| test.cpp:140:9:140:11 | fread output argument | semmle.label | fread output argument |
| test.cpp:142:11:142:17 | sprintf output argument | semmle.label | sprintf output argument |
| test.cpp:142:31:142:33 | str indirection | semmle.label | str indirection |
| test.cpp:143:10:143:16 | command indirection | semmle.label | command indirection |
subpaths
#select
| test.cpp:23:12:23:19 | command1 | test.cpp:16:20:16:23 | argv | test.cpp:23:12:23:19 | command1 indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:16:20:16:23 | argv | user input (a command-line argument) | test.cpp:22:13:22:20 | sprintf output argument | sprintf output argument |
| test.cpp:51:10:51:16 | command | test.cpp:47:21:47:26 | call to getenv | test.cpp:51:10:51:16 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:47:21:47:26 | call to getenv | user input (an environment variable) | test.cpp:50:11:50:17 | sprintf output argument | sprintf output argument |
| test.cpp:65:10:65:16 | command | test.cpp:62:9:62:16 | fread output argument | test.cpp:65:10:65:16 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:62:9:62:16 | fread output argument | user input (String read by fread) | test.cpp:64:11:64:17 | strncat output argument | strncat output argument |
| test.cpp:85:32:85:38 | command | test.cpp:82:9:82:16 | fread output argument | test.cpp:85:32:85:38 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:82:9:82:16 | fread output argument | user input (String read by fread) | test.cpp:84:11:84:17 | strncat output argument | strncat output argument |
| test.cpp:94:45:94:48 | path | test.cpp:91:9:91:16 | fread output argument | test.cpp:94:45:94:48 | path indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to execl | test.cpp:91:9:91:16 | fread output argument | user input (String read by fread) | test.cpp:93:11:93:14 | strncat output argument | strncat output argument |
| test.cpp:143:10:143:16 | command | test.cpp:140:9:140:11 | fread output argument | test.cpp:143:10:143:16 | command indirection | This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to system(string) | test.cpp:140:9:140:11 | fread output argument | user input (String read by fread) | test.cpp:142:11:142:17 | sprintf output argument | sprintf output argument |

View File

@@ -1,33 +0,0 @@
// Semmle test case for rule ExecTainted.ql (Uncontrolled data used in OS command)
// Associated with CWE-078: OS Command Injection. http://cwe.mitre.org/data/definitions/78.html
///// Library routines /////
int sprintf(char *s, const char *format, ...);
int system(const char *string);
extern void encodeShellString(char *shellStr, int maxChars, const char* cStr);
///// Test code /////
int main(int argc, char** argv) {
char *userName = argv[2];
{
// BAD: a string from the user is injected directly into
// a command.
char command1[1000] = {0};
sprintf(command1, "userinfo -v \"%s\"", userName);
system(command1);
}
{
// GOOD: the user string is encoded by a library routine.
char userNameQuoted[1000] = {0};
encodeShellString(userNameQuoted, 1000, userName);
char command2[1000] = {0};
sprintf(command2, "userinfo -v %s", userNameQuoted);
system(command2);
}
}

View File

@@ -0,0 +1,174 @@
// Semmle test case for rule ExecTainted.ql (Uncontrolled data used in OS command)
// Associated with CWE-078: OS Command Injection. http://cwe.mitre.org/data/definitions/78.html
///// Library routines /////
int sprintf(char *s, const char *format, ...);
int system(const char *string);
char *getenv(char *var);
extern void encodeShellString(char *shellStr, int maxChars, const char* cStr);
#include "../../../../../../include/string.h"
///// Test code /////
int main(int argc, char** argv) {
char *userName = argv[2];
{
// BAD: a string from the user is injected directly into
// a command.
char command1[1000] = {0};
sprintf(command1, "userinfo -v \"%s\"", userName);
system(command1);
}
{
// GOOD: the user string is encoded by a library routine.
char userNameQuoted[1000] = {0};
encodeShellString(userNameQuoted, 1000, userName);
char command2[1000] = {0};
sprintf(command2, "userinfo -v %s", userNameQuoted);
system(command2);
}
}
void test2(char* arg2) {
// GOOD?: the user string is the *first* part of the command, like $CC in many environments
char *envCC = getenv("CC");
char command[1000];
sprintf("%s %s", envCC, arg2);
system(command);
}
void test3(char* arg1) {
// GOOD?: the user string is a `$CFLAGS` environment variable
char *envCflags = getenv("CFLAGS");
char command[1000];
sprintf(command, "%s %s", arg1, envCflags);
system(command);
}
typedef unsigned long size_t;
typedef void FILE;
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
char *strncat(char *s1, const char *s2, size_t n);
void test4(FILE *f) {
// BAD: the user string is injected directly into a command
char command[1000] = "mv ", filename[1000];
fread(filename, 1, 1000, f);
strncat(command, filename, 1000);
system(command);
}
void test5(FILE *f) {
// GOOD?: the user string is the start of a command
char command[1000], filename[1000] = " test.txt";
fread(command, 1, 1000, f);
strncat(command, filename, 1000);
system(command);
}
int execl(char *path, char *arg1, ...);
void test6(FILE *f) {
// BAD: the user string is injected directly into a command
char command[1000] = "mv ", filename[1000];
fread(filename, 1, 1000, f);
strncat(command, filename, 1000);
execl("/bin/sh", "sh", "-c", command);
}
void test7(FILE *f) {
// GOOD [FALSE POSITIVE]: the user string is a positional argument to a shell script
char path[1000] = "/home/me/", filename[1000];
fread(filename, 1, 1000, f);
strncat(path, filename, 1000);
execl("/bin/sh", "sh", "-c", "script.sh", path);
}
void test8(char *arg2) {
// GOOD?: the user string is the *first* part of the command, like $CC in many environments
std::string envCC(getenv("CC"));
std::string command = envCC + arg2;
system(command.c_str());
}
void test9(FILE *f) {
// BAD: the user string is injected directly into a command
std::string path(getenv("something"));
std::string command = "mv " + path;
system(command.c_str());
}
void test10(FILE *f) {
// BAD: the user string is injected directly into a command
std::string path(getenv("something"));
system(("mv " + path).c_str());
}
void test11(FILE *f) {
// BAD: the user string is injected directly into a command
std::string path(getenv("something"));
system(("mv " + path).data());
}
int atoi(char *);
void test12(FILE *f) {
char temp[10];
char command[1000];
fread(temp, 1, 10, f);
int x = atoi(temp);
sprintf(command, "tail -n %d foo.log", x);
system(command); // GOOD: the user string was converted to an integer and back
}
void test13(FILE *f) {
char str[1000];
char command[1000];
fread(str, 1, 1000, f);
sprintf(command, "echo %s", str);
system(command); // BAD: the user string was printed into the command with the %s specifier
}
void test14(FILE *f) {
char str[1000];
char command[1000];
fread(str, 1, 1000, f);
sprintf(command, "echo %p", str);
system(command); // GOOD: the user string's address was printed into the command with the %p specifier
}
void test15(FILE *f) {
char temp[10];
char command[1000];
fread(temp, 1, 10, f);
int x = atoi(temp);
char temp2[10];
sprintf(temp2, "%d", x);
sprintf(command, "tail -n %s foo.log", temp2);
system(command); // GOOD: the user string was converted to an integer and back
}
// TODO: test for call context sensitivity at concatenation site
// open question: do we want to report certain sources even when they're the start of the string?