Merge pull request #14928 from MathiasVP/surprising-lifetimes-c_str

C++: Add a new query for calling `c_str` on temporary objects
This commit is contained in:
Mathias Vorreiter Pedersen
2023-11-29 10:15:11 +00:00
committed by GitHub
13 changed files with 473 additions and 9 deletions

View File

@@ -123,7 +123,7 @@ private class StdSequenceContainerData extends TaintFunction {
/**
* The standard container functions `push_back` and `push_front`.
*/
private class StdSequenceContainerPush extends TaintFunction {
class StdSequenceContainerPush extends MemberFunction {
StdSequenceContainerPush() {
this.getClassAndName("push_back") instanceof Vector or
this.getClassAndName(["push_back", "push_front"]) instanceof Deque or
@@ -131,6 +131,17 @@ private class StdSequenceContainerPush extends TaintFunction {
this.getClassAndName(["push_back", "push_front"]) instanceof List
}
/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}
private class StdSequenceContainerPushModel extends StdSequenceContainerPush, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from parameter to qualifier
input.isParameterDeref(0) and
@@ -160,7 +171,7 @@ private class StdSequenceContainerFrontBack extends TaintFunction {
/**
* The standard container functions `insert` and `insert_after`.
*/
private class StdSequenceContainerInsert extends TaintFunction {
class StdSequenceContainerInsert extends MemberFunction {
StdSequenceContainerInsert() {
this.getClassAndName("insert") instanceof Deque or
this.getClassAndName("insert") instanceof List or
@@ -181,7 +192,9 @@ private class StdSequenceContainerInsert extends TaintFunction {
* Gets the index of a parameter to this function that is an iterator.
*/
int getAnIteratorParameterIndex() { this.getParameter(result).getType() instanceof Iterator }
}
private class StdSequenceContainerInsertModel extends StdSequenceContainerInsert, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from parameter to container itself (qualifier) and return value
(
@@ -253,11 +266,28 @@ private class StdSequenceContainerAt extends TaintFunction {
}
/**
* The standard vector `emplace` function.
* The standard `emplace` function.
*/
class StdVectorEmplace extends TaintFunction {
StdVectorEmplace() { this.getClassAndName("emplace") instanceof Vector }
class StdSequenceEmplace extends MemberFunction {
StdSequenceEmplace() {
this.getClassAndName("emplace") instanceof Vector
or
this.getClassAndName("emplace") instanceof List
or
this.getClassAndName("emplace") instanceof Deque
}
/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}
private class StdSequenceEmplaceModel extends StdSequenceEmplace, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from any parameter except the position iterator to qualifier and return value
// (here we assume taint flow from any constructor parameter to the constructed object)
@@ -269,12 +299,36 @@ class StdVectorEmplace extends TaintFunction {
}
}
/**
* The standard vector `emplace` function.
*/
class StdVectorEmplace extends StdSequenceEmplace {
StdVectorEmplace() { this.getDeclaringType() instanceof Vector }
}
/**
* The standard vector `emplace_back` function.
*/
class StdVectorEmplaceBack extends TaintFunction {
StdVectorEmplaceBack() { this.getClassAndName("emplace_back") instanceof Vector }
class StdSequenceEmplaceBack extends MemberFunction {
StdSequenceEmplaceBack() {
this.getClassAndName("emplace_back") instanceof Vector
or
this.getClassAndName("emplace_back") instanceof List
or
this.getClassAndName("emplace_back") instanceof Deque
}
/**
* Gets the index of a parameter to this function that is a reference to the
* value type of the container.
*/
int getAValueTypeParameterIndex() {
this.getParameter(result).getUnspecifiedType().(ReferenceType).getBaseType() =
this.getDeclaringType().getTemplateArgument(0).(Type).getUnspecifiedType() // i.e. the `T` of this `std::vector<T>`
}
}
private class StdSequenceEmplaceBackModel extends StdSequenceEmplaceBack, TaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from any parameter to qualifier
// (here we assume taint flow from any constructor parameter to the constructed object)
@@ -282,3 +336,10 @@ class StdVectorEmplaceBack extends TaintFunction {
output.isQualifierObject()
}
}
/**
* The standard vector `emplace_back` function.
*/
class StdVectorEmplaceBack extends StdSequenceEmplaceBack {
StdVectorEmplaceBack() { this.getDeclaringType() instanceof Vector }
}

View File

@@ -99,9 +99,11 @@ private class StdStringConstructor extends Constructor, StdStringTaintFunction {
/**
* The `std::string` function `c_str`.
*/
private class StdStringCStr extends StdStringTaintFunction {
class StdStringCStr extends MemberFunction {
StdStringCStr() { this.getClassAndName("c_str") instanceof StdBasicString }
}
private class StdStringCStrModel extends StdStringCStr, StdStringTaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from string itself (qualifier) to return value
input.isQualifierObject() and
@@ -112,9 +114,11 @@ private class StdStringCStr extends StdStringTaintFunction {
/**
* The `std::string` function `data`.
*/
private class StdStringData extends StdStringTaintFunction {
class StdStringData extends MemberFunction {
StdStringData() { this.getClassAndName("data") instanceof StdBasicString }
}
private class StdStringDataModel extends StdStringData, StdStringTaintFunction {
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
// flow from string itself (qualifier) to return value
input.isQualifierObject() and

View File

@@ -0,0 +1,44 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Calling <code>c_str</code> on a <code>std::string</code> object returns a pointer to the underlying character array.
When the <code>std::string</code> object is destroyed, the pointer returned by <code>c_str</code> is no
longer valid. If the pointer is used after the <code>std::string</code> object is destroyed, then the behavior is undefined.
</p>
</overview>
<recommendation>
<p>
Ensure that the pointer returned by <code>c_str</code> does not outlive the underlying <code>std::string</code> object.
</p>
</recommendation>
<example>
<p>
The following example concatenates two <code>std::string</code> objects, and then converts the resulting string to a
C string using <code>c_str</code> so that it can be passed to the <code>work</code> function.
However, the underlying <code>std::string</code> object that represents the concatenated string is destroyed as soon as the call
to <code>c_str</code> returns. This means that <code>work</code> is given a pointer to invalid memory.
</p>
<sample src="UseOfStringAfterLifetimeEndsBad.cpp" />
<p>
The following example fixes the above code by ensuring that the pointer returned by the call to <code>c_str</code> does
not outlive the underlying <code>std::string</code> objects. This ensures that the pointer passed to <code>work</code>
points to valid memory.
</p>
<sample src="UseOfStringAfterLifetimeEndsGood.cpp" />
</example>
<references>
<li><a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/MEM50-CPP.+Do+not+access+freed+memory">MEM50-CPP. Do not access freed memory</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,100 @@
/**
* @name Use of string after lifetime ends
* @description If the value of a call to 'c_str' outlives the underlying object it may lead to unexpected behavior.
* @kind problem
* @precision high
* @id cpp/use-of-string-after-lifetime-ends
* @problem.severity warning
* @security-severity 8.8
* @tags reliability
* security
* external/cwe/cwe-416
* external/cwe/cwe-664
*/
import cpp
import semmle.code.cpp.models.implementations.StdString
import semmle.code.cpp.models.implementations.StdContainer
/**
* Holds if `e` will be consumed by its parent as a glvalue and does not have
* an lvalue-to-rvalue conversion. This means that it will be materialized into
* a temporary object.
*/
predicate isTemporary(Expr e) {
e instanceof TemporaryObjectExpr
or
e.isPRValueCategory() and
e.getUnspecifiedType() instanceof Class and
not e.hasLValueToRValueConversion()
}
/** Holds if `e` is written to a container. */
predicate isStoredInContainer(Expr e) {
exists(StdSequenceContainerInsert insert, Call call, int index |
call = insert.getACallToThisFunction() and
index = insert.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceContainerPush push, Call call, int index |
call = push.getACallToThisFunction() and
index = push.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceEmplace emplace, Call call, int index |
call = emplace.getACallToThisFunction() and
index = emplace.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
or
exists(StdSequenceEmplaceBack emplaceBack, Call call, int index |
call = emplaceBack.getACallToThisFunction() and
index = emplaceBack.getAValueTypeParameterIndex() and
call.getArgument(index) = e
)
}
/**
* Holds if the value of `e` outlives the enclosing full expression. For
* example, because the value is stored in a local variable.
*/
predicate outlivesFullExpr(Expr e) {
any(Assignment assign).getRValue() = e
or
any(Variable v).getInitializer().getExpr() = e
or
any(ReturnStmt ret).getExpr() = e
or
exists(ConditionalExpr cond |
outlivesFullExpr(cond) and
[cond.getThen(), cond.getElse()] = e
)
or
exists(BinaryOperation bin |
outlivesFullExpr(bin) and
bin.getAnOperand() = e
)
or
exists(ClassAggregateLiteral aggr |
outlivesFullExpr(aggr) and
aggr.getAFieldExpr(_) = e
)
or
exists(ArrayAggregateLiteral aggr |
outlivesFullExpr(aggr) and
aggr.getAnElementExpr(_) = e
)
or
isStoredInContainer(e)
}
from Call c
where
outlivesFullExpr(c) and
not c.isFromUninstantiatedTemplate(_) and
(c.getTarget() instanceof StdStringCStr or c.getTarget() instanceof StdStringData) and
isTemporary(c.getQualifier().getFullyConverted())
select c,
"The underlying string object is destroyed after the call to '" + c.getTarget() + "' returns."

View File

@@ -0,0 +1,9 @@
#include <string>
void work(const char*);
// BAD: the concatenated string is deallocated when `c_str` returns. So `work`
// is given a pointer to invalid memory.
void work_with_combined_string_bad(std::string s1, std::string s2) {
const char* combined_string = (s1 + s2).c_str();
work(combined_string);
}

View File

@@ -0,0 +1,9 @@
#include <string>
void work(const char*);
// GOOD: the concatenated string outlives the call to `work`. So the pointer
// obtainted from `c_str` is valid.
void work_with_combined_string_good(std::string s1, std::string s2) {
auto combined_string = s1 + s2;
work(combined_string.c_str());
}

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `cpp/use-of-string-after-lifetime-ends`, to detect calls to `c_str` on strings that will be destroyed immediately.

View File

@@ -0,0 +1,12 @@
| test.cpp:165:34:165:38 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:166:39:166:43 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:167:44:167:48 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:169:29:169:33 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:178:37:178:41 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:181:39:181:43 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:183:37:183:41 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |
| test.cpp:187:34:187:37 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:188:39:188:42 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:189:44:189:47 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:191:29:191:32 | call to data | The underlying string object is destroyed after the call to 'data' returns. |
| test.cpp:193:31:193:35 | call to c_str | The underlying string object is destroyed after the call to 'c_str' returns. |

View File

@@ -0,0 +1,2 @@
Security/CWE/CWE-416/UseOfStringAfterLifetimeEnds.ql

View File

@@ -0,0 +1,219 @@
typedef unsigned long size_t;
namespace std {
template<class T> struct remove_reference { typedef T type; };
template<class T> struct remove_reference<T &> { typedef T type; };
template<class T> struct remove_reference<T &&> { typedef T type; };
template<class T> using remove_reference_t = typename remove_reference<T>::type;
template< class T > std::remove_reference_t<T>&& move( T&& t );
}
// --- iterator ---
namespace std {
template<class T> struct remove_const { typedef T type; };
template<class T> struct remove_const<const T> { typedef T type; };
// `remove_const_t<T>` removes any `const` specifier from `T`
template<class T> using remove_const_t = typename remove_const<T>::type;
struct ptrdiff_t;
template<class I> struct iterator_traits;
template <class Category,
class value_type,
class difference_type = ptrdiff_t,
class pointer_type = value_type*,
class reference_type = value_type&>
struct iterator {
typedef Category iterator_category;
iterator();
iterator(iterator<Category, remove_const_t<value_type> > const &other); // non-const -> const conversion constructor
iterator &operator++();
iterator operator++(int);
iterator &operator--();
iterator operator--(int);
bool operator==(iterator other) const;
bool operator!=(iterator other) const;
reference_type operator*() const;
pointer_type operator->() const;
iterator operator+(int);
iterator operator-(int);
iterator &operator+=(int);
iterator &operator-=(int);
int operator-(iterator);
reference_type operator[](int);
};
struct input_iterator_tag {};
struct forward_iterator_tag : public input_iterator_tag {};
struct bidirectional_iterator_tag : public forward_iterator_tag {};
struct random_access_iterator_tag : public bidirectional_iterator_tag {};
}
// --- string ---
namespace std
{
template<class charT> struct char_traits;
typedef size_t streamsize;
template <class T> class allocator {
public:
allocator() throw();
typedef size_t size_type;
};
template<class charT, class traits = char_traits<charT>, class Allocator = allocator<charT> >
class basic_string {
public:
using value_type = charT;
using reference = value_type&;
using const_reference = const value_type&;
typedef typename Allocator::size_type size_type;
static const size_type npos = -1;
explicit basic_string(const Allocator& a = Allocator());
basic_string(const charT* s, const Allocator& a = Allocator());
template<class InputIterator> basic_string(InputIterator begin, InputIterator end, const Allocator& a = Allocator());
const charT* c_str() const;
charT* data() noexcept;
size_t length() const;
typedef std::iterator<random_access_iterator_tag, charT> iterator;
typedef std::iterator<random_access_iterator_tag, const charT> const_iterator;
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
const_iterator cbegin() const;
const_iterator cend() const;
const_reference operator[](size_type pos) const;
reference operator[](size_type pos);
const_reference at(size_type n) const;
reference at(size_type n);
basic_string& insert(size_type pos, const basic_string& str);
basic_string& insert(size_type pos, size_type n, charT c);
basic_string& insert(size_type pos, const charT* s);
iterator insert(const_iterator p, size_type n, charT c);
template<class InputIterator> iterator insert(const_iterator p, InputIterator first, InputIterator last);
basic_string& replace(size_type pos1, size_type n1, const basic_string& str);
basic_string& replace(size_type pos1, size_type n1, size_type n2, charT c);
};
template<class charT, class traits, class Allocator> basic_string<charT, traits, Allocator> operator+(const basic_string<charT, traits, Allocator>& lhs, const basic_string<charT, traits, Allocator>& rhs);
template<class charT, class traits, class Allocator> basic_string<charT, traits, Allocator> operator+(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs);
typedef basic_string<char> string;
}
// --- vector ---
namespace std {
template<class T, class Allocator = allocator<T>>
class vector {
public:
using value_type = T;
using reference = value_type&;
using const_reference = const value_type&;
using size_type = unsigned int;
using iterator = std::iterator<random_access_iterator_tag, T>;
using const_iterator = std::iterator<random_access_iterator_tag, const T>;
vector() noexcept(noexcept(Allocator()));
explicit vector(const Allocator&) noexcept;
explicit vector(size_type n, const Allocator& = Allocator());
vector(size_type n, const T& value, const Allocator& = Allocator());
template<class InputIterator, class IteratorCategory = typename InputIterator::iterator_category> vector(InputIterator first, InputIterator last, const Allocator& = Allocator());
~vector();
void push_back(const T& x);
void push_back(T&& x);
iterator insert(const_iterator position, const T& x);
iterator insert(const_iterator position, T&& x);
iterator insert(const_iterator position, size_type n, const T& x);
template<class InputIterator> iterator insert(const_iterator position, InputIterator first, InputIterator last);
template <class... Args> iterator emplace (const_iterator position, Args&&... args);
template <class... Args> void emplace_back (Args&&... args);
};
}
struct S {
const char* s;
};
void call_by_value(S);
void call_by_cref(const S&);
void call(const char*);
const char* test1(bool b1, bool b2) {
auto s1 = std::string("hello").c_str(); // BAD
auto s2 = b1 ? std::string("hello").c_str() : ""; // BAD
auto s3 = b2 ? "" : std::string("hello").c_str(); // BAD
const char* s4;
s4 = std::string("hello").c_str(); // BAD
call(std::string("hello").c_str()); // GOOD
call(b1 ? std::string("hello").c_str() : ""); // GOOD
call(b1 ? (b2 ? "" : std::string("hello").c_str()) : ""); // GOOD
call_by_value({ std::string("hello").c_str() }); // GOOD
call_by_cref({ std::string("hello").c_str() }); // GOOD
std::vector<const char*> v1;
v1.push_back(std::string("hello").c_str()); // BAD
std::vector<S> v2;
v2.push_back({ std::string("hello").c_str() }); // BAD
S s5[] = { { std::string("hello").c_str() } }; // BAD
char c = std::string("hello").c_str()[0]; // GOOD
auto s6 = std::string("hello").data(); // BAD
auto s7 = b1 ? std::string("hello").data() : ""; // BAD
auto s8 = b2 ? "" : std::string("hello").data(); // BAD
char* s9;
s9 = std::string("hello").data(); // BAD
return std::string("hello").c_str(); // BAD
}
void test2(bool b1, bool b2) {
std::string s("hello");
auto s1 = s.c_str(); // GOOD
auto s2 = b1 ? s.c_str() : ""; // GOOD
auto s3 = b2 ? "" : s.c_str(); // GOOD
const char* s4;
s4 = s.c_str(); // GOOD
std::string& sRef = s;
auto s5 = sRef.c_str(); // GOOD
auto s6 = b1 ? sRef.c_str() : ""; // GOOD
auto s7 = b2 ? "" : sRef.c_str(); // GOOD
const char* s8;
s8 = sRef.c_str(); // GOOD
std::string&& sRefRef = std::string("hello");
auto s9 = sRefRef.c_str(); // GOOD
auto s10 = b1 ? sRefRef.c_str() : ""; // GOOD
auto s11 = b2 ? "" : sRefRef.c_str(); // GOOD
const char* s12;
s12 = sRefRef.c_str(); // GOOD
}