Merge pull request #17611 from microsoft/brodes/wcharcharconversion_false_positives_upstream5

Brodes/wcharcharconversion false positives upstream5
This commit is contained in:
Jeroen Ketema
2024-10-01 08:00:51 +02:00
committed by GitHub
4 changed files with 140 additions and 6 deletions

View File

@@ -13,23 +13,85 @@
*/
import cpp
import semmle.code.cpp.controlflow.Guards
class WideCharPointerType extends PointerType {
WideCharPointerType() { this.getBaseType() instanceof WideCharType }
}
/**
* Given type `t`, recurses through and returns all
* intermediate base types, including `t`.
*/
Type getABaseType(Type t) {
result = t
or
result = getABaseType(t.(DerivedType).getBaseType())
or
result = getABaseType(t.(TypedefType).getBaseType())
}
/**
* A type that may also be `CharPointerType`, but that are likely used as arbitrary buffers.
*/
class UnlikelyToBeAStringType extends Type {
UnlikelyToBeAStringType() {
this.(PointerType).getBaseType().(CharType).isUnsigned() or
this.(PointerType).getBaseType().getName().toLowerCase().matches("%byte") or
this.getName().toLowerCase().matches("%byte") or
this.(PointerType).getBaseType().hasName("uint8_t")
exists(Type targ | getABaseType(this) = targ |
// NOTE: not using CharType isUnsigned, but rather look for any explicitly declared unsigned
// char types. Assuming these are used for buffers, not strings.
targ.(CharType).getName().toLowerCase().matches("unsigned%") or
targ.getName().toLowerCase().matches(["uint8_t", "%byte%"])
)
}
}
// Types that can be wide depending on the UNICODE macro
// see https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types
class UnicodeMacroDependentWidthType extends Type {
UnicodeMacroDependentWidthType() {
exists(Type targ | getABaseType(this) = targ |
targ.getName() in [
"LPCTSTR",
"LPTSTR",
"PCTSTR",
"PTSTR",
"TBYTE",
"TCHAR"
]
)
}
}
class UnicodeMacro extends Macro {
UnicodeMacro() { this.getName().toLowerCase().matches("%unicode%") }
}
class UnicodeMacroInvocation extends MacroInvocation {
UnicodeMacroInvocation() { this.getMacro() instanceof UnicodeMacro }
}
/**
* Holds when a expression whose type is UnicodeMacroDependentWidthType and
* is observed to be guarded by a check involving a bitwise-and operation
* with a UnicodeMacroInvocation.
* Such expressions are assumed to be checked dynamically, i.e.,
* the flag would indicate if UNICODE typing is set correctly to allow
* or disallow a widening cast.
*/
predicate isLikelyDynamicallyChecked(Expr e) {
e.getType() instanceof UnicodeMacroDependentWidthType and
exists(GuardCondition gc, BitwiseAndExpr bai, UnicodeMacroInvocation umi |
bai.getAnOperand() = umi.getExpr()
|
// bai == 0 is false when reaching `e.getBasicBlock()`.
// That is, bai != 0 when reaching `e.getBasicBlock()`.
gc.ensuresEq(bai, 0, e.getBasicBlock(), false)
or
// bai == k and k != 0 is true when reaching `e.getBasicBlock()`.
gc.ensuresEq(bai, any(int k | k != 0), e.getBasicBlock(), true)
)
}
from Expr e1, Cast e2
where
e2 = e1.getConversion() and
@@ -42,7 +104,11 @@ where
not e1.getType() instanceof UnlikelyToBeAStringType and
// Avoid castings from 'new' expressions as typically these will be safe
// Example: `__Type* ret = reinterpret_cast<__Type*>(New(m_pmo) char[num * sizeof(__Type)]);`
not exists(NewOrNewArrayExpr newExpr | newExpr.getAChild*() = e1)
not exists(NewOrNewArrayExpr newExpr | newExpr.getAChild*() = e1) and
// Avoid cases where the cast is guarded by a check to determine if
// unicode encoding is enabled in such a way to disallow the dangerous cast
// at runtime.
not isLikelyDynamicallyChecked(e1)
select e1,
"Conversion from " + e1.getType().toString() + " to " + e2.getType().toString() +
". Use of invalid string can lead to undefined behavior."

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to detect byte arrays.
* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to recognize dynamic checks prior to possible dangerous widening.

View File

@@ -53,4 +53,59 @@ void NonStringFalsePositiveTest2(unsigned char* buffer)
{
wchar_t *lpWchar = NULL;
lpWchar = (LPWSTR)buffer; // Possible False Positive
}
}
typedef unsigned char BYTE;
using FOO = BYTE*;
void NonStringFalsePositiveTest3(FOO buffer)
{
wchar_t *lpWchar = NULL;
lpWchar = (LPWSTR)buffer; // GOOD
}
#define UNICODE 0x8
// assume EMPTY_MACRO is tied to if UNICODE is enabled
#ifdef EMPTY_MACRO
typedef WCHAR* LPTSTR;
#else
typedef char* LPTSTR;
#endif
void CheckedConversionFalsePositiveTest3(unsigned short flags, LPTSTR buffer)
{
wchar_t *lpWchar = NULL;
if(flags & UNICODE)
lpWchar = (LPWSTR)buffer; // GOOD
else
lpWchar = (LPWSTR)buffer; // BUG
if((flags & UNICODE) == 0x8)
lpWchar = (LPWSTR)buffer; // GOOD
else
lpWchar = (LPWSTR)buffer; // BUG
if((flags & UNICODE) != 0x8)
lpWchar = (LPWSTR)buffer; // BUG
else
lpWchar = (LPWSTR)buffer; // GOOD
// Bad operator precedence
if(flags & UNICODE == 0x8)
lpWchar = (LPWSTR)buffer; // BUG
else
lpWchar = (LPWSTR)buffer; // BUG
if((flags & UNICODE) != 0)
lpWchar = (LPWSTR)buffer; // GOOD
else
lpWchar = (LPWSTR)buffer; // BUG
if((flags & UNICODE) == 0)
lpWchar = (LPWSTR)buffer; // BUG
else
lpWchar = (LPWSTR)buffer; // GOOD
lpWchar = (LPWSTR)buffer; // BUG
}

View File

@@ -3,3 +3,11 @@
| WcharCharConversion.cpp:24:22:24:27 | lpChar | Conversion from char * to wchar_t *. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:26:23:26:28 | lpChar | Conversion from char * to LPCWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:27:17:27:22 | lpChar | Conversion from char * to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:82:21:82:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:87:21:87:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:90:21:90:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:96:21:96:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:98:21:98:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:103:21:103:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:106:21:106:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
| WcharCharConversion.cpp:110:20:110:25 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |