C++: Model varargs in IR, Part I

This change introduces a new synthesized `IRVariable` in every varargs function. This variable represents the entire set of arguments passed to the ellipsis by the caller. We give it an opaque type big enough to hold all of the arguments passed by the largest vararg call in the database. It is treated just like any other parameter. It is initialized the same way, it has indirect buffers, etc.

I had to introduce a couple of new APIs to `Call` and `Function`. The QLDoc comments should explain these. I added tests for these new APIs as well.

The next step will be to change the IR generation for the `va_*` macros to manipulate the ellipsis parameter.
This commit is contained in:
Dave Bartolomeo
2020-03-17 11:11:48 -04:00
parent 17c57dcb4c
commit 9cc3cda58e
16 changed files with 361 additions and 69 deletions

View File

@@ -364,6 +364,12 @@ class Function extends Declaration, ControlFlowNode, AccessHolder, @function {
/** Holds if this function is a varargs function. */
predicate isVarargs() { hasSpecifier("varargs") }
/**
 * Gets the index of this function's `...` parameter. There is a result only when this is a
 * varargs function, in which case the result equals `getNumberOfParameters()`.
 */
int getEllipsisParameterIndex() {
  this.isVarargs() and
  result = this.getNumberOfParameters()
}
/** Gets a type that is specified to be thrown by the function. */
Type getAThrownType() { result = getADeclarationEntry().getAThrownType() }

View File

@@ -2,6 +2,13 @@ import semmle.code.cpp.exprs.Expr
import semmle.code.cpp.Function
private import semmle.code.cpp.dataflow.EscapesTree
/**
 * Gets the index of the `...` parameter of the routine type `type`, if any. The `...` is
 * recognized as the parameter position whose type is an `UnknownType`.
 */
private int getEllipsisParameterIndex(RoutineType type) {
  type.getParameterType(result) = any(UnknownType ellipsisType)
}
/**
* A C/C++ call.
*
@@ -78,6 +85,32 @@ abstract class Call extends Expr, NameQualifiableElement {
override string toString() { none() }
/**
 * Gets the index of the `...` parameter of the callee, if any. This will be one greater than the
 * index of the last declared positional parameter.
 *
 * Has no result when the callee has no `...` parameter. Each concrete kind of call supplies its
 * own implementation.
 */
abstract int getEllipsisParameterIndex();
/**
 * Gets the index of the parameter that receives the argument at position `argIndex`.
 *
 * An argument bound to an ordinary positional parameter maps to the parameter with the same
 * index. In a call to a varargs function, every argument passed to the `...` maps to the single
 * index returned by `getEllipsisParameterIndex()`. There is no result if the call has no
 * argument at `argIndex`.
 */
final int getParameterIndexForArgument(int argIndex) {
  exists(this.getArgument(argIndex)) and
  (
    // Argument lands in the `...`: collapse onto the ellipsis parameter.
    argIndex >= this.getEllipsisParameterIndex() and
    result = this.getEllipsisParameterIndex()
    or
    // Ordinary positional argument (or the callee has no `...` at all).
    not argIndex >= this.getEllipsisParameterIndex() and
    result = argIndex
  )
}
/**
 * Holds if this call has an argument at position `index` and that argument is passed to the
 * `...` of a varargs function, i.e. `index` is at or beyond `getEllipsisParameterIndex()`.
 */
final predicate isEllipsisArgumentIndex(int index) {
  exists(int ellipsisIndex | ellipsisIndex = this.getEllipsisParameterIndex() |
    index >= ellipsisIndex and
    exists(this.getArgument(index))
  )
}
/**
* Holds if this call passes the variable accessed by `va` by
* reference as the `i`th argument.
@@ -259,6 +292,12 @@ class FunctionCall extends Call, @funbindexpr {
else result = "call to unknown function"
}
/**
 * Gets the index of the `...` parameter for this call. The index is taken from the target type
 * when that is a `RoutineType`, and from the target function otherwise.
 */
final override int getEllipsisParameterIndex() {
  this.getTargetType() instanceof RoutineType and
  result = getEllipsisParameterIndex(this.getTargetType())
  or
  not this.getTargetType() instanceof RoutineType and
  result = this.getTarget().getEllipsisParameterIndex()
}
override predicate mayBeImpure() {
this.getChild(_).mayBeImpure() or
this.getTarget().mayHaveSideEffects() or
@@ -378,6 +417,10 @@ class ExprCall extends Call, @callexpr {
override string toString() { result = "call to expression" }
override Function getTarget() { none() }
/**
 * Gets the index of the `...` parameter for this call, derived from the stripped type of the
 * called expression.
 */
final override int getEllipsisParameterIndex() {
  exists(RoutineType calleeType | calleeType = this.getExpr().getType().stripType() |
    result = getEllipsisParameterIndex(calleeType)
  )
}
}
/**

View File

@@ -63,6 +63,10 @@ deprecated class MessageExpr extends Expr, Call {
override Expr getArgument(int n) { none() }
override int getPrecedence() { none() }
/** Never has a result: no `...` parameter index is reported for this kind of call. */
final override int getEllipsisParameterIndex() { none() }
}
/**

View File

@@ -211,6 +211,16 @@ class IRThrowVariable extends IRTempVariable {
override string getBaseString() { result = "#throw" }
}
/**
 * A temporary variable generated to hold the contents of all arguments passed to the `...` of a
 * function that accepts a variable number of arguments.
 */
class IREllipsisVariable extends IRTempVariable {
// Selects exactly the temporary variables created with the `EllipsisTempVar()` tag.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Fixed display name of the variable.
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -211,6 +211,16 @@ class IRThrowVariable extends IRTempVariable {
override string getBaseString() { result = "#throw" }
}
/**
 * A temporary variable generated to hold the contents of all arguments passed to the `...` of a
 * function that accepts a variable number of arguments.
 */
class IREllipsisVariable extends IRTempVariable {
// Selects exactly the temporary variables created with the `EllipsisTempVar()` tag.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Fixed display name of the variable.
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -382,6 +382,9 @@ newtype TTranslatedElement =
translateFunction(func)
)
} or
TTranslatedEllipsisParameter(Function func) {
translateFunction(func) and func.isVarargs()
} or
TTranslatedReadEffects(Function func) { translateFunction(func) } or
// The read side effects in a function's return block
TTranslatedReadEffect(Parameter param) {

View File

@@ -16,6 +16,32 @@ private import TranslatedStmt
*/
TranslatedFunction getTranslatedFunction(Function func) { result.getAST() = func }
/**
 * Gets the size, in bytes, of the variable used to represent the `...` parameter in a varargs
 * function. This is determined by finding the total size of all of the arguments passed to the
 * `...` in each call in the program, and choosing the maximum of those, with a minimum of 8 bytes.
 *
 * The result does not depend on any particular function: one size is computed for the whole
 * database and shared by every ellipsis variable.
 */
private int getEllipsisVariableByteSize() {
result =
// Outer `max`: picks the larger of the two candidates below.
max(int variableSize |
variableSize =
// Candidate 1: the largest per-call total over every `Call` in the database.
max(Call call, int callSize |
callSize =
// Per-call total: sizes of the types of the arguments that land in the `...`.
sum(int argIndex |
call.isEllipsisArgumentIndex(argIndex)
|
call.getArgument(argIndex).getType().getSize()
)
|
callSize
)
or
// Candidate 2: the 8-byte floor.
variableSize = 8
|
variableSize
)
}
/**
* Represents the IR translation of a function. This is the root element for
* all other elements associated with this function.
@@ -60,6 +86,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
/**
 * Gets the translated parameter at position `index`: either the translation of a declared
 * parameter of `func`, or the synthesized `...` parameter at
 * `func.getEllipsisParameterIndex()`.
 */
final private TranslatedParameter getParameter(int index) {
  func.getEllipsisParameterIndex() = index and
  result = getTranslatedEllipsisParameter(func)
  or
  result = getTranslatedParameter(func.getParameter(index))
}
final override Instruction getFirstInstruction() { result = getInstruction(EnterFunctionTag()) }
@@ -113,7 +142,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
final override Instruction getChildSuccessor(TranslatedElement child) {
exists(int paramIndex |
child = getParameter(paramIndex) and
if exists(func.getParameter(paramIndex + 1))
if
exists(func.getParameter(paramIndex + 1)) or
func.getEllipsisParameterIndex() = paramIndex + 1
then result = getParameter(paramIndex + 1).getFirstInstruction()
else result = getConstructorInitList().getFirstInstruction()
)
@@ -237,10 +268,18 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
result = getReturnVariable()
}
/**
 * Holds if `byteSize` equals `getEllipsisVariableByteSize()`. Note that this is not restricted
 * to varargs functions.
 *
 * NOTE(review): presumably this is what makes the type later obtained through
 * `getUnknownOpaqueType` available; confirm against the overridden predicate.
 */
final override predicate needsUnknownOpaqueType(int byteSize) {
  getEllipsisVariableByteSize() = byteSize
}
/**
 * Holds if this function has a temporary variable identified by `tag` with type `type`.
 * Two temporaries are described: the return value and the `...` variable.
 */
final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
  // Return value temporary, only when the function has a return value.
  hasReturnValue() and
  tag = ReturnValueTempVar() and
  type = getTypeForPRValue(getReturnType())
  or
  // Ellipsis temporary, only for varargs functions; typed as an opaque blob.
  func.isVarargs() and
  tag = EllipsisTempVar() and
  type = getUnknownOpaqueType(getEllipsisVariableByteSize())
}
/**
@@ -316,34 +355,29 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
}
/**
* Gets the `TranslatedParameter` that represents parameter `param`.
* Gets the `TranslatedPositionalParameter` that represents parameter `param`.
*/
TranslatedParameter getTranslatedParameter(Parameter param) { result.getAST() = param }
TranslatedPositionalParameter getTranslatedParameter(Parameter param) { result.getAST() = param }
/**
* Represents the IR translation of a function parameter, including the
* initialization of that parameter with the incoming argument.
* Gets the `TranslatedEllipsisParameter` for function `func`, if one exists.
*/
class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
Parameter param;
TranslatedEllipsisParameter getTranslatedEllipsisParameter(Function func) {
result.getFunction() = func
}
TranslatedParameter() { this = TTranslatedParameter(param) }
final override string toString() { result = param.toString() }
final override Locatable getAST() { result = param }
final override Function getFunction() {
result = param.getFunction() or
result = param.getCatchBlock().getEnclosingFunction()
}
/**
* The IR translation of a parameter to a function. This can be either a user-declared parameter
* (`TranslatedPositionalParameter`) or the synthesized parameter used to represent a `...` in a
* varargs function (`TranslatedEllipsisParameter`).
*/
abstract class TranslatedParameter extends TranslatedElement {
final override TranslatedElement getChild(int id) { none() }
final override Instruction getFirstInstruction() {
result = getInstruction(InitializerVariableAddressTag())
}
final override TranslatedElement getChild(int id) { none() }
final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
kind instanceof GotoEdge and
(
@@ -368,16 +402,16 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
tag = InitializerVariableAddressTag() and
opcode instanceof Opcode::VariableAddress and
resultType = getTypeForGLValue(getVariableType(param))
resultType = getGLValueType()
or
tag = InitializerStoreTag() and
opcode instanceof Opcode::InitializeParameter and
resultType = getTypeForPRValue(getVariableType(param))
resultType = getPRValueType()
or
hasIndirection() and
tag = InitializerIndirectAddressTag() and
opcode instanceof Opcode::Load and
resultType = getTypeForPRValue(getVariableType(param))
resultType = getPRValueType()
or
hasIndirection() and
tag = InitializerIndirectStoreTag() and
@@ -391,7 +425,7 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
tag = InitializerVariableAddressTag() or
tag = InitializerIndirectStoreTag()
) and
result = getIRUserVariable(getFunction(), param)
result = getIRVariable()
}
final override Instruction getInstructionOperand(InstructionTag tag, OperandTag operandTag) {
@@ -416,13 +450,74 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
result = getInstruction(InitializerIndirectAddressTag())
}
predicate hasIndirection() {
abstract predicate hasIndirection();
abstract CppType getGLValueType();
abstract CppType getPRValueType();
abstract IRAutomaticVariable getIRVariable();
}
/**
 * The IR translation of a declared parameter. It supplies the types and the IR variable that
 * `TranslatedParameter` uses when generating the instructions that initialize the parameter.
 */
class TranslatedPositionalParameter extends TranslatedParameter, TTranslatedParameter {
  Parameter param;

  TranslatedPositionalParameter() { TTranslatedParameter(param) = this }

  final override string toString() { result = param.toString() }

  final override Locatable getAST() { result = param }

  /**
   * Gets the function that declares `param`, or the function enclosing the catch block that
   * declares it.
   */
  final override Function getFunction() {
    result = param.getCatchBlock().getEnclosingFunction()
    or
    result = param.getFunction()
  }

  /** Holds if the unspecified type of `param` is an array, pointer, or reference type. */
  final override predicate hasIndirection() {
    param.getUnspecifiedType() instanceof ArrayType
    or
    param.getUnspecifiedType() instanceof PointerType
    or
    param.getUnspecifiedType() instanceof ReferenceType
  }

  /** Gets the glvalue type corresponding to the variable type of `param`. */
  final override CppType getGLValueType() {
    result = getTypeForGLValue(getVariableType(param))
  }

  /** Gets the prvalue type corresponding to the variable type of `param`. */
  final override CppType getPRValueType() {
    result = getTypeForPRValue(getVariableType(param))
  }

  /** Gets the IR user variable for `param` within its function. */
  final override IRAutomaticUserVariable getIRVariable() {
    result = getIRUserVariable(this.getFunction(), param)
  }
}
/**
 * The IR translation of the synthesized parameter that stands for the `...` of the varargs
 * function `func`.
 */
class TranslatedEllipsisParameter extends TranslatedParameter, TTranslatedEllipsisParameter {
  Function func;

  TranslatedEllipsisParameter() { TTranslatedEllipsisParameter(func) = this }

  final override string toString() { result = "..." }

  /** Gets the function itself; this element has no AST node of its own. */
  final override Locatable getAST() { result = func }

  final override Function getFunction() { result = func }

  /** Always holds for the ellipsis parameter. */
  final override predicate hasIndirection() { any() }

  /** Gets the glvalue type for an `UnknownType`. */
  final override CppType getGLValueType() {
    exists(UnknownType unknown | result = getTypeForGLValue(unknown))
  }

  /** Gets the opaque type whose size is `getEllipsisVariableByteSize()`. */
  final override CppType getPRValueType() {
    exists(int byteSize | byteSize = getEllipsisVariableByteSize() |
      result = getUnknownOpaqueType(byteSize)
    )
  }

  /** Gets the `IREllipsisVariable` whose enclosing function is `func`. */
  final override IREllipsisVariable getIRVariable() { func = result.getEnclosingFunction() }
}
private TranslatedConstructorInitList getTranslatedConstructorInitList(Function func) {

View File

@@ -211,6 +211,16 @@ class IRThrowVariable extends IRTempVariable {
override string getBaseString() { result = "#throw" }
}
/**
 * A temporary variable generated to hold the contents of all arguments passed to the `...` of a
 * function that accepts a variable number of arguments.
 */
class IREllipsisVariable extends IRTempVariable {
// Selects exactly the temporary variables created with the `EllipsisTempVar()` tag.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Fixed display name of the variable.
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -2,7 +2,8 @@ newtype TTempVariableTag =
ConditionValueTempVar() or
ReturnValueTempVar() or
ThrowTempVar() or
LambdaTempVar()
LambdaTempVar() or
EllipsisTempVar()
string getTempVariableTagId(TTempVariableTag tag) {
tag = ConditionValueTempVar() and result = "CondVal"
@@ -12,4 +13,6 @@ string getTempVariableTagId(TTempVariableTag tag) {
tag = ThrowTempVar() and result = "Throw"
or
tag = LambdaTempVar() and result = "Lambda"
or
tag = EllipsisTempVar() and result = "Ellipsis"
}

View File

@@ -0,0 +1,34 @@
| args.cpp:8:5:8:12 | call to global_1 | 0 | 0 |
| args.cpp:9:5:9:12 | call to global_2 | 0 | 0 |
| args.cpp:9:5:9:12 | call to global_2 | 1 | 1 |
| args.cpp:10:5:10:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:10:5:10:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 2 | 2 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 2 | 2 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 3 | 2 |
| args.cpp:22:5:22:12 | call to expression | 0 | 0 |
| args.cpp:23:5:23:15 | call to expression | 0 | 0 |
| args.cpp:23:5:23:15 | call to expression | 1 | 1 |
| args.cpp:24:5:24:22 | call to expression | 0 | 0 |
| args.cpp:24:5:24:22 | call to expression | 1 | 1 |
| args.cpp:25:5:25:25 | call to expression | 0 | 0 |
| args.cpp:25:5:25:25 | call to expression | 1 | 1 |
| args.cpp:25:5:25:25 | call to expression | 2 | 2 |
| args.cpp:26:5:26:28 | call to expression | 0 | 0 |
| args.cpp:26:5:26:28 | call to expression | 1 | 1 |
| args.cpp:26:5:26:28 | call to expression | 2 | 2 |
| args.cpp:26:5:26:28 | call to expression | 3 | 2 |
| args.cpp:37:10:37:11 | call to S | 0 | 0 |
| args.cpp:38:19:38:23 | call to S | 0 | 0 |
| args.cpp:38:19:38:23 | call to S | 1 | 1 |
| args.cpp:39:19:39:26 | call to S | 0 | 0 |
| args.cpp:39:19:39:26 | call to S | 1 | 1 |
| args.cpp:39:19:39:26 | call to S | 2 | 2 |
| args.cpp:40:19:40:29 | call to S | 0 | 0 |
| args.cpp:40:19:40:29 | call to S | 1 | 1 |
| args.cpp:40:19:40:29 | call to S | 2 | 2 |
| args.cpp:40:19:40:29 | call to S | 3 | 2 |

View File

@@ -0,0 +1,6 @@
import cpp
// Lists, for every call argument, the index of the parameter it initializes.
from Call call, int argIndex, int paramIndex
where call.getParameterIndexForArgument(argIndex) = paramIndex
select call, argIndex, paramIndex

View File

@@ -0,0 +1,41 @@
void global_0();
void global_1(int a);
void global_2(int a, float b);
void global_2_vararg(int a, float b, ...);
void call_globals(int a, float b, void* c, bool d) {  // NOTE(review): keep comments trailing; the expected results appear to pin line:column positions
global_0();  // no arguments
global_1(a);  // argument 0 -> parameter 0
global_2(a, b);  // arguments 0, 1 -> parameters 0, 1
global_2_vararg(a, b);  // nothing is passed to the `...`
global_2_vararg(a, b, c);  // `c` (argument 2) -> ellipsis parameter, index 2
global_2_vararg(a, b, c, d);  // `c` and `d` (arguments 2, 3) both -> ellipsis parameter, index 2
}
void (*pfn_0)();
void (*pfn_1)(int a);
void (*pfn_2)(int a, float b);
void (*pfn_2_vararg)(int a, float b ...);
void call_pfns(int a, float b, void* c, bool d) {  // NOTE(review): keep comments trailing; the expected results appear to pin line:column positions
pfn_0();  // no arguments
pfn_1(a);  // argument 0 -> parameter 0
pfn_2(a, b);  // arguments 0, 1 -> parameters 0, 1
pfn_2_vararg(a, b);  // nothing is passed to the `...`
pfn_2_vararg(a, b, c);  // `c` (argument 2) -> ellipsis parameter, index 2
pfn_2_vararg(a, b, c, d);  // `c` and `d` (arguments 2, 3) both -> ellipsis parameter, index 2
}
struct S {  // constructors with zero, one, and two-plus-varargs parameters
S();  // no parameters
S(int a);  // one positional parameter
S(int a, float b, ...);  // two positional parameters followed by `...`
};
void call_constructors(int a, float b, void* c, bool d) {  // NOTE(review): keep comments trailing; the expected results appear to pin line:column positions
S s0;  // default constructor, no arguments
S s1(a);  // argument 0 -> parameter 0
S s2_vararg_0(a, b);  // nothing is passed to the `...`
S s2_vararg_1(a, b, c);  // `c` (argument 2) -> ellipsis parameter, index 2
S s2_vararg_2(a, b, c, d);  // `c` and `d` (arguments 2, 3) both -> ellipsis parameter, index 2
}

View File

@@ -4483,49 +4483,53 @@ ir.cpp:
# 888| void VarArgUsage(int)
# 888| Block 0
# 888| v888_1(void) = EnterFunction :
# 888| mu888_2(unknown) = AliasedDefinition :
# 888| mu888_3(unknown) = InitializeNonLocal :
# 888| mu888_4(unknown) = UnmodeledDefinition :
# 888| r888_5(glval<int>) = VariableAddress[x] :
# 888| mu888_6(int) = InitializeParameter[x] : &:r888_5
# 889| r889_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 889| mu889_2(__va_list_tag[1]) = Uninitialized[args] : &:r889_1
# 891| r891_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 891| r891_2(__va_list_tag *) = Convert : r891_1
# 891| r891_3(glval<int>) = VariableAddress[x] :
# 891| v891_4(void) = VarArgsStart : 0:r891_2, 1:r891_3
# 892| r892_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 892| mu892_2(__va_list_tag[1]) = Uninitialized[args2] : &:r892_1
# 893| r893_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 893| r893_2(__va_list_tag *) = Convert : r893_1
# 893| r893_3(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 893| r893_4(__va_list_tag *) = Convert : r893_3
# 893| v893_5(void) = VarArgsStart : 0:r893_2, 1:r893_4
# 894| r894_1(glval<double>) = VariableAddress[d] :
# 894| r894_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 894| r894_3(__va_list_tag *) = Convert : r894_2
# 894| r894_4(glval<double>) = VarArg : 0:r894_3
# 894| r894_5(double) = Load : &:r894_4, ~mu888_4
# 894| mu894_6(double) = Store : &:r894_1, r894_5
# 895| r895_1(glval<float>) = VariableAddress[f] :
# 895| r895_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 895| r895_3(__va_list_tag *) = Convert : r895_2
# 895| r895_4(glval<double>) = VarArg : 0:r895_3
# 895| r895_5(double) = Load : &:r895_4, ~mu888_4
# 895| r895_6(float) = Convert : r895_5
# 895| mu895_7(float) = Store : &:r895_1, r895_6
# 896| r896_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 896| r896_2(__va_list_tag *) = Convert : r896_1
# 896| v896_3(void) = VarArgsEnd : 0:r896_2
# 897| r897_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 897| r897_2(__va_list_tag *) = Convert : r897_1
# 897| v897_3(void) = VarArgsEnd : 0:r897_2
# 898| v898_1(void) = NoOp :
# 888| v888_7(void) = ReturnVoid :
# 888| v888_8(void) = UnmodeledUse : mu*
# 888| v888_9(void) = AliasedUse : ~mu888_4
# 888| v888_10(void) = ExitFunction :
# 888| v888_1(void) = EnterFunction :
# 888| mu888_2(unknown) = AliasedDefinition :
# 888| mu888_3(unknown) = InitializeNonLocal :
# 888| mu888_4(unknown) = UnmodeledDefinition :
# 888| r888_5(glval<int>) = VariableAddress[x] :
# 888| mu888_6(int) = InitializeParameter[x] : &:r888_5
# 888| r888_7(glval<unknown>) = VariableAddress[#ellipsis] :
# 888| mu888_8(unknown[11]) = InitializeParameter[#ellipsis] : &:r888_7
# 888| r888_9(unknown[11]) = Load : &:r888_7, ~mu888_8
# 888| mu888_10(unknown) = InitializeIndirection[#ellipsis] : &:r888_9
# 889| r889_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 889| mu889_2(__va_list_tag[1]) = Uninitialized[args] : &:r889_1
# 891| r891_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 891| r891_2(__va_list_tag *) = Convert : r891_1
# 891| r891_3(glval<int>) = VariableAddress[x] :
# 891| v891_4(void) = VarArgsStart : 0:r891_2, 1:r891_3
# 892| r892_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 892| mu892_2(__va_list_tag[1]) = Uninitialized[args2] : &:r892_1
# 893| r893_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 893| r893_2(__va_list_tag *) = Convert : r893_1
# 893| r893_3(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 893| r893_4(__va_list_tag *) = Convert : r893_3
# 893| v893_5(void) = VarArgsStart : 0:r893_2, 1:r893_4
# 894| r894_1(glval<double>) = VariableAddress[d] :
# 894| r894_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 894| r894_3(__va_list_tag *) = Convert : r894_2
# 894| r894_4(glval<double>) = VarArg : 0:r894_3
# 894| r894_5(double) = Load : &:r894_4, ~mu888_4
# 894| mu894_6(double) = Store : &:r894_1, r894_5
# 895| r895_1(glval<float>) = VariableAddress[f] :
# 895| r895_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 895| r895_3(__va_list_tag *) = Convert : r895_2
# 895| r895_4(glval<double>) = VarArg : 0:r895_3
# 895| r895_5(double) = Load : &:r895_4, ~mu888_4
# 895| r895_6(float) = Convert : r895_5
# 895| mu895_7(float) = Store : &:r895_1, r895_6
# 896| r896_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 896| r896_2(__va_list_tag *) = Convert : r896_1
# 896| v896_3(void) = VarArgsEnd : 0:r896_2
# 897| r897_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 897| r897_2(__va_list_tag *) = Convert : r897_1
# 897| v897_3(void) = VarArgsEnd : 0:r897_2
# 898| v898_1(void) = NoOp :
# 888| v888_11(void) = ReturnVoid :
# 888| v888_12(void) = UnmodeledUse : mu*
# 888| v888_13(void) = AliasedUse : ~mu888_4
# 888| v888_14(void) = ExitFunction :
# 900| void CastToVoid(int)
# 900| Block 0