Merge pull request #3082 from dbartol/dbartol/VarArgIR

C++: Model varargs in IR, Part I
This commit is contained in:
Mathias Vorreiter Pedersen
2020-03-19 18:05:46 +01:00
committed by GitHub
15 changed files with 370 additions and 70 deletions

View File

@@ -196,7 +196,7 @@ class FunctionCall extends Call, @funbindexpr {
* constructor calls, this predicate instead gets the `Class` of the constructor
* being called.
*/
private Type getTargetType() { result = Call.super.getType().stripType() }
Type getTargetType() { result = Call.super.getType().stripType() }
/**
* Gets the expected return type of the function called by this call.

View File

@@ -213,6 +213,16 @@ class IRThrowVariable extends IRTempVariable {
final override string getBaseString() { result = "#throw" }
}
/**
* A temporary variable generated to hold the contents of all arguments passed to the `...` of a
* function that accepts a variable number of arguments.
*/
class IREllipsisVariable extends IRTempVariable {
// Restricts this class to the temporaries whose tag is `EllipsisTempVar()`.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Always renders as the fixed string "#ellipsis".
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -213,6 +213,16 @@ class IRThrowVariable extends IRTempVariable {
final override string getBaseString() { result = "#throw" }
}
/**
* A temporary variable generated to hold the contents of all arguments passed to the `...` of a
* function that accepts a variable number of arguments.
*/
class IREllipsisVariable extends IRTempVariable {
// Restricts this class to the temporaries whose tag is `EllipsisTempVar()`.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Always renders as the fixed string "#ellipsis".
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -402,6 +402,7 @@ newtype TTranslatedElement =
translateFunction(func)
)
} or
TTranslatedEllipsisParameter(Function func) { translateFunction(func) and func.isVarargs() } or
TTranslatedReadEffects(Function func) { translateFunction(func) } or
// The read side effects in a function's return block
TTranslatedReadEffect(Parameter param) {

View File

@@ -10,12 +10,39 @@ private import TranslatedElement
private import TranslatedExpr
private import TranslatedInitialization
private import TranslatedStmt
private import VarArgs
/**
* Gets the `TranslatedFunction` that represents function `func`.
*/
TranslatedFunction getTranslatedFunction(Function func) { result.getAST() = func }
/**
 * Gets the size, in bytes, of the variable that stands in for the `...` parameter of a varargs
 * function. For every call in the program, the sizes of the arguments passed to the `...` are
 * added up; the result is the largest such total, but never less than 8 bytes.
 */
private int getEllipsisVariableByteSize() {
  result =
    max(int candidateSize |
      // Floor value, also used when the program contains no calls at all.
      candidateSize = 8
      or
      // Total size of the arguments that a single call passes to the `...`.
      exists(Call call |
        candidateSize =
          sum(int argIndex |
            isEllipsisArgumentIndex(call, argIndex)
          |
            call.getArgument(argIndex).getType().getSize()
          )
      )
    )
}
/**
* Represents the IR translation of a function. This is the root element for
* all other elements associated with this function.
@@ -60,6 +87,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
/**
 * Gets the translated parameter at position `index`: the synthesized `...` parameter when
 * `index` is the ellipsis index reported for `func`, or otherwise the translation of the
 * declared parameter at that position.
 */
final private TranslatedParameter getParameter(int index) {
  getEllipsisParameterIndexForFunction(func) = index and
  result = getTranslatedEllipsisParameter(func)
  or
  result = getTranslatedParameter(func.getParameter(index))
}
final override Instruction getFirstInstruction() { result = getInstruction(EnterFunctionTag()) }
@@ -113,7 +143,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
final override Instruction getChildSuccessor(TranslatedElement child) {
exists(int paramIndex |
child = getParameter(paramIndex) and
if exists(func.getParameter(paramIndex + 1))
if
exists(func.getParameter(paramIndex + 1)) or
getEllipsisParameterIndexForFunction(func) = paramIndex + 1
then result = getParameter(paramIndex + 1).getFirstInstruction()
else result = getConstructorInitList().getFirstInstruction()
)
@@ -237,10 +269,18 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
result = getReturnVariable()
}
/**
 * Holds only when `byteSize` equals the size computed by `getEllipsisVariableByteSize()`, i.e.
 * the size of the variable that represents the `...` parameter.
 */
final override predicate needsUnknownOpaqueType(int byteSize) {
  getEllipsisVariableByteSize() = byteSize
}
/**
 * Holds if this function has a temporary variable identified by `tag` with type `type`:
 * - `ReturnValueTempVar()`, typed as a prvalue of the return type, when the function has a
 *   return value;
 * - `EllipsisTempVar()`, typed as an unknown opaque type of the ellipsis variable size, when
 *   the function is varargs.
 */
final override predicate hasTempVariable(TempVariableTag tag, CppType type) {
  hasReturnValue() and
  tag = ReturnValueTempVar() and
  type = getTypeForPRValue(getReturnType())
  or
  func.isVarargs() and
  tag = EllipsisTempVar() and
  type = getUnknownOpaqueType(getEllipsisVariableByteSize())
}
/**
@@ -316,34 +356,29 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
}
/**
* Gets the `TranslatedParameter` that represents parameter `param`.
* Gets the `TranslatedPositionalParameter` that represents parameter `param`.
*/
TranslatedParameter getTranslatedParameter(Parameter param) { result.getAST() = param }
TranslatedPositionalParameter getTranslatedParameter(Parameter param) { result.getAST() = param }
/**
* Represents the IR translation of a function parameter, including the
* initialization of that parameter with the incoming argument.
* Gets the `TranslatedEllipsisParameter` for function `func`, if one exists.
*/
class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
Parameter param;
TranslatedEllipsisParameter getTranslatedEllipsisParameter(Function func) {
result.getFunction() = func
}
TranslatedParameter() { this = TTranslatedParameter(param) }
final override string toString() { result = param.toString() }
final override Locatable getAST() { result = param }
final override Function getFunction() {
result = param.getFunction() or
result = param.getCatchBlock().getEnclosingFunction()
}
/**
* The IR translation of a parameter to a function. This can be either a user-declared parameter
* (`TranslatedPositionalParameter`) or the synthesized parameter used to represent a `...` in a
* varargs function (`TranslatedEllipsisParameter`).
*/
abstract class TranslatedParameter extends TranslatedElement {
final override TranslatedElement getChild(int id) { none() }
final override Instruction getFirstInstruction() {
result = getInstruction(InitializerVariableAddressTag())
}
final override TranslatedElement getChild(int id) { none() }
final override Instruction getInstructionSuccessor(InstructionTag tag, EdgeKind kind) {
kind instanceof GotoEdge and
(
@@ -368,16 +403,16 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
final override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
tag = InitializerVariableAddressTag() and
opcode instanceof Opcode::VariableAddress and
resultType = getTypeForGLValue(getVariableType(param))
resultType = getGLValueType()
or
tag = InitializerStoreTag() and
opcode instanceof Opcode::InitializeParameter and
resultType = getTypeForPRValue(getVariableType(param))
resultType = getPRValueType()
or
hasIndirection() and
tag = InitializerIndirectAddressTag() and
opcode instanceof Opcode::Load and
resultType = getTypeForPRValue(getVariableType(param))
resultType = getPRValueType()
or
hasIndirection() and
tag = InitializerIndirectStoreTag() and
@@ -391,7 +426,7 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
tag = InitializerVariableAddressTag() or
tag = InitializerIndirectStoreTag()
) and
result = getIRUserVariable(getFunction(), param)
result = getIRVariable()
}
final override Instruction getInstructionOperand(InstructionTag tag, OperandTag operandTag) {
@@ -416,13 +451,74 @@ class TranslatedParameter extends TranslatedElement, TTranslatedParameter {
result = getInstruction(InitializerIndirectAddressTag())
}
predicate hasIndirection() {
abstract predicate hasIndirection();
abstract CppType getGLValueType();
abstract CppType getPRValueType();
abstract IRAutomaticVariable getIRVariable();
}
/**
 * The IR translation of a declared function parameter, together with the initialization of
 * that parameter from the incoming argument.
 */
class TranslatedPositionalParameter extends TranslatedParameter, TTranslatedParameter {
  Parameter param;

  TranslatedPositionalParameter() { TTranslatedParameter(param) = this }

  final override string toString() { result = param.toString() }

  final override Locatable getAST() { result = param }

  /**
   * Gets the function that owns `param`: either the function declaring it, or the function
   * enclosing the catch block that declares it.
   */
  final override Function getFunction() {
    result = param.getCatchBlock().getEnclosingFunction() or
    result = param.getFunction()
  }

  /** Holds if the unspecified type of `param` is an array, pointer, or reference type. */
  final override predicate hasIndirection() {
    param.getUnspecifiedType() instanceof ArrayType or
    param.getUnspecifiedType() instanceof PointerType or
    param.getUnspecifiedType() instanceof ReferenceType
  }

  final override CppType getGLValueType() {
    exists(Type variableType |
      variableType = getVariableType(param) and
      result = getTypeForGLValue(variableType)
    )
  }

  final override CppType getPRValueType() {
    exists(Type variableType |
      variableType = getVariableType(param) and
      result = getTypeForPRValue(variableType)
    )
  }

  final override IRAutomaticUserVariable getIRVariable() {
    result = getIRUserVariable(getFunction(), param)
  }
}
/**
 * The IR translation of the synthesized parameter that stands for the `...` of a varargs
 * function.
 */
class TranslatedEllipsisParameter extends TranslatedParameter, TTranslatedEllipsisParameter {
  Function func;

  TranslatedEllipsisParameter() { TTranslatedEllipsisParameter(func) = this }

  final override string toString() { result = "..." }

  /** Gets the function itself, which serves as the AST element of this synthesized parameter. */
  final override Locatable getAST() { result = func }

  final override Function getFunction() { result = func }

  /** Holds unconditionally: the `...` parameter is always treated as having indirection. */
  final override predicate hasIndirection() { any() }

  final override CppType getGLValueType() {
    exists(UnknownType unknown | result = getTypeForGLValue(unknown))
  }

  /** Gets the unknown opaque type whose size is the ellipsis variable size. */
  final override CppType getPRValueType() {
    exists(int byteSize |
      byteSize = getEllipsisVariableByteSize() and
      result = getUnknownOpaqueType(byteSize)
    )
  }

  final override IREllipsisVariable getIRVariable() { func = result.getEnclosingFunction() }
}
private TranslatedConstructorInitList getTranslatedConstructorInitList(Function func) {

View File

@@ -0,0 +1,62 @@
/**
* Utilities for determining which parameters and arguments correspond to the `...` parameter for
* varargs functions.
*/
private import cpp
/**
 * Gets the index of the `...` parameter of `func`, provided `func` is a varargs function. When
 * it exists, the result is always `func.getNumberOfParameters()`.
 */
int getEllipsisParameterIndexForFunction(Function func) {
  result = func.getNumberOfParameters() and
  func.isVarargs()
}
/**
 * Gets the index of the `...` parameter of the routine type `type`, if any.
 *
 * The result is the index of the last parameter position of `type`, and it only exists when the
 * parameter type at that position is `UnknownType`.
 */
int getEllipsisParameterIndexForRoutineType(RoutineType type) {
  // Since the extractor doesn't record this information directly, we look for routine types whose
  // last parameter type is `UnknownType`.
  type.getParameterType(result) instanceof UnknownType and
  // Count parameter positions rather than the values of `getAParameterType()`: an aggregate over
  // an expression counts distinct values, so a signature such as `(int, int, ...)` would be
  // counted as having two parameters and its `...` would never be found.
  result = strictcount(int index | exists(type.getParameterType(index))) - 1
}
/**
 * Gets the index of the `...` parameter for the callee of `call`, if any. This will be one
 * greater than the index of the last declared positional parameter.
 *
 * For a `FunctionCall` whose target type is a `RoutineType`, and for every `ExprCall`, the index
 * is derived from the routine type. For any other `FunctionCall`, it is derived from the target
 * function.
 */
int getEllipsisParameterIndex(Call call) {
  // Function call with a routine target type: use that type.
  call.(FunctionCall).getTargetType() instanceof RoutineType and
  result = getEllipsisParameterIndexForRoutineType(call.(FunctionCall).getTargetType())
  or
  // Function call without a routine target type: fall back to the target function.
  call instanceof FunctionCall and
  not call.(FunctionCall).getTargetType() instanceof RoutineType and
  result = getEllipsisParameterIndexForFunction(call.(FunctionCall).getTarget())
  or
  // Call through an expression: use the stripped type of the called expression.
  result = getEllipsisParameterIndexForRoutineType(call.(ExprCall).getExpr().getType().stripType())
}
/**
 * Gets the index of the parameter that is initialized with the argument at `argIndex` of `call`.
 * Ordinary positional arguments map to the parameter with the same index. In a call to a varargs
 * function, every argument passed to the `...` maps to the index returned by
 * `getEllipsisParameterIndex()`.
 */
int getParameterIndexForArgument(Call call, int argIndex) {
  exists(call.getArgument(argIndex)) and
  (
    argIndex >= getEllipsisParameterIndex(call) and
    result = getEllipsisParameterIndex(call)
    or
    not argIndex >= getEllipsisParameterIndex(call) and
    result = argIndex
  )
}
/**
 * Holds if `call` has an argument at `index` and that argument is passed to the `...` of a
 * varargs function, i.e. `index` is at or beyond the ellipsis parameter index of `call`.
 */
predicate isEllipsisArgumentIndex(Call call, int index) {
  exists(int ellipsisIndex |
    ellipsisIndex = getEllipsisParameterIndex(call) and
    ellipsisIndex <= index
  ) and
  exists(call.getArgument(index))
}

View File

@@ -213,6 +213,16 @@ class IRThrowVariable extends IRTempVariable {
final override string getBaseString() { result = "#throw" }
}
/**
* A temporary variable generated to hold the contents of all arguments passed to the `...` of a
* function that accepts a variable number of arguments.
*/
class IREllipsisVariable extends IRTempVariable {
// Restricts this class to the temporaries whose tag is `EllipsisTempVar()`.
IREllipsisVariable() { tag = EllipsisTempVar() }
// Always renders as the fixed string "#ellipsis".
final override string toString() { result = "#ellipsis" }
}
/**
* A variable generated to represent the contents of a string literal. This variable acts much like
* a read-only global variable.

View File

@@ -2,7 +2,8 @@ newtype TTempVariableTag =
ConditionValueTempVar() or
ReturnValueTempVar() or
ThrowTempVar() or
LambdaTempVar()
LambdaTempVar() or
EllipsisTempVar()
string getTempVariableTagId(TTempVariableTag tag) {
tag = ConditionValueTempVar() and result = "CondVal"
@@ -12,4 +13,6 @@ string getTempVariableTagId(TTempVariableTag tag) {
tag = ThrowTempVar() and result = "Throw"
or
tag = LambdaTempVar() and result = "Lambda"
or
tag = EllipsisTempVar() and result = "Ellipsis"
}

View File

@@ -0,0 +1,34 @@
| args.cpp:8:5:8:12 | call to global_1 | 0 | 0 |
| args.cpp:9:5:9:12 | call to global_2 | 0 | 0 |
| args.cpp:9:5:9:12 | call to global_2 | 1 | 1 |
| args.cpp:10:5:10:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:10:5:10:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:11:5:11:19 | call to global_2_vararg | 2 | 2 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 0 | 0 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 1 | 1 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 2 | 2 |
| args.cpp:12:5:12:19 | call to global_2_vararg | 3 | 2 |
| args.cpp:22:5:22:12 | call to expression | 0 | 0 |
| args.cpp:23:5:23:15 | call to expression | 0 | 0 |
| args.cpp:23:5:23:15 | call to expression | 1 | 1 |
| args.cpp:24:5:24:22 | call to expression | 0 | 0 |
| args.cpp:24:5:24:22 | call to expression | 1 | 1 |
| args.cpp:25:5:25:25 | call to expression | 0 | 0 |
| args.cpp:25:5:25:25 | call to expression | 1 | 1 |
| args.cpp:25:5:25:25 | call to expression | 2 | 2 |
| args.cpp:26:5:26:28 | call to expression | 0 | 0 |
| args.cpp:26:5:26:28 | call to expression | 1 | 1 |
| args.cpp:26:5:26:28 | call to expression | 2 | 2 |
| args.cpp:26:5:26:28 | call to expression | 3 | 2 |
| args.cpp:37:10:37:11 | call to S | 0 | 0 |
| args.cpp:38:19:38:23 | call to S | 0 | 0 |
| args.cpp:38:19:38:23 | call to S | 1 | 1 |
| args.cpp:39:19:39:26 | call to S | 0 | 0 |
| args.cpp:39:19:39:26 | call to S | 1 | 1 |
| args.cpp:39:19:39:26 | call to S | 2 | 2 |
| args.cpp:40:19:40:29 | call to S | 0 | 0 |
| args.cpp:40:19:40:29 | call to S | 1 | 1 |
| args.cpp:40:19:40:29 | call to S | 2 | 2 |
| args.cpp:40:19:40:29 | call to S | 3 | 2 |

View File

@@ -0,0 +1,6 @@
import cpp
import semmle.code.cpp.ir.implementation.raw.internal.VarArgs
// Lists, for each argument of each call, the index of the parameter that the argument initializes.
from Call call, int argIndex, int paramIndex
where getParameterIndexForArgument(call, argIndex) = paramIndex
select call, argIndex, paramIndex

View File

@@ -0,0 +1,41 @@
void global_0();  // no parameters
void global_1(int a);  // one positional parameter
void global_2(int a, float b);  // two positional parameters
void global_2_vararg(int a, float b, ...);  // two positional parameters followed by `...`
void call_globals(int a, float b, void* c, bool d) {  // direct calls to the global functions
global_0();  // no arguments
global_1(a);
global_2(a, b);
global_2_vararg(a, b);  // nothing is passed to the `...`
global_2_vararg(a, b, c);  // `c` is passed to the `...`
global_2_vararg(a, b, c, d);  // `c` and `d` are both passed to the `...`
}
void (*pfn_0)();  // pointer to a function with no parameters
void (*pfn_1)(int a);  // pointer to a function with one positional parameter
void (*pfn_2)(int a, float b);  // pointer to a function with two positional parameters
void (*pfn_2_vararg)(int a, float b ...);  // varargs; the `...` is written without a preceding comma
void call_pfns(int a, float b, void* c, bool d) {  // the same calls, made through function pointers
pfn_0();  // no arguments
pfn_1(a);
pfn_2(a, b);
pfn_2_vararg(a, b);  // nothing is passed to the `...`
pfn_2_vararg(a, b, c);  // `c` is passed to the `...`
pfn_2_vararg(a, b, c, d);  // `c` and `d` are both passed to the `...`
}
struct S {  // type whose constructors mirror the global function signatures above
S();  // no parameters
S(int a);  // one positional parameter
S(int a, float b, ...);  // two positional parameters followed by `...`
};
void call_constructors(int a, float b, void* c, bool d) {  // the same calls, made as constructor calls
S s0;  // no arguments
S s1(a);
S s2_vararg_0(a, b);  // nothing is passed to the `...`
S s2_vararg_1(a, b, c);  // `c` is passed to the `...`
S s2_vararg_2(a, b, c, d);  // `c` and `d` are both passed to the `...`
}

View File

@@ -4483,49 +4483,53 @@ ir.cpp:
# 888| void VarArgUsage(int)
# 888| Block 0
# 888| v888_1(void) = EnterFunction :
# 888| mu888_2(unknown) = AliasedDefinition :
# 888| mu888_3(unknown) = InitializeNonLocal :
# 888| mu888_4(unknown) = UnmodeledDefinition :
# 888| r888_5(glval<int>) = VariableAddress[x] :
# 888| mu888_6(int) = InitializeParameter[x] : &:r888_5
# 889| r889_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 889| mu889_2(__va_list_tag[1]) = Uninitialized[args] : &:r889_1
# 891| r891_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 891| r891_2(__va_list_tag *) = Convert : r891_1
# 891| r891_3(glval<int>) = VariableAddress[x] :
# 891| v891_4(void) = VarArgsStart : 0:r891_2, 1:r891_3
# 892| r892_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 892| mu892_2(__va_list_tag[1]) = Uninitialized[args2] : &:r892_1
# 893| r893_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 893| r893_2(__va_list_tag *) = Convert : r893_1
# 893| r893_3(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 893| r893_4(__va_list_tag *) = Convert : r893_3
# 893| v893_5(void) = VarArgsStart : 0:r893_2, 1:r893_4
# 894| r894_1(glval<double>) = VariableAddress[d] :
# 894| r894_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 894| r894_3(__va_list_tag *) = Convert : r894_2
# 894| r894_4(glval<double>) = VarArg : 0:r894_3
# 894| r894_5(double) = Load : &:r894_4, ~mu888_4
# 894| mu894_6(double) = Store : &:r894_1, r894_5
# 895| r895_1(glval<float>) = VariableAddress[f] :
# 895| r895_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 895| r895_3(__va_list_tag *) = Convert : r895_2
# 895| r895_4(glval<double>) = VarArg : 0:r895_3
# 895| r895_5(double) = Load : &:r895_4, ~mu888_4
# 895| r895_6(float) = Convert : r895_5
# 895| mu895_7(float) = Store : &:r895_1, r895_6
# 896| r896_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 896| r896_2(__va_list_tag *) = Convert : r896_1
# 896| v896_3(void) = VarArgsEnd : 0:r896_2
# 897| r897_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 897| r897_2(__va_list_tag *) = Convert : r897_1
# 897| v897_3(void) = VarArgsEnd : 0:r897_2
# 898| v898_1(void) = NoOp :
# 888| v888_7(void) = ReturnVoid :
# 888| v888_8(void) = UnmodeledUse : mu*
# 888| v888_9(void) = AliasedUse : ~mu888_4
# 888| v888_10(void) = ExitFunction :
# 888| v888_1(void) = EnterFunction :
# 888| mu888_2(unknown) = AliasedDefinition :
# 888| mu888_3(unknown) = InitializeNonLocal :
# 888| mu888_4(unknown) = UnmodeledDefinition :
# 888| r888_5(glval<int>) = VariableAddress[x] :
# 888| mu888_6(int) = InitializeParameter[x] : &:r888_5
# 888| r888_7(glval<unknown>) = VariableAddress[#ellipsis] :
# 888| mu888_8(unknown[11]) = InitializeParameter[#ellipsis] : &:r888_7
# 888| r888_9(unknown[11]) = Load : &:r888_7, ~mu888_8
# 888| mu888_10(unknown) = InitializeIndirection[#ellipsis] : &:r888_9
# 889| r889_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 889| mu889_2(__va_list_tag[1]) = Uninitialized[args] : &:r889_1
# 891| r891_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 891| r891_2(__va_list_tag *) = Convert : r891_1
# 891| r891_3(glval<int>) = VariableAddress[x] :
# 891| v891_4(void) = VarArgsStart : 0:r891_2, 1:r891_3
# 892| r892_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 892| mu892_2(__va_list_tag[1]) = Uninitialized[args2] : &:r892_1
# 893| r893_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 893| r893_2(__va_list_tag *) = Convert : r893_1
# 893| r893_3(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 893| r893_4(__va_list_tag *) = Convert : r893_3
# 893| v893_5(void) = VarArgsStart : 0:r893_2, 1:r893_4
# 894| r894_1(glval<double>) = VariableAddress[d] :
# 894| r894_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 894| r894_3(__va_list_tag *) = Convert : r894_2
# 894| r894_4(glval<double>) = VarArg : 0:r894_3
# 894| r894_5(double) = Load : &:r894_4, ~mu888_4
# 894| mu894_6(double) = Store : &:r894_1, r894_5
# 895| r895_1(glval<float>) = VariableAddress[f] :
# 895| r895_2(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 895| r895_3(__va_list_tag *) = Convert : r895_2
# 895| r895_4(glval<double>) = VarArg : 0:r895_3
# 895| r895_5(double) = Load : &:r895_4, ~mu888_4
# 895| r895_6(float) = Convert : r895_5
# 895| mu895_7(float) = Store : &:r895_1, r895_6
# 896| r896_1(glval<__va_list_tag[1]>) = VariableAddress[args] :
# 896| r896_2(__va_list_tag *) = Convert : r896_1
# 896| v896_3(void) = VarArgsEnd : 0:r896_2
# 897| r897_1(glval<__va_list_tag[1]>) = VariableAddress[args2] :
# 897| r897_2(__va_list_tag *) = Convert : r897_1
# 897| v897_3(void) = VarArgsEnd : 0:r897_2
# 898| v898_1(void) = NoOp :
# 888| v888_11(void) = ReturnVoid :
# 888| v888_12(void) = UnmodeledUse : mu*
# 888| v888_13(void) = AliasedUse : ~mu888_4
# 888| v888_14(void) = ExitFunction :
# 900| void CastToVoid(int)
# 900| Block 0