Merge pull request #517 from dave-bartolomeo/dave/IRFilter

C++: Don't generate IR for functions with bad ASTs
This commit is contained in:
Jonas Jensen
2018-11-22 10:02:18 +01:00
committed by GitHub
12 changed files with 540 additions and 24 deletions

View File

@@ -59,18 +59,36 @@ private predicate locationSortKeys(Locatable ast, string file, int line,
)
}
/**
 * Gets the function associated with `ast`: `ast` itself if it is a function, the
 * function declaring it if it is a parameter, or the function enclosing the
 * expression, statement, initializer expression, or declaration statement that
 * `ast` is (or is declared by).
 */
private Function getEnclosingFunction(Locatable ast) {
  result = ast
  or
  result = ast.(Parameter).getFunction()
  or
  result = ast.(Expr).getEnclosingFunction()
  or
  result = ast.(Stmt).getEnclosingFunction()
  or
  result = ast.(Initializer).getExpr().getEnclosingFunction()
  or
  exists(DeclStmt declStmt |
    declStmt.getADeclarationEntry() = ast and
    result = declStmt.getEnclosingFunction()
  )
}
/**
* Most nodes are just a wrapper around `Locatable`, but we do synthesize new
* nodes for things like parameter lists and constructor init lists.
*/
private newtype TPrintASTNode =
TASTNode(Locatable ast) or
TParametersNode(Function func) or
TASTNode(Locatable ast) {
shouldPrintFunction(getEnclosingFunction(ast))
} or
TParametersNode(Function func) {
shouldPrintFunction(func)
} or
TConstructorInitializersNode(Constructor ctor) {
ctor.hasEntryPoint()
ctor.hasEntryPoint() and
shouldPrintFunction(ctor)
} or
TDestructorDestructionsNode(Destructor dtor) {
dtor.hasEntryPoint()
dtor.hasEntryPoint() and
shouldPrintFunction(dtor)
}
/**

View File

@@ -0,0 +1,19 @@
import cpp
private newtype TIRConfiguration = MkIRConfiguration()

/**
 * A configuration that controls which functions have IR generated for them.
 * A query can extend this class and override `shouldCreateIRForFunction`.
 */
class IRConfiguration extends TIRConfiguration {
  /** Gets a textual representation of this configuration. */
  string toString() { result = "IRConfiguration" }

  /**
   * Holds if IR should be created for function `func`. By default, holds for
   * all functions.
   */
  predicate shouldCreateIRForFunction(Function func) { any() }
}

View File

@@ -1,5 +1,40 @@
private import IR
import cpp
import semmle.code.cpp.ir.IRConfiguration
private newtype TPrintIRConfiguration = MkPrintIRConfiguration()

/**
 * A configuration that controls which functions have their IR printed.
 * A query can extend this class and override `shouldPrintFunction`.
 */
class PrintIRConfiguration extends TPrintIRConfiguration {
  /** Gets a textual representation of this configuration. */
  string toString() { result = "PrintIRConfiguration" }

  /**
   * Holds if the IR for `func` should be printed. By default, holds for all
   * functions.
   */
  predicate shouldPrintFunction(Function func) { any() }
}
/**
 * Holds if some `PrintIRConfiguration` requests that the IR for `func` be printed.
 */
private predicate shouldPrintFunction(Function func) {
  any(PrintIRConfiguration cfg).shouldPrintFunction(func)
}
/**
 * An `IRConfiguration` that limits IR creation to the functions whose IR is
 * going to be dumped, as decided by `shouldPrintFunction`.
 */
private class FilteredIRConfiguration extends IRConfiguration {
  override predicate shouldCreateIRForFunction(Function func) { shouldPrintFunction(func) }
}
private string getAdditionalInstructionProperty(Instruction instr, string key) {
exists(IRPropertyProvider provider |
@@ -14,9 +49,15 @@ private string getAdditionalBlockProperty(IRBlock block, string key) {
}
private newtype TPrintableIRNode =
TPrintableFunctionIR(FunctionIR funcIR) or
TPrintableIRBlock(IRBlock block) or
TPrintableInstruction(Instruction instr)
TPrintableFunctionIR(FunctionIR funcIR) {
shouldPrintFunction(funcIR.getFunction())
} or
TPrintableIRBlock(IRBlock block) {
shouldPrintFunction(block.getFunction())
} or
TPrintableInstruction(Instruction instr) {
shouldPrintFunction(instr.getFunction())
}
/**
* A node to be emitted in the IR graph.

View File

@@ -1,5 +1,40 @@
private import IR
import cpp
import semmle.code.cpp.ir.IRConfiguration
private newtype TPrintIRConfiguration = MkPrintIRConfiguration()

/**
 * A configuration that controls which functions have their IR printed.
 * A query can extend this class and override `shouldPrintFunction`.
 */
class PrintIRConfiguration extends TPrintIRConfiguration {
  /** Gets a textual representation of this configuration. */
  string toString() { result = "PrintIRConfiguration" }

  /**
   * Holds if the IR for `func` should be printed. By default, holds for all
   * functions.
   */
  predicate shouldPrintFunction(Function func) { any() }
}
/**
 * Holds if some `PrintIRConfiguration` requests that the IR for `func` be printed.
 */
private predicate shouldPrintFunction(Function func) {
  any(PrintIRConfiguration cfg).shouldPrintFunction(func)
}
/**
 * An `IRConfiguration` that limits IR creation to the functions whose IR is
 * going to be dumped, as decided by `shouldPrintFunction`.
 */
private class FilteredIRConfiguration extends IRConfiguration {
  override predicate shouldCreateIRForFunction(Function func) { shouldPrintFunction(func) }
}
private string getAdditionalInstructionProperty(Instruction instr, string key) {
exists(IRPropertyProvider provider |
@@ -14,9 +49,15 @@ private string getAdditionalBlockProperty(IRBlock block, string key) {
}
private newtype TPrintableIRNode =
TPrintableFunctionIR(FunctionIR funcIR) or
TPrintableIRBlock(IRBlock block) or
TPrintableInstruction(Instruction instr)
TPrintableFunctionIR(FunctionIR funcIR) {
shouldPrintFunction(funcIR.getFunction())
} or
TPrintableIRBlock(IRBlock block) {
shouldPrintFunction(block.getFunction())
} or
TPrintableInstruction(Instruction instr) {
shouldPrintFunction(instr.getFunction())
}
/**
* A node to be emitted in the IR graph.

View File

@@ -1,5 +1,7 @@
import cpp
import cpp
import semmle.code.cpp.ir.implementation.raw.IR
private import semmle.code.cpp.ir.IRConfiguration
private import semmle.code.cpp.ir.implementation.Opcode
private import semmle.code.cpp.ir.internal.OperandTag
private import semmle.code.cpp.ir.internal.TempVariableTag
@@ -83,7 +85,8 @@ private predicate ignoreExprOnly(Expr expr) {
// Ignore the allocator call, because we always synthesize it. Don't ignore
// its arguments, though, because we use them as part of the synthesis.
newExpr.getAllocatorCall() = expr
)
) or
not translateFunction(expr.getEnclosingFunction())
}
/**
@@ -94,6 +97,49 @@ private predicate ignoreExpr(Expr expr) {
ignoreExprAndDescendants(getRealParent*(expr))
}
/**
 * Holds if the AST of `func` has a shape that cannot be translated into IR.
 * These checks exist mostly to work around extractor bugs, and the predicate
 * can be removed once those bugs are fixed.
 */
private predicate isInvalidFunction(Function func) {
  // Constructor field inits within a compiler-generated copy constructor have a
  // source expression that is a `Literal` with no value.
  exists(Literal fieldInitSource |
    fieldInitSource = func.(Constructor).getAnInitializer().(ConstructorFieldInit).getExpr() and
    not exists(fieldInitSource.getValue())
  )
  or
  // An instantiation of a member function template is not treated as a
  // `MemberFunction` if it has only non-type template arguments, so a `this`
  // expression inside a non-member function indicates a bad AST.
  not func instanceof MemberFunction and
  exists(ThisExpr thisAccess | thisAccess.getEnclosingFunction() = func)
  or
  // Some expression in the function is missing a type.
  exists(Expr untypedExpr |
    untypedExpr.getEnclosingFunction() = func and
    not exists(untypedExpr.getType())
  )
}
/**
 * Holds if `func` should be translated to IR: it has an entry point, is not
 * from an uninstantiated template, is not flagged by `isInvalidFunction`, and
 * some `IRConfiguration` asks for IR to be created for it.
 */
private predicate translateFunction(Function func) {
  func.hasEntryPoint() and
  not func.isFromUninstantiatedTemplate(_) and
  not isInvalidFunction(func) and
  any(IRConfiguration cfg).shouldCreateIRForFunction(func)
}
/**
 * Holds if `stmt` should be translated to IR, which is the case exactly when
 * its enclosing function is translated.
 */
private predicate translateStmt(Stmt stmt) {
  exists(Function enclosing |
    enclosing = stmt.getEnclosingFunction() and
    translateFunction(enclosing)
  )
}
/**
* Holds if `expr` is most naturally evaluated as control flow, rather than as
* a value.
@@ -236,7 +282,7 @@ newtype TTranslatedElement =
} or
// The initialization of a field via a member of an initializer list.
TTranslatedExplicitFieldInitialization(Expr ast, Field field,
Expr expr) {
Expr expr) {
exists(ClassAggregateLiteral initList |
not ignoreExpr(initList) and
ast = initList and
@@ -260,14 +306,14 @@ newtype TTranslatedElement =
} or
// The initialization of an array element via a member of an initializer list.
TTranslatedExplicitElementInitialization(
ArrayAggregateLiteral initList, int elementIndex) {
ArrayAggregateLiteral initList, int elementIndex) {
not ignoreExpr(initList) and
exists(initList.getElementExpr(elementIndex))
} or
// The value initialization of a range of array elements that were omitted
// from an initializer list.
TTranslatedElementValueInitialization(ArrayAggregateLiteral initList,
int elementIndex, int elementCount) {
int elementIndex, int elementCount) {
not ignoreExpr(initList) and
isFirstValueInitializedElementInRange(initList, elementIndex) and
elementCount =
@@ -287,28 +333,35 @@ newtype TTranslatedElement =
not ignoreExpr(destruction)
} or
// A statement
TTranslatedStmt(Stmt stmt) or
TTranslatedStmt(Stmt stmt) {
translateStmt(stmt)
} or
// A function
TTranslatedFunction(Function func) {
func.hasEntryPoint() and
not func.isFromUninstantiatedTemplate(_)
translateFunction(func)
} or
// A constructor init list
TTranslatedConstructorInitList(Function func) {
func.hasEntryPoint()
translateFunction(func)
} or
// A destructor destruction list
TTranslatedDestructorDestructionList(Function func) {
func.hasEntryPoint()
translateFunction(func)
} or
// A function parameter
TTranslatedParameter(Parameter param) {
param.getFunction().hasEntryPoint() or
exists(param.getCatchBlock())
exists(Function func |
(
func = param.getFunction() or
func = param.getCatchBlock().getEnclosingFunction()
) and
translateFunction(func)
)
} or
// A local declaration
TTranslatedDeclarationEntry(DeclarationEntry entry) {
exists(DeclStmt declStmt |
translateStmt(declStmt) and
declStmt.getADeclarationEntry() = entry
)
} or

View File

@@ -1,5 +1,40 @@
private import IR
import cpp
import semmle.code.cpp.ir.IRConfiguration
private newtype TPrintIRConfiguration = MkPrintIRConfiguration()

/**
 * A configuration that controls which functions have their IR printed.
 * A query can extend this class and override `shouldPrintFunction`.
 */
class PrintIRConfiguration extends TPrintIRConfiguration {
  /** Gets a textual representation of this configuration. */
  string toString() { result = "PrintIRConfiguration" }

  /**
   * Holds if the IR for `func` should be printed. By default, holds for all
   * functions.
   */
  predicate shouldPrintFunction(Function func) { any() }
}
/**
 * Holds if some `PrintIRConfiguration` requests that the IR for `func` be printed.
 */
private predicate shouldPrintFunction(Function func) {
  any(PrintIRConfiguration cfg).shouldPrintFunction(func)
}
/**
 * An `IRConfiguration` that limits IR creation to the functions whose IR is
 * going to be dumped, as decided by `shouldPrintFunction`.
 */
private class FilteredIRConfiguration extends IRConfiguration {
  override predicate shouldCreateIRForFunction(Function func) { shouldPrintFunction(func) }
}
private string getAdditionalInstructionProperty(Instruction instr, string key) {
exists(IRPropertyProvider provider |
@@ -14,9 +49,15 @@ private string getAdditionalBlockProperty(IRBlock block, string key) {
}
private newtype TPrintableIRNode =
TPrintableFunctionIR(FunctionIR funcIR) or
TPrintableIRBlock(IRBlock block) or
TPrintableInstruction(Instruction instr)
TPrintableFunctionIR(FunctionIR funcIR) {
shouldPrintFunction(funcIR.getFunction())
} or
TPrintableIRBlock(IRBlock block) {
shouldPrintFunction(block.getFunction())
} or
TPrintableInstruction(Instruction instr) {
shouldPrintFunction(instr.getFunction())
}
/**
* A node to be emitted in the IR graph.

View File

@@ -34,6 +34,142 @@
#-----| Type = unsigned long
#-----| 1: p#1
#-----| Type = align_val_t
bad_asts.cpp:
# 5| Bad::S::operator=(S &&) -> S &
# 5| params:
#-----| 0: p#0
#-----| Type = S &&
# 5| Bad::S::operator=(const S &) -> S &
# 5| params:
#-----| 0: p#0
#-----| Type = const S &
# 9| Bad::S::MemberFunction(int) -> int
# 9| params:
# 9| 0: y
# 9| Type = int
# 9| body: { ... }
# 10| 0: return ...
# 10| 0: ... + ...
# 10| Type = int
# 10| ValueCategory = prvalue
# 10| 0: ... + ...
# 10| Type = int
# 10| ValueCategory = prvalue
# 10| 0: Unknown literal
# 10| Type = int
# 10| ValueCategory = prvalue
# 10| 1: x
# 10| Type = int
# 10| ValueCategory = prvalue(load)
#-----| -1: this
#-----| Type = S *
#-----| ValueCategory = prvalue(load)
# 10| 1: y
# 10| Type = int
# 10| ValueCategory = prvalue(load)
# 9| MemberFunction(int) -> int
# 9| params:
# 9| 0: y
# 9| Type = int
# 9| body: { ... }
# 10| 0: return ...
# 10| 0: ... + ...
# 10| Type = int
# 10| ValueCategory = prvalue
# 10| 0: ... + ...
# 10| Type = int
# 10| ValueCategory = prvalue
# 10| 0: 6
# 10| Type = int
# 10| Value = 6
# 10| ValueCategory = prvalue
# 10| 1: x
# 10| Type = int
# 10| ValueCategory = prvalue(load)
#-----| -1: this
#-----| Type = S *
#-----| ValueCategory = prvalue(load)
# 10| 1: y
# 10| Type = int
# 10| ValueCategory = prvalue(load)
# 14| Bad::CallBadMemberFunction() -> void
# 14| params:
# 14| body: { ... }
# 15| 0: declaration
# 15| 0: definition of s
# 15| Type = S
# 15| init: initializer for s
# 15| expr: {...}
# 15| Type = S
# 15| ValueCategory = prvalue
# 16| 1: ExprStmt
# 16| 0: call to MemberFunction
# 16| Type = int
# 16| ValueCategory = prvalue
# 16| -1: s
# 16| Type = S
# 16| ValueCategory = lvalue
# 16| 0: 1
# 16| Type = int
# 16| Value = 1
# 16| ValueCategory = prvalue
# 17| 2: return ...
# 19| Bad::Point::Point(Point &&) -> void
# 19| params:
#-----| 0: p#0
#-----| Type = Point &&
# 19| Bad::Point::Point(const Point &) -> void
# 19| params:
#-----| 0: p#0
#-----| Type = const Point &
# 19| initializations:
# 19| 0: constructor init of field x
# 19| Type = int
# 19| ValueCategory = prvalue
# 19| 0: Unknown literal
# 19| Type = int
# 19| ValueCategory = prvalue
# 19| 1: constructor init of field y
# 19| Type = int
# 19| ValueCategory = prvalue
# 19| 0: Unknown literal
# 19| Type = int
# 19| ValueCategory = prvalue
# 19| body: { ... }
# 19| 0: return ...
# 19| Bad::Point::operator=(Point &&) -> Point &
# 19| params:
#-----| 0: p#0
#-----| Type = Point &&
# 19| Bad::Point::operator=(const Point &) -> Point &
# 19| params:
#-----| 0: p#0
#-----| Type = const Point &
# 22| Bad::Point::Point() -> void
# 22| params:
# 22| initializations:
# 22| body: { ... }
# 23| 0: return ...
# 26| Bad::CallCopyConstructor(const Point &) -> void
# 26| params:
# 26| 0: a
# 26| Type = const Point &
# 26| body: { ... }
# 27| 0: declaration
# 27| 0: definition of b
# 27| Type = Point
# 27| init: initializer for b
# 27| expr: (Point)...
# 27| Conversion = glvalue conversion
# 27| Type = Point
# 27| ValueCategory = prvalue(load)
# 27| expr: (reference dereference)
# 27| Type = const Point
# 27| ValueCategory = lvalue
# 27| expr: a
# 27| Type = const Point &
# 27| ValueCategory = prvalue(load)
# 28| 1: return ...
ir.cpp:
# 1| Constants() -> void
# 1| params:

View File

@@ -1,3 +1,48 @@
bad_asts.cpp:
# 14| Bad::CallBadMemberFunction() -> void
# 14| Block 0
# 14| v0_0(void) = EnterFunction :
# 14| mu0_1(unknown) = UnmodeledDefinition :
# 15| r0_2(glval<S>) = VariableAddress[s] :
# 15| r0_3(glval<int>) = FieldAddress[x] : r0_2
# 15| r0_4(int) = Constant[0] :
# 15| mu0_5(int) = Store : r0_3, r0_4
# 16| r0_6(glval<S>) = VariableAddress[s] :
# 16| r0_7(glval<unknown>) = FunctionAddress[MemberFunction] :
# 16| r0_8(int) = Constant[1] :
# 16| r0_9(int) = Call : r0_7, this:r0_6, r0_8
# 17| v0_10(void) = NoOp :
# 14| v0_11(void) = ReturnVoid :
# 14| v0_12(void) = UnmodeledUse : mu*
# 14| v0_13(void) = ExitFunction :
# 22| Bad::Point::Point() -> void
# 22| Block 0
# 22| v0_0(void) = EnterFunction :
# 22| mu0_1(unknown) = UnmodeledDefinition :
# 22| r0_2(glval<Point>) = InitializeThis :
# 23| v0_3(void) = NoOp :
# 22| v0_4(void) = ReturnVoid :
# 22| v0_5(void) = UnmodeledUse : mu*
# 22| v0_6(void) = ExitFunction :
# 26| Bad::CallCopyConstructor(const Point &) -> void
# 26| Block 0
# 26| v0_0(void) = EnterFunction :
# 26| mu0_1(unknown) = UnmodeledDefinition :
# 26| r0_2(glval<Point &>) = VariableAddress[a] :
# 26| m0_3(Point &) = InitializeParameter[a] : r0_2
# 27| r0_4(glval<Point>) = VariableAddress[b] :
# 27| r0_5(glval<Point &>) = VariableAddress[a] :
# 27| r0_6(Point &) = Load : r0_5, m0_3
# 27| r0_7(glval<Point>) = Convert : r0_6
# 27| r0_8(Point) = Load : r0_7, mu0_1
# 27| m0_9(Point) = Store : r0_4, r0_8
# 28| v0_10(void) = NoOp :
# 26| v0_11(void) = ReturnVoid :
# 26| v0_12(void) = UnmodeledUse : mu*
# 26| v0_13(void) = ExitFunction :
ir.cpp:
# 1| Constants() -> void
# 1| Block 0

View File

@@ -0,0 +1,29 @@
// semmle-extractor-options: -std=c++17
// Test cases that illustrate known bad ASTs that we have to work around in IR generation.
namespace Bad { // NOTE: expected test outputs refer to this file by line number; keep the line layout stable.
struct S {
int x;
template<int t> // Takes only a non-type template parameter.
int MemberFunction(int y) {
return t + x + y; // Reads member `x`, so the body uses `this` implicitly.
}
};
void CallBadMemberFunction() {
S s = {};
s.MemberFunction<6>(1); // Not marked as member function in AST.
}
struct Point {
int x;
int y;
Point() { // Only the default constructor is user-declared; no copy constructor is written here.
}
};
void CallCopyConstructor(const Point& a) {
Point b = a; // Copy constructor contains literal expressions with no values.
}
}

View File

@@ -1,3 +1,48 @@
bad_asts.cpp:
# 14| Bad::CallBadMemberFunction() -> void
# 14| Block 0
# 14| v0_0(void) = EnterFunction :
# 14| mu0_1(unknown) = UnmodeledDefinition :
# 15| r0_2(glval<S>) = VariableAddress[s] :
# 15| r0_3(glval<int>) = FieldAddress[x] : r0_2
# 15| r0_4(int) = Constant[0] :
# 15| mu0_5(int) = Store : r0_3, r0_4
# 16| r0_6(glval<S>) = VariableAddress[s] :
# 16| r0_7(glval<unknown>) = FunctionAddress[MemberFunction] :
# 16| r0_8(int) = Constant[1] :
# 16| r0_9(int) = Call : r0_7, this:r0_6, r0_8
# 17| v0_10(void) = NoOp :
# 14| v0_11(void) = ReturnVoid :
# 14| v0_12(void) = UnmodeledUse : mu*
# 14| v0_13(void) = ExitFunction :
# 22| Bad::Point::Point() -> void
# 22| Block 0
# 22| v0_0(void) = EnterFunction :
# 22| mu0_1(unknown) = UnmodeledDefinition :
# 22| r0_2(glval<Point>) = InitializeThis :
# 23| v0_3(void) = NoOp :
# 22| v0_4(void) = ReturnVoid :
# 22| v0_5(void) = UnmodeledUse : mu*
# 22| v0_6(void) = ExitFunction :
# 26| Bad::CallCopyConstructor(const Point &) -> void
# 26| Block 0
# 26| v0_0(void) = EnterFunction :
# 26| mu0_1(unknown) = UnmodeledDefinition :
# 26| r0_2(glval<Point &>) = VariableAddress[a] :
# 26| mu0_3(Point &) = InitializeParameter[a] : r0_2
# 27| r0_4(glval<Point>) = VariableAddress[b] :
# 27| r0_5(glval<Point &>) = VariableAddress[a] :
# 27| r0_6(Point &) = Load : r0_5, mu0_1
# 27| r0_7(glval<Point>) = Convert : r0_6
# 27| r0_8(Point) = Load : r0_7, mu0_1
# 27| mu0_9(Point) = Store : r0_4, r0_8
# 28| v0_10(void) = NoOp :
# 26| v0_11(void) = ReturnVoid :
# 26| v0_12(void) = UnmodeledUse : mu*
# 26| v0_13(void) = ExitFunction :
ir.cpp:
# 1| Constants() -> void
# 1| Block 0

View File

@@ -8,6 +8,8 @@
| IR: C | 1 |
| IR: Call | 1 |
| IR: CallAdd | 1 |
| IR: CallBadMemberFunction | 1 |
| IR: CallCopyConstructor | 1 |
| IR: CallMethods | 1 |
| IR: CallMin | 1 |
| IR: CallNestedTemplateFunc | 1 |
@@ -74,6 +76,7 @@
| IR: OperatorNew | 1 |
| IR: OperatorNewArray | 1 |
| IR: Parameters | 1 |
| IR: Point | 1 |
| IR: PointerCompare | 1 |
| IR: PointerCrement | 1 |
| IR: PointerOps | 1 |

View File

@@ -1,3 +1,48 @@
bad_asts.cpp:
# 14| Bad::CallBadMemberFunction() -> void
# 14| Block 0
# 14| v0_0(void) = EnterFunction :
# 14| mu0_1(unknown) = UnmodeledDefinition :
# 15| r0_2(glval<S>) = VariableAddress[s] :
# 15| r0_3(glval<int>) = FieldAddress[x] : r0_2
# 15| r0_4(int) = Constant[0] :
# 15| mu0_5(int) = Store : r0_3, r0_4
# 16| r0_6(glval<S>) = VariableAddress[s] :
# 16| r0_7(glval<unknown>) = FunctionAddress[MemberFunction] :
# 16| r0_8(int) = Constant[1] :
# 16| r0_9(int) = Call : r0_7, this:r0_6, r0_8
# 17| v0_10(void) = NoOp :
# 14| v0_11(void) = ReturnVoid :
# 14| v0_12(void) = UnmodeledUse : mu*
# 14| v0_13(void) = ExitFunction :
# 22| Bad::Point::Point() -> void
# 22| Block 0
# 22| v0_0(void) = EnterFunction :
# 22| mu0_1(unknown) = UnmodeledDefinition :
# 22| r0_2(glval<Point>) = InitializeThis :
# 23| v0_3(void) = NoOp :
# 22| v0_4(void) = ReturnVoid :
# 22| v0_5(void) = UnmodeledUse : mu*
# 22| v0_6(void) = ExitFunction :
# 26| Bad::CallCopyConstructor(const Point &) -> void
# 26| Block 0
# 26| v0_0(void) = EnterFunction :
# 26| mu0_1(unknown) = UnmodeledDefinition :
# 26| r0_2(glval<Point &>) = VariableAddress[a] :
# 26| m0_3(Point &) = InitializeParameter[a] : r0_2
# 27| r0_4(glval<Point>) = VariableAddress[b] :
# 27| r0_5(glval<Point &>) = VariableAddress[a] :
# 27| r0_6(Point &) = Load : r0_5, m0_3
# 27| r0_7(glval<Point>) = Convert : r0_6
# 27| r0_8(Point) = Load : r0_7, mu0_1
# 27| m0_9(Point) = Store : r0_4, r0_8
# 28| v0_10(void) = NoOp :
# 26| v0_11(void) = ReturnVoid :
# 26| v0_12(void) = UnmodeledUse : mu*
# 26| v0_13(void) = ExitFunction :
ir.cpp:
# 1| Constants() -> void
# 1| Block 0