C++: Fix perf of IR value numbering

On some snapshots, notably ffmpeg, the IR `ValueNumbering` recursion
would generate billions of tuples and eventually run out of space.

It turns out it was fairly common for an `Instruction` to get more than
one `ValueNumber` in the base cases for `VariableAddressInstruction` and
`InitializeParameterInstruction`, and it could also happen in an
instruction with more than one operand of the same `OperandTag`. When a
binary operation was applied to an instruction with `m` value numbers
and another instruction with `n` value numbers, the result would get
`m * n` value numbers. This led to doubly-exponential growth in the
number of value numbers in rare cases.

The underlying reason why a `VariableAddressInstruction` could get
multiple value numbers is that it was keyed on the associated
`IRVariable`, and the `IRVariable` is defined in part by the type of its
underlying `Variable` (or other AST element). If the extractor defines a
variable to have multiple types because of linker ambiguity, this leads
to the creation of multiple `IRVariable`s. That should ideally be solved
in `TIRVariable.qll`, but for now I've put a workaround in
`ValueNumberingInternal.qll` instead.

To remove the problem with instructions having multiple operands, the
construction in `Operand.qll` will now filter out any such operand. It
wasn't enough to apply that filter to the `raw` stage, so I've applied
it to all three stages.
This commit is contained in:
Jonas Jensen
2020-02-12 14:04:03 +01:00
parent de45b1a08e
commit 033a4c30ea
8 changed files with 91 additions and 43 deletions

View File

@@ -11,13 +11,19 @@ cached
private newtype TOperand =
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
} or
TNonPhiMemoryOperand(
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
) {
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
(
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
or
tag instanceof UnmodeledUseOperandTag
)
} or
TPhiOperand(
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

View File

@@ -2,10 +2,10 @@ private import ValueNumberingImports
private import cpp
newtype TValueNumber =
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
variableAddressValueNumber(_, irFunc, var)
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
variableAddressValueNumber(_, irFunc, ast)
} or
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
initializeParameterValueNumber(_, irFunc, var)
} or
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
}
private predicate variableAddressValueNumber(
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = ast
}
private predicate initializeParameterValueNumber(
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = var
}
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
exists(IRFunction irFunc |
irFunc = instr.getEnclosingIRFunction() and
(
exists(IRVariable var |
variableAddressValueNumber(instr, irFunc, var) and
result = TVariableAddressValueNumber(irFunc, var)
exists(Language::AST ast |
variableAddressValueNumber(instr, irFunc, ast) and
result = TVariableAddressValueNumber(irFunc, ast)
)
or
exists(IRVariable var |
exists(Language::AST var |
initializeParameterValueNumber(instr, irFunc, var) and
result = TInitializeParameterValueNumber(irFunc, var)
)

View File

@@ -11,13 +11,19 @@ cached
private newtype TOperand =
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
} or
TNonPhiMemoryOperand(
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
) {
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
(
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
or
tag instanceof UnmodeledUseOperandTag
)
} or
TPhiOperand(
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

View File

@@ -2,10 +2,10 @@ private import ValueNumberingImports
private import cpp
newtype TValueNumber =
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
variableAddressValueNumber(_, irFunc, var)
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
variableAddressValueNumber(_, irFunc, ast)
} or
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
initializeParameterValueNumber(_, irFunc, var)
} or
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
}
private predicate variableAddressValueNumber(
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = ast
}
private predicate initializeParameterValueNumber(
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = var
}
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
exists(IRFunction irFunc |
irFunc = instr.getEnclosingIRFunction() and
(
exists(IRVariable var |
variableAddressValueNumber(instr, irFunc, var) and
result = TVariableAddressValueNumber(irFunc, var)
exists(Language::AST ast |
variableAddressValueNumber(instr, irFunc, ast) and
result = TVariableAddressValueNumber(irFunc, ast)
)
or
exists(IRVariable var |
exists(Language::AST var |
initializeParameterValueNumber(instr, irFunc, var) and
result = TInitializeParameterValueNumber(irFunc, var)
)

View File

@@ -11,13 +11,19 @@ cached
private newtype TOperand =
TRegisterOperand(Instruction useInstr, RegisterOperandTag tag, Instruction defInstr) {
defInstr = Construction::getRegisterOperandDefinition(useInstr, tag) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
strictcount(Construction::getRegisterOperandDefinition(useInstr, tag)) = 1
} or
TNonPhiMemoryOperand(
Instruction useInstr, MemoryOperandTag tag, Instruction defInstr, Overlap overlap
) {
defInstr = Construction::getMemoryOperandDefinition(useInstr, tag, overlap) and
not Construction::isInCycle(useInstr)
not Construction::isInCycle(useInstr) and
(
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
or
tag instanceof UnmodeledUseOperandTag
)
} or
TPhiOperand(
PhiInstruction useInstr, Instruction defInstr, IRBlock predecessorBlock, Overlap overlap

View File

@@ -2,10 +2,10 @@ private import ValueNumberingImports
private import cpp
newtype TValueNumber =
TVariableAddressValueNumber(IRFunction irFunc, IRVariable var) {
variableAddressValueNumber(_, irFunc, var)
TVariableAddressValueNumber(IRFunction irFunc, Language::AST ast) {
variableAddressValueNumber(_, irFunc, ast)
} or
TInitializeParameterValueNumber(IRFunction irFunc, IRVariable var) {
TInitializeParameterValueNumber(IRFunction irFunc, Language::AST var) {
initializeParameterValueNumber(_, irFunc, var)
} or
TInitializeThisValueNumber(IRFunction irFunc) { initializeThisValueNumber(_, irFunc) } or
@@ -100,17 +100,23 @@ private predicate numberableInstruction(Instruction instr) {
}
private predicate variableAddressValueNumber(
VariableAddressInstruction instr, IRFunction irFunc, IRVariable var
VariableAddressInstruction instr, IRFunction irFunc, Language::AST ast
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = ast
}
private predicate initializeParameterValueNumber(
InitializeParameterInstruction instr, IRFunction irFunc, IRVariable var
InitializeParameterInstruction instr, IRFunction irFunc, Language::AST var
) {
instr.getEnclosingIRFunction() = irFunc and
instr.getIRVariable() = var
// The underlying AST element is used as value-numbering key instead of the
// `IRVariable` to work around a problem where a variable or expression with
// multiple types gives rise to multiple `IRVariable`s.
instr.getIRVariable().getAST() = var
}
private predicate initializeThisValueNumber(InitializeThisInstruction instr, IRFunction irFunc) {
@@ -236,12 +242,12 @@ private TValueNumber nonUniqueValueNumber(Instruction instr) {
exists(IRFunction irFunc |
irFunc = instr.getEnclosingIRFunction() and
(
exists(IRVariable var |
variableAddressValueNumber(instr, irFunc, var) and
result = TVariableAddressValueNumber(irFunc, var)
exists(Language::AST ast |
variableAddressValueNumber(instr, irFunc, ast) and
result = TVariableAddressValueNumber(irFunc, ast)
)
or
exists(IRVariable var |
exists(Language::AST var |
initializeParameterValueNumber(instr, irFunc, var) and
result = TInitializeParameterValueNumber(irFunc, var)
)