C++: Prevent AliasedVirtualVariable from overlapping string literals

We were hitting a combinatorial explosion in `hasDefinitionAtRank` for functions that contain a large number of string literals. The problem was that every `Chi` instruction for `AliasedVirtualVariable` was treated as a definition of every string literal. We already mark string literals as `isReadOnly()`, but we were allowing `AliasedVirtualVariable` to define read-only locations so that the `AliasedDefinition` instruction would provide the initial definition for all string literals.

To fix this, I've introduced the new `InitializeNonLocal` instruction, which is inserted in the prologue of every function right after `AliasedDefinition`. It provides the initial definition for every non-stack memory location, including read-only locations, but is never written to anywhere else. It is the conterpart of the `AliasedUse` instruction in the function epilogue, which represents the use of all non-stack memory after the function returns. I considered renaming `AliasedUse` to `ReturnNonLocal`, to match the `InitializeXXX`/`ReturnXXX` pattern we already use for parameters and indirections, but held off to avoid unnecessary churn. Any thoughts on whether I should make this name change?

This change has a significant speedup in evaluation time for a few of our troublesome databases:
`attnam/ivan`: 13%
`awslabs/s2n`: 26%
`SinaMostafanejad/OpenRDM`: 7%
`zcoinofficial/zcoin`: 8%
This commit is contained in:
Dave Bartolomeo
2020-01-31 10:51:46 -07:00
parent 3b81c3b95c
commit e27a0fe504
13 changed files with 3652 additions and 3282 deletions

View File

@@ -63,6 +63,7 @@ private newtype TOpcode =
TUnmodeledDefinition() or
TUnmodeledUse() or
TAliasedDefinition() or
TInitializeNonLocal() or
TAliasedUse() or
TPhi() or
TBuiltIn() or
@@ -596,6 +597,14 @@ module Opcode {
final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess }
}
class InitializeNonLocal extends Opcode, TInitializeNonLocal {
final override string toString() { result = "InitializeNonLocal" }
final override MemoryAccessKind getWriteMemoryAccess() {
result instanceof NonLocalMemoryAccess
}
}
class AliasedUse extends Opcode, TAliasedUse {
final override string toString() { result = "AliasedUse" }

View File

@@ -298,7 +298,7 @@ class AllNonLocalMemory extends TAllNonLocalMemory, MemoryLocation {
final override string toStringInternal() { result = "{AllNonLocal}" }
final override VirtualVariable getVirtualVariable() { result = TAllAliasedMemory(irFunc, false) }
final override AliasedVirtualVariable getVirtualVariable() { result.getIRFunction() = irFunc }
final override Language::LanguageType getType() {
result = any(IRUnknownType type).getCanonicalLanguageType()
@@ -311,6 +311,14 @@ class AllNonLocalMemory extends TAllNonLocalMemory, MemoryLocation {
final override string getUniqueId() { result = "{AllNonLocal}" }
final override predicate isMayAccess() { isMayAccess = true }
override predicate canDefineReadOnly() {
// A "must" access that defines all non-local memory appears only on the `InitializeNonLocal`
// instruction, which provides the initial definition for all memory outside of the current
// function's stack frame. This memory includes string literals and other read-only globals, so
// we allow such an access to be the definition for a use of a read-only location.
not isMayAccess()
}
}
/**
@@ -341,16 +349,6 @@ class AllAliasedMemory extends TAllAliasedMemory, MemoryLocation {
class AliasedVirtualVariable extends AllAliasedMemory, VirtualVariable {
AliasedVirtualVariable() { not isMayAccess() }
override predicate canDefineReadOnly() {
// A must-def of all aliased memory is only used in two places:
// 1. In the prologue of the function, to provide a definition for all memory defined before the
// function was called. In this case, it needs to provide a definition even for read-only
// non-local variables.
// 2. As the result of a `Chi` instruction. These don't participate in overlap analysis, so it's
// OK if we let this predicate hold in that case.
any()
}
}
/**
@@ -405,10 +403,11 @@ private Overlap getExtentOverlap(MemoryLocation def, MemoryLocation use) {
use instanceof AllNonLocalMemory and
result instanceof MustExactlyOverlap
or
// AllNonLocalMemory may partially overlap any location within the same virtual variable,
// except a local variable.
result instanceof MayPartiallyOverlap and
not use.isAlwaysAllocatedOnStack()
// AllNonLocalMemory may partially overlap any other location within the same virtual
// variable, except a stack variable.
not use instanceof AllNonLocalMemory and
not use.isAlwaysAllocatedOnStack() and
result instanceof MayPartiallyOverlap
)
or
def.getVirtualVariable() = use.getVirtualVariable() and

View File

@@ -759,7 +759,21 @@ module DefUse {
then defLocation = useLocation
else (
definitionHasPhiNode(defLocation, block) and
defLocation = useLocation.getVirtualVariable()
defLocation = useLocation.getVirtualVariable() and
// Handle the unusual case where a virtual variable does not overlap one of its member
// locations. For example, a definition of the virtual variable representing all aliased
// memory does not overlap a use of a string literal, because the contents of a string
// literal can never be redefined. The string literal's location could still be a member of
// the `AliasedVirtualVariable` due to something like:
// ```
// char s[10];
// strcpy(s, p);
// const char* p = b ? "SomeLiteral" : s;
// return p[3];
// ```
// In the above example, `p[3]` may access either the string literal or the local variable
// `s`, so both of those locations must be members of the `AliasedVirtualVariable`.
exists(Alias::getOverlap(defLocation, useLocation))
)
)
or

View File

@@ -28,6 +28,7 @@ newtype TInstructionTag =
UnmodeledDefinitionTag() or
UnmodeledUseTag() or
AliasedDefinitionTag() or
InitializeNonLocalTag() or
AliasedUseTag() or
SwitchBranchTag() or
CallTargetTag() or
@@ -126,6 +127,8 @@ string getInstructionTagId(TInstructionTag tag) {
or
tag = AliasedDefinitionTag() and result = "AliasedDef"
or
tag = InitializeNonLocalTag() and result = "InitNonLocal"
or
tag = AliasedUseTag() and result = "AliasedUse"
or
tag = SwitchBranchTag() and result = "SwitchBranch"

View File

@@ -71,6 +71,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
result = getInstruction(AliasedDefinitionTag())
or
tag = AliasedDefinitionTag() and
result = getInstruction(InitializeNonLocalTag())
or
tag = InitializeNonLocalTag() and
result = getInstruction(UnmodeledDefinitionTag())
or
(
@@ -144,6 +147,10 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
opcode instanceof Opcode::AliasedDefinition and
resultType = getUnknownType()
or
tag = InitializeNonLocalTag() and
opcode instanceof Opcode::InitializeNonLocal and
resultType = getUnknownType()
or
tag = InitializeThisTag() and
opcode instanceof Opcode::InitializeThis and
resultType = getTypeForGLValue(getThisType())

View File

@@ -759,7 +759,21 @@ module DefUse {
then defLocation = useLocation
else (
definitionHasPhiNode(defLocation, block) and
defLocation = useLocation.getVirtualVariable()
defLocation = useLocation.getVirtualVariable() and
// Handle the unusual case where a virtual variable does not overlap one of its member
// locations. For example, a definition of the virtual variable representing all aliased
// memory does not overlap a use of a string literal, because the contents of a string
// literal can never be redefined. The string literal's location could still be a member of
// the `AliasedVirtualVariable` due to something like:
// ```
// char s[10];
// strcpy(s, p);
// const char* p = b ? "SomeLiteral" : s;
// return p[3];
// ```
// In the above example, `p[3]` may access either the string literal or the local variable
// `s`, so both of those locations must be members of the `AliasedVirtualVariable`.
exists(Alias::getOverlap(defLocation, useLocation))
)
)
or