mirror of
https://github.com/github/codeql.git
synced 2025-12-22 03:36:30 +01:00
C++: Prevent AliasedVirtualVariable from overlapping string literals
We were hitting a combinatorial explosion in `hasDefinitionAtRank` for functions that contain a large number of string literals. The problem was that every `Chi` instruction for `AliasedVirtualVariable` was treated as a definition of every string literal. We already mark string literals as `isReadOnly()`, but we were allowing `AliasedVirtualVariable` to define read-only locations so that the `AliasedDefinition` instruction would provide the initial definition for all string literals. To fix this, I've introduced the new `InitializeNonLocal` instruction, which is inserted in the prologue of every function right after `AliasedDefinition`. It provides the initial definition for every non-stack memory location, including read-only locations, but is never written to anywhere else. It is the conterpart of the `AliasedUse` instruction in the function epilogue, which represents the use of all non-stack memory after the function returns. I considered renaming `AliasedUse` to `ReturnNonLocal`, to match the `InitializeXXX`/`ReturnXXX` pattern we already use for parameters and indirections, but held off to avoid unnecessary churn. Any thoughts on whether I should make this name change? This change has a significant speedup in evaluation time for a few of our troublesome databases: `attnam/ivan`: 13% `awslabs/s2n`: 26% `SinaMostafanejad/OpenRDM`: 7% `zcoinofficial/zcoin`: 8%
This commit is contained in:
@@ -63,6 +63,7 @@ private newtype TOpcode =
|
||||
TUnmodeledDefinition() or
|
||||
TUnmodeledUse() or
|
||||
TAliasedDefinition() or
|
||||
TInitializeNonLocal() or
|
||||
TAliasedUse() or
|
||||
TPhi() or
|
||||
TBuiltIn() or
|
||||
@@ -596,6 +597,14 @@ module Opcode {
|
||||
final override MemoryAccessKind getWriteMemoryAccess() { result instanceof EscapedMemoryAccess }
|
||||
}
|
||||
|
||||
class InitializeNonLocal extends Opcode, TInitializeNonLocal {
|
||||
final override string toString() { result = "InitializeNonLocal" }
|
||||
|
||||
final override MemoryAccessKind getWriteMemoryAccess() {
|
||||
result instanceof NonLocalMemoryAccess
|
||||
}
|
||||
}
|
||||
|
||||
class AliasedUse extends Opcode, TAliasedUse {
|
||||
final override string toString() { result = "AliasedUse" }
|
||||
|
||||
|
||||
@@ -298,7 +298,7 @@ class AllNonLocalMemory extends TAllNonLocalMemory, MemoryLocation {
|
||||
|
||||
final override string toStringInternal() { result = "{AllNonLocal}" }
|
||||
|
||||
final override VirtualVariable getVirtualVariable() { result = TAllAliasedMemory(irFunc, false) }
|
||||
final override AliasedVirtualVariable getVirtualVariable() { result.getIRFunction() = irFunc }
|
||||
|
||||
final override Language::LanguageType getType() {
|
||||
result = any(IRUnknownType type).getCanonicalLanguageType()
|
||||
@@ -311,6 +311,14 @@ class AllNonLocalMemory extends TAllNonLocalMemory, MemoryLocation {
|
||||
final override string getUniqueId() { result = "{AllNonLocal}" }
|
||||
|
||||
final override predicate isMayAccess() { isMayAccess = true }
|
||||
|
||||
override predicate canDefineReadOnly() {
|
||||
// A "must" access that defines all non-local memory appears only on the `InitializeNonLocal`
|
||||
// instruction, which provides the initial definition for all memory outside of the current
|
||||
// function's stack frame. This memory includes string literals and other read-only globals, so
|
||||
// we allow such an access to be the definition for a use of a read-only location.
|
||||
not isMayAccess()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -341,16 +349,6 @@ class AllAliasedMemory extends TAllAliasedMemory, MemoryLocation {
|
||||
|
||||
class AliasedVirtualVariable extends AllAliasedMemory, VirtualVariable {
|
||||
AliasedVirtualVariable() { not isMayAccess() }
|
||||
|
||||
override predicate canDefineReadOnly() {
|
||||
// A must-def of all aliased memory is only used in two places:
|
||||
// 1. In the prologue of the function, to provide a definition for all memory defined before the
|
||||
// function was called. In this case, it needs to provide a definition even for read-only
|
||||
// non-local variables.
|
||||
// 2. As the result of a `Chi` instruction. These don't participate in overlap analysis, so it's
|
||||
// OK if we let this predicate hold in that case.
|
||||
any()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -405,10 +403,11 @@ private Overlap getExtentOverlap(MemoryLocation def, MemoryLocation use) {
|
||||
use instanceof AllNonLocalMemory and
|
||||
result instanceof MustExactlyOverlap
|
||||
or
|
||||
// AllNonLocalMemory may partially overlap any location within the same virtual variable,
|
||||
// except a local variable.
|
||||
result instanceof MayPartiallyOverlap and
|
||||
not use.isAlwaysAllocatedOnStack()
|
||||
// AllNonLocalMemory may partially overlap any other location within the same virtual
|
||||
// variable, except a stack variable.
|
||||
not use instanceof AllNonLocalMemory and
|
||||
not use.isAlwaysAllocatedOnStack() and
|
||||
result instanceof MayPartiallyOverlap
|
||||
)
|
||||
or
|
||||
def.getVirtualVariable() = use.getVirtualVariable() and
|
||||
|
||||
@@ -759,7 +759,21 @@ module DefUse {
|
||||
then defLocation = useLocation
|
||||
else (
|
||||
definitionHasPhiNode(defLocation, block) and
|
||||
defLocation = useLocation.getVirtualVariable()
|
||||
defLocation = useLocation.getVirtualVariable() and
|
||||
// Handle the unusual case where a virtual variable does not overlap one of its member
|
||||
// locations. For example, a definition of the virtual variable representing all aliased
|
||||
// memory does not overlap a use of a string literal, because the contents of a string
|
||||
// literal can never be redefined. The string literal's location could still be a member of
|
||||
// the `AliasedVirtualVariable` due to something like:
|
||||
// ```
|
||||
// char s[10];
|
||||
// strcpy(s, p);
|
||||
// const char* p = b ? "SomeLiteral" : s;
|
||||
// return p[3];
|
||||
// ```
|
||||
// In the above example, `p[3]` may access either the string literal or the local variable
|
||||
// `s`, so both of those locations must be members of the `AliasedVirtualVariable`.
|
||||
exists(Alias::getOverlap(defLocation, useLocation))
|
||||
)
|
||||
)
|
||||
or
|
||||
|
||||
@@ -28,6 +28,7 @@ newtype TInstructionTag =
|
||||
UnmodeledDefinitionTag() or
|
||||
UnmodeledUseTag() or
|
||||
AliasedDefinitionTag() or
|
||||
InitializeNonLocalTag() or
|
||||
AliasedUseTag() or
|
||||
SwitchBranchTag() or
|
||||
CallTargetTag() or
|
||||
@@ -126,6 +127,8 @@ string getInstructionTagId(TInstructionTag tag) {
|
||||
or
|
||||
tag = AliasedDefinitionTag() and result = "AliasedDef"
|
||||
or
|
||||
tag = InitializeNonLocalTag() and result = "InitNonLocal"
|
||||
or
|
||||
tag = AliasedUseTag() and result = "AliasedUse"
|
||||
or
|
||||
tag = SwitchBranchTag() and result = "SwitchBranch"
|
||||
|
||||
@@ -71,6 +71,9 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
|
||||
result = getInstruction(AliasedDefinitionTag())
|
||||
or
|
||||
tag = AliasedDefinitionTag() and
|
||||
result = getInstruction(InitializeNonLocalTag())
|
||||
or
|
||||
tag = InitializeNonLocalTag() and
|
||||
result = getInstruction(UnmodeledDefinitionTag())
|
||||
or
|
||||
(
|
||||
@@ -144,6 +147,10 @@ class TranslatedFunction extends TranslatedElement, TTranslatedFunction {
|
||||
opcode instanceof Opcode::AliasedDefinition and
|
||||
resultType = getUnknownType()
|
||||
or
|
||||
tag = InitializeNonLocalTag() and
|
||||
opcode instanceof Opcode::InitializeNonLocal and
|
||||
resultType = getUnknownType()
|
||||
or
|
||||
tag = InitializeThisTag() and
|
||||
opcode instanceof Opcode::InitializeThis and
|
||||
resultType = getTypeForGLValue(getThisType())
|
||||
|
||||
@@ -759,7 +759,21 @@ module DefUse {
|
||||
then defLocation = useLocation
|
||||
else (
|
||||
definitionHasPhiNode(defLocation, block) and
|
||||
defLocation = useLocation.getVirtualVariable()
|
||||
defLocation = useLocation.getVirtualVariable() and
|
||||
// Handle the unusual case where a virtual variable does not overlap one of its member
|
||||
// locations. For example, a definition of the virtual variable representing all aliased
|
||||
// memory does not overlap a use of a string literal, because the contents of a string
|
||||
// literal can never be redefined. The string literal's location could still be a member of
|
||||
// the `AliasedVirtualVariable` due to something like:
|
||||
// ```
|
||||
// char s[10];
|
||||
// strcpy(s, p);
|
||||
// const char* p = b ? "SomeLiteral" : s;
|
||||
// return p[3];
|
||||
// ```
|
||||
// In the above example, `p[3]` may access either the string literal or the local variable
|
||||
// `s`, so both of those locations must be members of the `AliasedVirtualVariable`.
|
||||
exists(Alias::getOverlap(defLocation, useLocation))
|
||||
)
|
||||
)
|
||||
or
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user