C++: Currently, to catch flow in an example such as:

```cpp
char* source();
void sink(const char*);
int sprintf(char *, const char *, ...);

void call_sprintf(char* path, char* data) {
        sprintf(path, "%s", "abc"); // (1)
        sprintf(path, "%s", data); // (2)
}

void foo() {
        char path[10];
        call_sprintf(path, source()); // (3)
        sink(path);
}
```
we identify that the `*path [post update]` node at `// (2)` is a
`ReturnNodeExt` and since `*data` flows to that node flow will be carried
out to `*path [post update]` at // (3) and thus reach `sink(path)`.

The reason `*path [post update]` at `// 2` is recognized as a `ReturnNodeExt`
is because it satisfies the following condition (which is identified by the
shared dataflow library):
There is flow from the parameter node `*path` to the pre-update node of the
post-update node `*path [post update]` at `// (2)`.

However, when we start recognizing that the call to `sprintf(path, ...)` at
`// (1)` overrides the value of `*path` and no longer provide use-use flow out
of `*path` the `*path [post update]` node at `// (2)` is no longer recognized
as a `ReturnNodeExt` (because it doesn't satisfy the above criteria).

Thus, we need to identify the flow above without relying on the dataflow
library's summary mechanism. That is, instead of relying on the dataflow
library's mechanism to summarize the `*data -> *path` flow for `call_sprintf`
we need to:
- Ensure that the write to `*path` at `// (2)` is recognized as the "final"
write to the parameter, and
- Ensure that there's flow out of that parameter and back to
`*path [post update]` at `// (3)`.

Luckiky, we do all of this already to support flow out of writes to parameters
that don't have post-update nodes. For example, in something like:
```cpp
void set(int* x, int y) {
  *x = y;
}

void test() {
  int x;
  set(&x, source());
  sink(x);
}
```
So in order to make the original example work, all we need to do is to remove
the restrictions on this mechanism so that the same mechanism that makes the
above example work also makes the original example work!
This commit is contained in:
Mathias Vorreiter Pedersen
2024-02-16 13:09:45 +01:00
parent 7e9bf2a880
commit 499ab0892f
2 changed files with 6 additions and 24 deletions

View File

@@ -55,29 +55,12 @@ private newtype TIRDataFlowNode =
TFinalParameterNode(Parameter p, int indirectionIndex) {
exists(Ssa::FinalParameterUse use |
use.getParameter() = p and
use.getIndirectionIndex() = indirectionIndex and
parameterIsRedefined(p)
use.getIndirectionIndex() = indirectionIndex
)
} or
TFinalGlobalValue(Ssa::GlobalUse globalUse) or
TInitialGlobalValue(Ssa::GlobalDef globalUse)
/**
* Holds if the value of `*p` (or `**p`, `***p`, etc.) is redefined somewhere in the body
* of the enclosing function of `p`.
*
* Only parameters satisfying this predicate will generate a `FinalParameterNode` transferring
* flow out of the function.
*/
private predicate parameterIsRedefined(Parameter p) {
exists(Ssa::Def def |
def.getSourceVariable().getBaseVariable().(Ssa::BaseIRVariable).getIRVariable().getAst() = p and
def.getIndirectionIndex() = 0 and
def.getIndirection() > 1 and
not def.getValue().asInstruction() instanceof InitializeParameterInstruction
)
}
/**
* An operand that is defined by a `FieldAddressInstruction`.
*/

View File

@@ -142,12 +142,11 @@ private newtype TDefOrUseImpl =
isIteratorUse(container, iteratorAddress, _, indirectionIndex)
} or
TFinalParameterUse(Parameter p, int indirectionIndex) {
// Avoid creating parameter nodes if there is no definitions of the variable other than the initializaion.
exists(SsaInternals0::Def def |
def.getSourceVariable().getBaseVariable().(BaseIRVariable).getIRVariable().getAst() = p and
not def.getValue().asInstruction() instanceof InitializeParameterInstruction and
underlyingTypeIsModifiableAt(p.getUnderlyingType(), indirectionIndex)
)
underlyingTypeIsModifiableAt(p.getUnderlyingType(), indirectionIndex) and
// Only create an SSA read for the final use of a parameter if there's
// actually a body of the enclosing function. If there's no function body
// then we'll never need to flow out of the function anyway.
p.getFunction().hasDefinition()
}
private predicate isGlobalUse(