Compare commits

..

1 Commits

Author SHA1 Message Date
Esben Sparre Andreasen
22c77ebb87 Use the js/ prefix for LdapInjection.ql 2021-11-02 12:29:13 +01:00
1875 changed files with 33462 additions and 95983 deletions

View File

@@ -1,23 +1,11 @@
{
"provide": [
"*/ql/src/qlpack.yml",
"*/ql/lib/qlpack.yml",
"*/ql/test/qlpack.yml",
"*/ql/examples/qlpack.yml",
"*/upgrades/qlpack.yml",
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
"misc/legacy-support/*/qlpack.yml",
"misc/suite-helpers/qlpack.yml",
"ruby/extractor-pack/codeql-extractor.yml",
"ruby/ql/consistency-queries/qlpack.yml"
],
"versionPolicies": {
"default": {
"requireChangeNotes": true,
"committedPrereleaseSuffix": "dev",
"committedVersion": "nextPatchRelease"
}
}
}
{ "provide": [ "ruby/.codeqlmanifest.json",
"*/ql/src/qlpack.yml",
"*/ql/lib/qlpack.yml",
"*/ql/test/qlpack.yml",
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
"*/ql/examples/qlpack.yml",
"*/upgrades/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
"misc/legacy-support/*/qlpack.yml",
"misc/suite-helpers/qlpack.yml" ] }

View File

@@ -8,7 +8,7 @@ runs:
run: |
LATEST=$(gh release list --repo https://github.com/github/codeql-cli-binaries | cut -f 1 | grep -v beta | sort --version-sort | tail -1)
gh release download --repo https://github.com/github/codeql-cli-binaries --pattern codeql-linux64.zip "$LATEST"
unzip -q -d "${RUNNER_TEMP}" codeql-linux64.zip
echo "${RUNNER_TEMP}/codeql" >> "${GITHUB_PATH}"
unzip -q codeql-linux64.zip
echo "${{ github.workspace }}/codeql" >> $GITHUB_PATH
env:
GITHUB_TOKEN: ${{ github.token }}

View File

@@ -1,31 +0,0 @@
name: Post pull-request comment
on:
workflow_run:
workflows: ["Query help preview"]
types:
- completed
permissions:
pull-requests: write
jobs:
post_comment:
runs-on: ubuntu-latest
steps:
- name: Download artifact
run: gh run download "${WORKFLOW_RUN_ID}" --repo "${GITHUB_REPOSITORY}" --name "comment"
env:
GITHUB_TOKEN: ${{ github.token }}
WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
- run: |
PR="$(grep -o '^[0-9]\+$' pr.txt)"
PR_HEAD_SHA="$(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR}" --jq .head.sha)"
# Check that the pull-request head SHA matches the head SHA of the workflow run
if [ "${WORKFLOW_RUN_HEAD_SHA}" != "${PR_HEAD_SHA}" ]; then
echo "PR head SHA ${PR_HEAD_SHA} does not match workflow_run event SHA ${WORKFLOW_RUN_HEAD_SHA}. Stopping." 1>&2
exit 1
fi
gh pr comment "${PR}" --repo "${GITHUB_REPOSITORY}" -F comment.txt
env:
GITHUB_TOKEN: ${{ github.token }}
WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_commit.id }}

View File

@@ -1,13 +1,10 @@
name: Query help preview
permissions:
contents: read
on:
pull_request:
branches:
- main
- "rc/*"
- 'rc/*'
paths:
- "ruby/**/*.qhelp"
@@ -15,49 +12,28 @@ jobs:
qhelp:
runs-on: ubuntu-latest
steps:
- run: echo "${{ github.event.number }}" > pr.txt
- uses: actions/upload-artifact@v2
with:
name: comment
path: pr.txt
retention-days: 1
- uses: actions/checkout@v2
with:
fetch-depth: 2
persist-credentials: false
- uses: ./.github/actions/fetch-codeql
- name: Determine changed files
id: changes
run: |
(git diff -z --name-only --diff-filter=ACMRT HEAD~1 HEAD | grep -z '.qhelp$' | grep -z -v '.inc.qhelp';
git diff -z --name-only --diff-filter=ACMRT HEAD~1 HEAD | grep -z '.inc.qhelp$' | xargs --null -rn1 basename | xargs --null -rn1 git grep -z -l) |
grep -z '.qhelp$' | grep -z -v '^-' | sort -z -u > "${RUNNER_TEMP}/paths.txt"
echo -n "::set-output name=qhelp_files::"
(git diff --name-only --diff-filter=ACMRT HEAD~1 HEAD | grep .qhelp$ | grep -v .inc.qhelp;
git diff --name-only --diff-filter=ACMRT HEAD~1 HEAD | grep .inc.qhelp$ | xargs -d '\n' -rn1 basename | xargs -d '\n' -rn1 git grep -l) |
sort -u | xargs -d '\n' -n1 printf "'%s' "
- uses: ./.github/actions/fetch-codeql
- name: QHelp preview
if: ${{ steps.changes.outputs.qhelp_files }}
run: |
EXIT_CODE=0
echo "QHelp previews:" > comment.txt
while read -r -d $'\0' path; do
if [ ! -f "${path}" ]; then
exit 1
fi
( echo "QHelp previews:";
for path in ${{ steps.changes.outputs.qhelp_files }} ; do
echo "<details> <summary>${path}</summary>"
echo
codeql generate query-help --format=markdown -- "./${path}" 2> errors.txt || EXIT_CODE="$?"
if [ -s errors.txt ]; then
echo "# errors/warnings:"
echo '```'
cat errors.txt
cat errors.txt 1>&2
echo '```'
fi
codeql generate query-help --format=markdown ${path}
echo "</details>"
done < "${RUNNER_TEMP}/paths.txt" >> comment.txt
exit "${EXIT_CODE}"
- if: always()
uses: actions/upload-artifact@v2
with:
name: comment
path: comment.txt
retention-days: 1
done) | gh pr comment "${{ github.event.pull_request.number }}" -F -
env:
GITHUB_TOKEN: ${{ github.token }}

View File

@@ -3,18 +3,16 @@ name: "Ruby: Build"
on:
push:
paths:
- "ruby/**"
- .github/workflows/ruby-build.yml
- 'ruby/**'
branches:
- main
- "rc/*"
- 'rc/*'
pull_request:
paths:
- "ruby/**"
- .github/workflows/ruby-build.yml
- 'ruby/**'
branches:
- main
- "rc/*"
- 'rc/*'
workflow_dispatch:
inputs:
tag:
@@ -102,6 +100,16 @@ jobs:
PACK_FOLDER=$(readlink -f target/packs/codeql/ruby-queries/*)
codeql/codeql generate query-help --format=sarifv2.1.0 --output="${PACK_FOLDER}/rules.sarif" ql/src
(cd ql/src; find queries \( -name '*.qhelp' -o -name '*.rb' -o -name '*.erb' \) -exec bash -c 'mkdir -p "'"${PACK_FOLDER}"'/$(dirname "{}")"' \; -exec cp "{}" "${PACK_FOLDER}/{}" \;)
- name: Compile with previous CodeQL versions
run: |
for version in $(gh release list --repo https://github.com/github/codeql-cli-binaries | cut -f 1 | sort --version-sort | tail -3 | head -2); do
rm -f codeql-linux64.zip
gh release download --repo https://github.com/github/codeql-cli-binaries --pattern codeql-linux64.zip "$version"
rm -rf codeql; unzip -q codeql-linux64.zip
codeql/codeql query compile target/packs/*
done
env:
GITHUB_TOKEN: ${{ github.token }}
- uses: actions/upload-artifact@v2
with:
name: codeql-ruby-queries

View File

@@ -4,17 +4,15 @@ on:
push:
branches:
- main
- "rc/*"
- 'rc/*'
paths:
- ruby/ql/lib/ruby.dbscheme
- .github/workflows/ruby-dataset-measure.yml
pull_request:
branches:
- main
- "rc/*"
- 'rc/*'
paths:
- ruby/ql/lib/ruby.dbscheme
- .github/workflows/ruby-dataset-measure.yml
workflow_dispatch:
jobs:
@@ -24,7 +22,7 @@ jobs:
strategy:
fail-fast: false
matrix:
repo: [rails/rails, discourse/discourse, spree/spree, ruby/ruby]
repo: [rails/rails, discourse/discourse, spree/spree]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -41,7 +39,7 @@ jobs:
- name: Create database
run: |
codeql database create \
--search-path "${{ github.workspace }}/ruby/extractor-pack" \
--search-path "${{ github.workspace }}/ruby" \
--threads 4 \
--language ruby --source-root "${{ github.workspace }}/repo" \
"${{ runner.temp }}/database"

View File

@@ -3,18 +3,16 @@ name: "Ruby: Run QL Tests"
on:
push:
paths:
- "ruby/**"
- .github/workflows/ruby-qltest.yml
- 'ruby/**'
branches:
- main
- "rc/*"
- 'rc/*'
pull_request:
paths:
- "ruby/**"
- .github/workflows/ruby-qltest.yml
- 'ruby/**'
branches:
- main
- "rc/*"
- 'rc/*'
env:
CARGO_TERM_COLOR: always
@@ -32,19 +30,19 @@ jobs:
- uses: ./ruby/actions/create-extractor-pack
- name: Run QL tests
run: |
codeql test run --search-path "${{ github.workspace }}/ruby/extractor-pack" --check-databases --check-unused-labels --check-repeated-labels --check-redefined-labels --check-use-before-definition --consistency-queries ql/consistency-queries ql/test
codeql test run --check-databases --check-unused-labels --check-repeated-labels --check-redefined-labels --check-use-before-definition --search-path "${{ github.workspace }}/ruby" --additional-packs "${{ github.workspace }}" --consistency-queries ql/consistency-queries ql/test
env:
GITHUB_TOKEN: ${{ github.token }}
- name: Check QL formatting
run: find ql "(" -name "*.ql" -or -name "*.qll" ")" -print0 | xargs -0 codeql query format --check-only
- name: Check QL compilation
run: |
codeql query compile --check-only --threads=4 --warnings=error "ql/src" "ql/examples"
codeql query compile --check-only --threads=4 --warnings=error --search-path "${{ github.workspace }}/ruby" --additional-packs "${{ github.workspace }}" "ql/src" "ql/examples"
env:
GITHUB_TOKEN: ${{ github.token }}
- name: Check DB upgrade scripts
run: |
echo >empty.trap
codeql dataset import -S ql/lib/upgrades/initial/ruby.dbscheme testdb empty.trap
codeql dataset upgrade testdb --additional-packs ql/lib
codeql dataset upgrade testdb --additional-packs ql/lib/upgrades
diff -q testdb/ruby.dbscheme ql/lib/ruby.dbscheme

3
.gitignore vendored
View File

@@ -27,6 +27,3 @@ csharp/extractor/Semmle.Extraction.CSharp.Driver/Properties/launchSettings.json
# Avoid committing cached package components
.codeql
# Compiled class file
*.class

View File

@@ -449,8 +449,7 @@
"csharp/ql/lib/semmle/code/csharp/controlflow/internal/pressa/SsaImplCommon.qll",
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/basessa/SsaImplCommon.qll",
"csharp/ql/lib/semmle/code/cil/internal/SsaImplCommon.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll",
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll"
"ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll"
],
"CryptoAlgorithms Python/JS": [
"javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll",
@@ -460,10 +459,9 @@
"javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll"
],
"ReDoS Util Python/JS/Ruby": [
"ReDoS Util Python/JS": [
"javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll"
],
"ReDoS Exponential Python/JS": [
"javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
@@ -472,12 +470,7 @@
"ReDoS Polynomial Python/JS": [
"javascript/ql/lib/semmle/javascript/security/performance/SuperlinearBackTracking.qll",
"python/ql/lib/semmle/python/security/performance/SuperlinearBackTracking.qll",
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll"
],
"BadTagFilterQuery Python/JS/Ruby": [
"javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll",
"python/ql/lib/semmle/python/security/BadTagFilterQuery.qll",
"ruby/ql/lib/codeql/ruby/security/BadTagFilterQuery.qll"
"ruby/ql/lib/codeql/ruby/regexp/SuperlinearBackTracking.qll"
],
"CFG": [
"csharp/ql/lib/semmle/code/csharp/controlflow/internal/ControlFlowGraphImplShared.qll",

View File

@@ -1,7 +1,4 @@
## 0.0.4
### New Features
lgtm,codescanning
* The QL library `semmle.code.cpp.commons.Exclusions` now contains a predicate
`isFromSystemMacroDefinition` for identifying code that originates from a
macro outside the project being analyzed.

View File

@@ -1,9 +0,0 @@
## 0.0.5
## 0.0.4
### New Features
* The QL library `semmle.code.cpp.commons.Exclusions` now contains a predicate
`isFromSystemMacroDefinition` for identifying code that originates from a
macro outside the project being analyzed.

View File

@@ -1 +0,0 @@
## 0.0.5

View File

@@ -1,2 +0,0 @@
---
lastReleaseVersion: 0.0.5

View File

@@ -1,8 +1,7 @@
name: codeql/cpp-all
version: 0.0.6-dev
groups: cpp
version: 0.0.2
dbscheme: semmlecode.cpp.dbscheme
extractor: cpp
library: true
dependencies:
codeql/cpp-upgrades: ^0.0.3
codeql/cpp-upgrades: 0.0.2

View File

@@ -3,14 +3,11 @@ private import semmle.code.cpp.models.interfaces.ArrayFunction
private import semmle.code.cpp.models.implementations.Strcat
import semmle.code.cpp.dataflow.DataFlow
/**
* Holds if the expression `e` assigns something including `va` to a
* stack variable `v0`.
*/
private predicate mayAddNullTerminatorHelper(Expr e, VariableAccess va, StackVariable v0) {
exists(Expr val |
exprDefinition(v0, e, val) and // `e` is `v0 := val`
val.getAChild*() = va
private predicate mayAddNullTerminatorHelper(Expr e, VariableAccess va, Expr e0) {
exists(StackVariable v0, Expr val |
exprDefinition(v0, e, val) and
val.getAChild*() = va and
mayAddNullTerminator(e0, v0.getAnAccess())
)
}
@@ -28,8 +25,8 @@ private predicate controlFlowNodeSuccessorTransitive(ControlFlowNode n1, Control
}
/**
* Holds if the expression `e` may add a null terminator to the string
* accessed by `va`.
* Holds if the expression `e` may add a null terminator to the string in
* variable `v`.
*/
predicate mayAddNullTerminator(Expr e, VariableAccess va) {
// Assignment: dereferencing or array access
@@ -46,9 +43,8 @@ predicate mayAddNullTerminator(Expr e, VariableAccess va) {
)
or
// Assignment to another stack variable
exists(StackVariable v0, Expr e0 |
mayAddNullTerminatorHelper(e, va, v0) and
mayAddNullTerminator(pragma[only_bind_into](e0), pragma[only_bind_into](v0.getAnAccess())) and
exists(Expr e0 |
mayAddNullTerminatorHelper(pragma[only_bind_into](e), va, pragma[only_bind_into](e0)) and
controlFlowNodeSuccessorTransitive(e, e0)
)
or

View File

@@ -6,8 +6,6 @@ import semmle.code.cpp.Type
import semmle.code.cpp.commons.CommonType
import semmle.code.cpp.commons.StringAnalysis
import semmle.code.cpp.models.interfaces.FormattingFunction
private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
class PrintfFormatAttribute extends FormatAttribute {
PrintfFormatAttribute() { this.getArchetype() = ["printf", "__printf__"] }
@@ -270,18 +268,6 @@ class FormattingFunctionCall extends Expr {
}
}
/**
* Gets the number of digits required to represent the integer represented by `f`.
*
* `f` is assumed to be nonnegative.
*/
bindingset[f]
private int lengthInBase10(float f) {
f = 0 and result = 1
or
result = f.log10().floor() + 1
}
/**
* A class to represent format strings that occur as arguments to invocations of formatting functions.
*/
@@ -1060,63 +1046,39 @@ class FormatLiteral extends Literal {
or
this.getConversionChar(n).toLowerCase() = ["d", "i"] and
// e.g. -2^31 = "-2147483648"
len =
min(float cand |
// The first case handles length sub-specifiers
// Subtract one in the exponent because one bit is for the sign.
// Add 1 to account for the possible sign in the output.
cand = 1 + lengthInBase10(2.pow(this.getIntegralDisplayType(n).getSize() * 8 - 1))
or
// The second case uses range analysis to deduce a length that's shorter than the length
// of the number -2^31.
exists(Expr arg, float lower, float upper |
arg = this.getUse().getConversionArgument(n) and
lower = lowerBound(arg.getFullyConverted()) and
upper = upperBound(arg.getFullyConverted())
|
cand =
max(int cand0 |
// Include the sign bit in the length if it can be negative
(
if lower < 0
then cand0 = 1 + lengthInBase10(lower.abs())
else cand0 = lengthInBase10(lower)
)
or
(
if upper < 0
then cand0 = 1 + lengthInBase10(upper.abs())
else cand0 = lengthInBase10(upper)
)
)
)
)
exists(int sizeBits |
sizeBits =
min(int bits |
bits = this.getIntegralDisplayType(n).getSize() * 8
or
exists(IntegralType t |
t = this.getUse().getConversionArgument(n).getType().getUnderlyingType()
|
t.isSigned() and bits = t.getSize() * 8
)
) and
len = 1 + ((sizeBits - 1) / 10.0.log2()).ceil()
// this calculation is as %u (below) only we take out the sign bit (- 1) and allow a whole
// character for it to be expressed as '-'.
)
or
this.getConversionChar(n).toLowerCase() = "u" and
// e.g. 2^32 - 1 = "4294967295"
len =
min(float cand |
// The first case handles length sub-specifiers
cand = 2.pow(this.getIntegralDisplayType(n).getSize() * 8)
or
// The second case uses range analysis to deduce a length that's shorter than
// the length of the number 2^31 - 1.
exists(Expr arg, float lower |
arg = this.getUse().getConversionArgument(n) and
lower = lowerBound(arg.getFullyConverted())
|
cand =
max(float cand0 |
// If lower can be negative we use `(unsigned)-1` as the candidate value.
lower < 0 and
cand0 = 2.pow(any(IntType t | t.isUnsigned()).getSize() * 8)
or
cand0 = upperBound(arg.getFullyConverted())
)
)
|
lengthInBase10(cand)
)
exists(int sizeBits |
sizeBits =
min(int bits |
bits = this.getIntegralDisplayType(n).getSize() * 8
or
exists(IntegralType t |
t = this.getUse().getConversionArgument(n).getType().getUnderlyingType()
|
t.isUnsigned() and bits = t.getSize() * 8
)
) and
len = (sizeBits / 10.0.log2()).ceil()
// convert the size from bits to decimal characters, and round up as you can't have
// fractional characters (10.0.log2() is the number of bits expressed per decimal character)
)
or
this.getConversionChar(n).toLowerCase() = "x" and
// e.g. "12345678"

View File

@@ -626,9 +626,9 @@ library class ExprEvaluator extends int {
// All assignments must have the same int value
result =
unique(Expr value |
value = v.getAnAssignedValue() and not this.ignoreVariableAssignment(e, v, value)
value = v.getAnAssignedValue() and not ignoreVariableAssignment(e, v, value)
|
this.getValueInternalNonSubExpr(value)
getValueInternalNonSubExpr(value)
)
)
}

View File

@@ -1,6 +1,4 @@
private import cpp
private import semmle.code.cpp.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.dataflow.internal.DataFlowUtil
/**
* Gets a function that might be called by `call`.
@@ -65,17 +63,3 @@ predicate mayBenefitFromCallContext(Call call, Function f) { none() }
* restricted to those `call`s for which a context might make a difference.
*/
Function viableImplInCallContext(Call call, Call ctx) { none() }
/** A parameter position represented by an integer. */
class ParameterPosition extends int {
ParameterPosition() { any(ParameterNode p).isParameterOf(_, this) }
}
/** An argument position represented by an integer. */
class ArgumentPosition extends int {
ArgumentPosition() { any(ArgumentNode a).argumentOf(_, this) }
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }

File diff suppressed because it is too large Load Diff

View File

@@ -2,42 +2,6 @@ private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
import Cached
module DataFlowImplCommonPublic {
private newtype TFlowFeature =
TFeatureHasSourceCallContext() or
TFeatureHasSinkCallContext() or
TFeatureEqualSourceSinkCallContext()
/** A flow configuration feature for use in `Configuration::getAFeature()`. */
class FlowFeature extends TFlowFeature {
string toString() { none() }
}
/**
* A flow configuration feature that implies that sources have some existing
* call context.
*/
class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext {
override string toString() { result = "FeatureHasSourceCallContext" }
}
/**
* A flow configuration feature that implies that sinks have some existing
* call context.
*/
class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext {
override string toString() { result = "FeatureHasSinkCallContext" }
}
/**
* A flow configuration feature that implies that source-sink pairs have some
* shared existing call context.
*/
class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext {
override string toString() { result = "FeatureEqualSourceSinkCallContext" }
}
}
/**
* The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion.
*
@@ -62,18 +26,6 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
tupleLimit = 1000
}
/**
* Holds if `arg` is an argument of `call` with an argument position that matches
* parameter position `ppos`.
*/
pragma[noinline]
predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPosition ppos) {
exists(ArgumentPosition apos |
arg.argumentOf(call, apos) and
parameterMatch(ppos, apos)
)
}
/**
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
* currently excludes read-steps, store-steps, and flow-through.
@@ -83,27 +35,25 @@ predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPositio
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
pragma[noinline]
private predicate viableParamNonLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallable(call), ppos)
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallable(call), i)
}
pragma[noinline]
private predicate viableParamLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), ppos)
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParamNonLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParamLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
@@ -336,7 +286,7 @@ private module Cached {
or
exists(ArgNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, k.(ParamUpdateReturnKind).getAMatchingArgumentPosition())
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
)
}
@@ -344,7 +294,7 @@ private module Cached {
predicate returnNodeExt(Node n, ReturnKindExt k) {
k = TValueReturn(n.(ReturnNode).getKind())
or
exists(ParamNode p, ParameterPosition pos |
exists(ParamNode p, int pos |
parameterValueFlowsToPreUpdate(p, n) and
p.isParameterOf(_, pos) and
k = TParamUpdate(pos)
@@ -366,13 +316,11 @@ private module Cached {
}
cached
predicate parameterNode(Node p, DataFlowCallable c, ParameterPosition pos) {
isParameterNode(p, c, pos)
}
predicate parameterNode(Node p, DataFlowCallable c, int pos) { isParameterNode(p, c, pos) }
cached
predicate argumentNode(Node n, DataFlowCall call, ArgumentPosition pos) {
isArgumentNode(n, call, pos)
predicate argumentNode(Node n, DataFlowCall call, int pos) {
n.(ArgumentNode).argumentOf(call, pos)
}
/**
@@ -390,12 +338,12 @@ private module Cached {
}
/**
* Holds if `p` is the parameter of a viable dispatch target of `call`,
* and `p` has position `ppos`.
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableExt(call), ppos)
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableExt(call), i)
}
/**
@@ -404,9 +352,9 @@ private module Cached {
*/
cached
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParam(call, ppos, p) and
argumentPositionMatch(call, arg, ppos) and
exists(int i |
viableParam(call, i, p) and
arg.argumentOf(call, i) and
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
)
}
@@ -878,7 +826,7 @@ private module Cached {
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(ParameterPosition pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
cached
newtype TBooleanOption =
@@ -1070,9 +1018,9 @@ class ParamNode extends Node {
/**
* Holds if this node is the parameter of callable `c` at the specified
* position.
* (zero-based) position.
*/
predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { parameterNode(this, c, pos) }
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
}
/** A data-flow node that represents a call argument. */
@@ -1080,9 +1028,7 @@ class ArgNode extends Node {
ArgNode() { argumentNode(this, _, _) }
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
argumentNode(this, call, pos)
}
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
}
/**
@@ -1128,14 +1074,11 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
}
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
private ParameterPosition pos;
private int pos;
ParamUpdateReturnKind() { this = TParamUpdate(pos) }
ParameterPosition getPosition() { result = pos }
pragma[nomagic]
ArgumentPosition getAMatchingArgumentPosition() { parameterMatch(pos, result) }
int getPosition() { result = pos }
override string toString() { result = "param update " + pos }
}

View File

@@ -9,19 +9,6 @@ private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
/** A class for configuring the consistency queries. */
class ConsistencyConfiguration extends TConsistencyConfiguration {
string toString() { none() }
/** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
predicate postWithInFlowExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
}
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
@@ -177,7 +164,7 @@ module Consistency {
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and
not isImmutableOrUnobservable(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
@@ -190,7 +177,6 @@ module Consistency {
isPostUpdateNode(n) and
not clearsContent(n, _) and
simpleLocalFlowStep(_, n) and
not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
msg = "PostUpdateNode should not be the target of local flow."
}
}

View File

@@ -2,20 +2,12 @@ private import cpp
private import DataFlowUtil
private import DataFlowDispatch
private import FlowVar
private import DataFlowImplConsistency
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */
predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) {
arg.argumentOf(c, pos)
}
predicate isParameterNode(ParameterNode p, DataFlowCallable c, int pos) { p.isParameterOf(c, pos) }
/** Gets the instance argument of a non-static call. */
private Node getInstanceArgument(Call call) {
@@ -267,6 +259,27 @@ class Unit extends TUnit {
string toString() { result = "unit" }
}
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) {
// Is the null pointer (or something that's not really a pointer)
exists(n.asExpr().getValue())
or
// Isn't a pointer or is a pointer to const
forall(DerivedType dt | dt = n.asExpr().getActualType() |
dt.getBaseType().isConst()
or
dt.getBaseType() instanceof RoutineType
)
// The above list of cases isn't exhaustive, but it narrows down the
// consistency alerts enough that most of them are interesting.
}
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }
@@ -289,19 +302,3 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
private class MyConsistencyConfiguration extends Consistency::ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
// Is the null pointer (or something that's not really a pointer)
exists(n.asExpr().getValue())
or
// Isn't a pointer or is a pointer to const
forall(DerivedType dt | dt = n.asExpr().getActualType() |
dt.getBaseType().isConst()
or
dt.getBaseType() instanceof RoutineType
)
// The above list of cases isn't exhaustive, but it narrows down the
// consistency alerts enough that most of them are interesting.
}
}

View File

@@ -474,25 +474,6 @@ module TaintedWithPath {
}
}
/**
* INTERNAL: Do not use.
*/
module Private {
/** Gets a predecessor `PathNode` of `pathNode`, if any. */
PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) }
/** Gets the element that `pathNode` wraps, if any. */
Element getElementFromPathNode(PathNode pathNode) {
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
result = node.asInstruction().getAST()
or
result = node.asOperand().getDef().getAST()
)
or
result = pathNode.(EndpointPathNode).inner()
}
}
private class WrapPathNode extends PathNode, TWrapPathNode {
DataFlow3::PathNode inner() { this = TWrapPathNode(result) }

View File

@@ -2,7 +2,6 @@ private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import DataFlowImplCommon as DataFlowImplCommon
/**
@@ -64,10 +63,8 @@ private module VirtualDispatch {
|
// Call argument
exists(DataFlowCall call, int i |
other
.(DataFlow::ParameterNode)
.isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and
src.(ArgumentNode).argumentOf(call, pragma[only_bind_into](pragma[only_bind_out](i)))
other.(DataFlow::ParameterNode).isParameterOf(call.getStaticCallTarget(), i) and
src.(ArgumentNode).argumentOf(call, i)
) and
allowOtherFromArg = true and
allowFromArg = true
@@ -131,7 +128,6 @@ private module VirtualDispatch {
*
* Used to fix a join ordering issue in flowsFrom.
*/
pragma[noinline]
private predicate returnNodeWithKindAndEnclosingCallable(
ReturnNode node, ReturnKind kind, DataFlowCallable callable
) {
@@ -267,17 +263,3 @@ Function viableImplInCallContext(CallInstruction call, CallInstruction ctx) {
result = ctx.getArgument(i).getUnconvertedResultExpression().(FunctionAccess).getTarget()
)
}
/** A parameter position represented by an integer. */
class ParameterPosition extends int {
ParameterPosition() { any(ParameterNode p).isParameterOf(_, this) }
}
/** An argument position represented by an integer. */
class ArgumentPosition extends int {
ArgumentPosition() { any(ArgumentNode a).argumentOf(_, this) }
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }

View File

@@ -2,42 +2,6 @@ private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
import Cached
module DataFlowImplCommonPublic {
private newtype TFlowFeature =
TFeatureHasSourceCallContext() or
TFeatureHasSinkCallContext() or
TFeatureEqualSourceSinkCallContext()
/** A flow configuration feature for use in `Configuration::getAFeature()`. */
class FlowFeature extends TFlowFeature {
string toString() { none() }
}
/**
* A flow configuration feature that implies that sources have some existing
* call context.
*/
class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext {
override string toString() { result = "FeatureHasSourceCallContext" }
}
/**
* A flow configuration feature that implies that sinks have some existing
* call context.
*/
class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext {
override string toString() { result = "FeatureHasSinkCallContext" }
}
/**
* A flow configuration feature that implies that source-sink pairs have some
* shared existing call context.
*/
class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext {
override string toString() { result = "FeatureEqualSourceSinkCallContext" }
}
}
/**
* The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion.
*
@@ -62,18 +26,6 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
tupleLimit = 1000
}
/**
* Holds if `arg` is an argument of `call` with an argument position that matches
* parameter position `ppos`.
*/
pragma[noinline]
predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPosition ppos) {
exists(ArgumentPosition apos |
arg.argumentOf(call, apos) and
parameterMatch(ppos, apos)
)
}
/**
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
* currently excludes read-steps, store-steps, and flow-through.
@@ -83,27 +35,25 @@ predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPositio
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
pragma[noinline]
private predicate viableParamNonLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallable(call), ppos)
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallable(call), i)
}
pragma[noinline]
private predicate viableParamLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), ppos)
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParamNonLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParamLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
)
}
@@ -336,7 +286,7 @@ private module Cached {
or
exists(ArgNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, k.(ParamUpdateReturnKind).getAMatchingArgumentPosition())
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
)
}
@@ -344,7 +294,7 @@ private module Cached {
predicate returnNodeExt(Node n, ReturnKindExt k) {
k = TValueReturn(n.(ReturnNode).getKind())
or
exists(ParamNode p, ParameterPosition pos |
exists(ParamNode p, int pos |
parameterValueFlowsToPreUpdate(p, n) and
p.isParameterOf(_, pos) and
k = TParamUpdate(pos)
@@ -366,13 +316,11 @@ private module Cached {
}
cached
predicate parameterNode(Node p, DataFlowCallable c, ParameterPosition pos) {
isParameterNode(p, c, pos)
}
predicate parameterNode(Node p, DataFlowCallable c, int pos) { isParameterNode(p, c, pos) }
cached
predicate argumentNode(Node n, DataFlowCall call, ArgumentPosition pos) {
isArgumentNode(n, call, pos)
predicate argumentNode(Node n, DataFlowCall call, int pos) {
n.(ArgumentNode).argumentOf(call, pos)
}
/**
@@ -390,12 +338,12 @@ private module Cached {
}
/**
* Holds if `p` is the parameter of a viable dispatch target of `call`,
* and `p` has position `ppos`.
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableExt(call), ppos)
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableExt(call), i)
}
/**
@@ -404,9 +352,9 @@ private module Cached {
*/
cached
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(ParameterPosition ppos |
viableParam(call, ppos, p) and
argumentPositionMatch(call, arg, ppos) and
exists(int i |
viableParam(call, i, p) and
arg.argumentOf(call, i) and
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
)
}
@@ -878,7 +826,7 @@ private module Cached {
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(ParameterPosition pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
cached
newtype TBooleanOption =
@@ -1070,9 +1018,9 @@ class ParamNode extends Node {
/**
* Holds if this node is the parameter of callable `c` at the specified
* position.
* (zero-based) position.
*/
predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { parameterNode(this, c, pos) }
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
}
/** A data-flow node that represents a call argument. */
@@ -1080,9 +1028,7 @@ class ArgNode extends Node {
ArgNode() { argumentNode(this, _, _) }
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
argumentNode(this, call, pos)
}
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
}
/**
@@ -1128,14 +1074,11 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
}
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
private ParameterPosition pos;
private int pos;
ParamUpdateReturnKind() { this = TParamUpdate(pos) }
ParameterPosition getPosition() { result = pos }
pragma[nomagic]
ArgumentPosition getAMatchingArgumentPosition() { parameterMatch(pos, result) }
int getPosition() { result = pos }
override string toString() { result = "param update " + pos }
}

View File

@@ -9,19 +9,6 @@ private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
/** A class for configuring the consistency queries. */
class ConsistencyConfiguration extends TConsistencyConfiguration {
string toString() { none() }
/** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
predicate postWithInFlowExclude(Node n) { none() }
/** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
}
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
@@ -177,7 +164,7 @@ module Consistency {
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and
not isImmutableOrUnobservable(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
@@ -190,7 +177,6 @@ module Consistency {
isPostUpdateNode(n) and
not clearsContent(n, _) and
simpleLocalFlowStep(_, n) and
not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
msg = "PostUpdateNode should not be the target of local flow."
}
}

View File

@@ -2,20 +2,12 @@ private import cpp
private import DataFlowUtil
private import semmle.code.cpp.ir.IR
private import DataFlowDispatch
private import DataFlowImplConsistency
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */
predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) {
arg.argumentOf(c, pos)
}
predicate isParameterNode(ParameterNode p, DataFlowCallable c, int pos) { p.isParameterOf(c, pos) }
/**
* A data flow node that occurs as the argument of a call and is passed as-is
@@ -120,9 +112,11 @@ class ReturnNode extends InstructionNode {
Instruction primary;
ReturnNode() {
exists(ReturnValueInstruction ret | instr = ret and primary = ret)
exists(ReturnValueInstruction ret | instr = ret.getReturnValue() and primary = ret)
or
exists(ReturnIndirectionInstruction rii | instr = rii and primary = rii)
exists(ReturnIndirectionInstruction rii |
instr = rii.getSideEffectOperand().getAnyDef() and primary = rii
)
}
/** Gets the kind of this returned value. */
@@ -196,16 +190,108 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
*/
predicate jumpStep(Node n1, Node n2) { none() }
private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(StoreInstruction store, Class c |
store = node2.asInstruction() and
store.getSourceValueOperand() = node1.asOperand() and
getWrittenField(store, f.getAField(), c) and
f.hasOffset(c, _, _)
)
}
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
result = instr or
result = instr.(CopyValueInstruction).getUnary()
}
pragma[noinline]
private predicate getWrittenField(Instruction instr, Field f, Class c) {
exists(FieldAddressInstruction fa |
fa =
getFieldInstruction([
instr.(StoreInstruction).getDestinationAddress(),
instr.(WriteSideEffectInstruction).getDestinationAddress()
]) and
f = fa.getField() and
c = f.getDeclaringType()
)
}
private predicate fieldStoreStepChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(ChiPartialOperand operand, ChiInstruction chi |
chi.getPartialOperand() = operand and
node1.asOperand() = operand and
node2.asInstruction() = chi and
exists(Class c |
c = chi.getResultType() and
exists(int startBit, int endBit |
chi.getUpdatedInterval(startBit, endBit) and
f.hasOffset(c, startBit, endBit)
)
or
getWrittenField(operand.getDef(), f.getAField(), c) and
f.hasOffset(c, _, _)
)
)
}
private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode node2) {
exists(a) and
exists(ChiPartialOperand operand, ChiInstruction chi, StoreInstruction store |
chi.getPartialOperand() = operand and
store = operand.getDef() and
node1.asOperand() = operand and
// This `ChiInstruction` will always have a non-conflated result because both `ArrayStoreNode`
// and `PointerStoreNode` require it in their characteristic predicates.
node2.asInstruction() = chi and
(
// `x[i] = taint()`
// This matches the characteristic predicate in `ArrayStoreNode`.
store.getDestinationAddress() instanceof PointerAddInstruction
or
// `*p = taint()`
// This matches the characteristic predicate in `PointerStoreNode`.
store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction
)
)
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(StoreNodeInstr node1, FieldContent f, StoreNodeInstr node2) {
exists(FieldAddressInstruction fai |
node1.getInstruction() = fai and
node2.getInstruction() = fai.getObjectAddress() and
f.getField() = fai.getField()
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
fieldStoreStepNoChi(node1, f, node2) or
fieldStoreStepChi(node1, f, node2) or
arrayStoreStepChi(node1, f, node2) or
fieldStoreStepAfterArraySuppression(node1, f, node2)
}
// This predicate pushes the correct `FieldContent` onto the access path when the
// `suppressArrayRead` predicate has popped off an `ArrayContent`.
private predicate fieldStoreStepAfterArraySuppression(
Node node1, FieldContent f, PostUpdateNode node2
) {
exists(WriteSideEffectInstruction write, ChiInstruction chi, Class c |
not chi.isResultConflated() and
node1.asInstruction() = chi and
node2.asInstruction() = chi and
chi.getPartial() = write and
getWrittenField(write, f.getAField(), c) and
f.hasOffset(c, _, _)
)
}
bindingset[result, i]
private int unbindInt(int i) { i <= result and i >= result }
pragma[noinline]
private predicate getLoadedField(LoadInstruction load, Field f, Class c) {
exists(FieldAddressInstruction fa |
fa = load.getSourceAddress() and
f = fa.getField() and
c = f.getDeclaringType()
)
}
@@ -214,14 +300,122 @@ predicate storeStep(StoreNodeInstr node1, FieldContent f, StoreNodeInstr node2)
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate readStep(ReadNode node1, FieldContent f, ReadNode node2) {
exists(FieldAddressInstruction fai |
node1.getInstruction() = fai.getObjectAddress() and
node2.getInstruction() = fai and
f.getField() = fai.getField()
private predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
exists(LoadOperand operand |
node2.asOperand() = operand and
node1.asInstruction() = operand.getAnyDef() and
exists(Class c |
c = operand.getAnyDef().getResultType() and
exists(int startBit, int endBit |
operand.getUsedInterval(unbindInt(startBit), unbindInt(endBit)) and
f.hasOffset(c, startBit, endBit)
)
or
getLoadedField(operand.getUse(), f.getAField(), c) and
f.hasOffset(c, _, _)
)
)
}
/**
* When a store step happens in a function that looks like an array write such as:
* ```cpp
* void f(int* pa) {
* pa = source();
* }
* ```
* it can be a write to an array, but it can also happen that `f` is called as `f(&a.x)`. If that is
* the case, the `ArrayContent` that was written by the call to `f` should be popped off the access
* path, and a `FieldContent` containing `x` should be pushed instead.
* So this case pops `ArrayContent` off the access path, and the `fieldStoreStepAfterArraySuppression`
* predicate in `storeStep` ensures that we push the right `FieldContent` onto the access path.
*/
predicate suppressArrayRead(Node node1, ArrayContent a, Node node2) {
exists(a) and
exists(WriteSideEffectInstruction write, ChiInstruction chi |
node1.asInstruction() = write and
node2.asInstruction() = chi and
chi.getPartial() = write and
getWrittenField(write, _, _)
)
}
private class ArrayToPointerConvertInstruction extends ConvertInstruction {
ArrayToPointerConvertInstruction() {
this.getUnary().getResultType() instanceof ArrayType and
this.getResultType() instanceof PointerType
}
}
private Instruction skipOneCopyValueInstructionRec(CopyValueInstruction copy) {
copy.getUnary() = result and not result instanceof CopyValueInstruction
or
result = skipOneCopyValueInstructionRec(copy.getUnary())
}
private Instruction skipCopyValueInstructions(Operand op) {
not result instanceof CopyValueInstruction and result = op.getDef()
or
result = skipOneCopyValueInstructionRec(op.getDef())
}
private predicate arrayReadStep(Node node1, ArrayContent a, Node node2) {
exists(a) and
// Explicit dereferences such as `*p` or `p[i]` where `p` is a pointer or array.
exists(LoadOperand operand, Instruction address |
operand.isDefinitionInexact() and
node1.asInstruction() = operand.getAnyDef() and
operand = node2.asOperand() and
address = skipCopyValueInstructions(operand.getAddressOperand()) and
(
address instanceof LoadInstruction or
address instanceof ArrayToPointerConvertInstruction or
address instanceof PointerOffsetInstruction
)
)
}
/**
* In cases such as:
* ```cpp
* void f(int* pa) {
* *pa = source();
* }
* ...
* int x;
* f(&x);
* use(x);
* ```
* the load on `x` in `use(x)` will exactly overlap with its definition (in this case the definition
* is a `WriteSideEffect`). This predicate pops the `ArrayContent` (pushed by the store in `f`)
* from the access path.
*/
private predicate exactReadStep(Node node1, ArrayContent a, Node node2) {
exists(a) and
exists(WriteSideEffectInstruction write, ChiInstruction chi |
not chi.isResultConflated() and
chi.getPartial() = write and
node1.asInstruction() = write and
node2.asInstruction() = chi and
// To distinquish this case from the `arrayReadStep` case we require that the entire variable was
// overwritten by the `WriteSideEffectInstruction` (i.e., there is a load that reads the
// entire variable).
exists(LoadInstruction load | load.getSourceValue() = chi)
)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, Content f, Node node2) {
fieldReadStep(node1, f, node2) or
arrayReadStep(node1, f, node2) or
exactReadStep(node1, f, node2) or
suppressArrayRead(node1, f, node2)
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
@@ -253,7 +447,7 @@ private predicate suppressUnusedNode(Node n) { any() }
// Java QL library compatibility wrappers
//////////////////////////////////////////////////////////////////////////////
/** A node that performs a type cast. */
class CastNode extends Node {
class CastNode extends InstructionNode {
CastNode() { none() } // stub implementation
}
@@ -293,19 +487,22 @@ class Unit extends TUnit {
string toString() { result = "unit" }
}
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) {
n instanceof OperandNode and not n instanceof ArgumentNode
or
StoreNodeFlow::flowThrough(n, _) and
not StoreNodeFlow::flowOutOf(n, _) and
not StoreNodeFlow::flowInto(_, n)
or
ReadNodeFlow::flowThrough(n, _) and
not ReadNodeFlow::flowOutOf(n, _) and
not ReadNodeFlow::flowInto(_, n)
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) {
// The rules for whether an IR argument gets a post-update node are too
// complex to model here.
any()
}
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { n instanceof OperandNode and not n instanceof ArgumentNode }
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
@@ -325,11 +522,3 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
private class MyConsistencyConfiguration extends Consistency::ConsistencyConfiguration {
override predicate argHasPostUpdateExclude(ArgumentNode n) {
// The rules for whether an IR argument gets a post-update node are too
// complex to model here.
any()
}
}

View File

@@ -10,78 +10,19 @@ private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.models.interfaces.DataFlow
private import DataFlowPrivate
private import SsaInternals as Ssa
cached
private module Cached {
/**
* The IR dataflow graph consists of the following nodes:
* - `InstructionNode`, which represents an `Instruction` in the graph.
* - `OperandNode`, which represents an `Operand` in the graph.
* - `VariableNode`, which is used to model global variables.
* - Two kinds of `StoreNode`s:
* 1. `StoreNodeInstr`, which represents the value of an address computed by an `Instruction` that
* has been updated by a write operation.
* 2. `StoreNodeOperand`, which represents the value of an address in an `ArgumentOperand` after a
* function call that may have changed the value.
* - `ReadNode`, which represents the result of reading a field of an object.
* - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library.
*
* The following section describes how flow is generally transferred between these nodes:
* - Flow between `InstructionNode`s and `OperandNode`s follow the def-use information as computed by
* the IR. Because the IR compute must-alias information for memory operands, we only follow def-use
* flow for register operands.
* - Flow can enter a `StoreNode` in two ways (both done in `StoreNode.flowInto`):
* 1. Flow is transferred from a `StoreValueOperand` to a `StoreNodeInstr`. Flow will then proceed
* along the chain of addresses computed by `StoreNodeInstr.getInner` to identify field writes
* and call `storeStep` accordingly (i.e., for an expression like `a.b.c = x`, we visit `c`, then
* `b`, then `a`).
* 2. Flow is transfered from a `WriteSideEffectInstruction` to a `StoreNodeOperand` after flow
* returns to a caller. Flow will then proceed to the defining instruction of the operand (because
* the `StoreNodeInstr` computed by `StoreNodeOperand.getInner()` is the `StoreNode` containing
* the defining instruction), and then along the chain computed by `StoreNodeInstr.getInner` like
* above.
* In both cases, flow leaves a `StoreNode` once the entire chain has been traversed, and the shared
* SSA library is used to find the next use of the variable at the end of the chain.
* - Flow can enter a `ReadNode` through an `OperandNode` that represents an address of some variable.
* Flow will then proceed along the chain of addresses computed by `ReadNode.getOuter` (i.e., for an
* expression like `use(a.b.c)` we visit `a`, then `b`, then `c`) and call `readStep` accordingly.
* Once the entire chain has been traversed, flow is transferred to the load instruction that reads
* the final address of the chain.
* - Flow can enter a `SsaPhiNode` from an `InstructionNode`, a `StoreNode` or another `SsaPhiNode`
* (in `toPhiNode`), depending on which node provided the previous definition of the underlying
* variable. Flow leaves a `SsaPhiNode` (in `fromPhiNode`) by using the shared SSA library to
* determine the next use of the variable.
*/
cached
newtype TIRDataFlowNode =
TInstructionNode(Instruction i) or
TOperandNode(Operand op) or
TVariableNode(Variable var) or
TStoreNodeInstr(Instruction i) { Ssa::explicitWrite(_, _, i) } or
TStoreNodeOperand(ArgumentOperand op) { Ssa::explicitWrite(_, _, op.getDef()) } or
TReadNode(Instruction i) { needsPostReadNode(i) } or
TSsaPhiNode(Ssa::PhiNode phi)
TVariableNode(Variable var)
cached
predicate localFlowStepCached(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
}
private predicate needsPostReadNode(Instruction iFrom) {
// If the instruction generates an address that flows to a load.
Ssa::addressFlowTC(iFrom, Ssa::getSourceAddress(_)) and
(
// And it is either a field address
iFrom instanceof FieldAddressInstruction
or
// Or it is instruction that either uses or is used for an address that needs a post read node.
exists(Instruction mid | needsPostReadNode(mid) |
Ssa::addressFlow(mid, iFrom) or Ssa::addressFlow(iFrom, mid)
)
)
}
}
private import Cached
@@ -239,234 +180,6 @@ class OperandNode extends Node, TOperandNode {
override string toString() { result = this.getOperand().toString() }
}
/**
* INTERNAL: do not use.
*
* A `StoreNode` is a node that has been (or is about to be) the
* source or target of a `storeStep`.
*/
abstract private class StoreNode extends Node {
/** Holds if this node should receive flow from `addr`. */
abstract predicate flowInto(Instruction addr);
override Declaration getEnclosingCallable() { result = this.getFunction() }
/** Holds if this `StoreNode` is the root of the address computation used by a store operation. */
predicate isTerminal() {
not exists(this.getInner()) and
not storeStep(this, _, _)
}
/** Gets the store operation that uses the address computed by this `StoreNode`. */
abstract Instruction getStoreInstruction();
/** Holds if the store operation associated with this `StoreNode` overwrites the entire variable. */
final predicate isCertain() { Ssa::explicitWrite(true, this.getStoreInstruction(), _) }
/**
* Gets the `StoreNode` that computes the address used by this `StoreNode`.
*/
abstract StoreNode getInner();
/** The inverse of `StoreNode.getInner`. */
final StoreNode getOuter() { result.getInner() = this }
}
class StoreNodeInstr extends StoreNode, TStoreNodeInstr {
Instruction instr;
StoreNodeInstr() { this = TStoreNodeInstr(instr) }
override predicate flowInto(Instruction addr) { this.getInstruction() = addr }
/** Gets the underlying instruction. */
Instruction getInstruction() { result = instr }
override Function getFunction() { result = this.getInstruction().getEnclosingFunction() }
override IRType getType() { result = this.getInstruction().getResultIRType() }
override Location getLocation() { result = this.getInstruction().getLocation() }
override string toString() {
result = instructionNode(this.getInstruction()).toString() + " [store]"
}
override Instruction getStoreInstruction() {
Ssa::explicitWrite(_, result, this.getInstruction())
}
override StoreNodeInstr getInner() {
Ssa::addressFlow(result.getInstruction(), this.getInstruction())
}
}
/**
* To avoid having `PostUpdateNode`s with multiple pre-update nodes (which can cause performance
* problems) we attach the `PostUpdateNode` that represent output arguments to an operand instead of
* an instruction.
*
* To see why we need this, consider the expression `b->set(new C())`. The IR of this expression looks
* like (simplified):
* ```
* r1(glval<unknown>) = FunctionAddress[set] :
* r2(glval<unknown>) = FunctionAddress[operator new] :
* r3(unsigned long) = Constant[8] :
* r4(void *) = Call[operator new] : func:r2, 0:r3
* r5(C *) = Convert : r4
* r6(glval<unknown>) = FunctionAddress[C] :
* v1(void) = Call[C] : func:r6, this:r5
* v2(void) = Call[set] : func:r1, this:r0, 0:r5
* ```
*
* Notice that both the call to `C` and the call to `set` will have an argument that is the
* result of calling `operator new` (i.e., `r4`). If we only have `PostUpdateNode`s that are
* instructions, both `PostUpdateNode`s would have `r4` as their pre-update node.
*
* We avoid this issue by having a `PostUpdateNode` for each argument, and let the pre-update node of
* each `PostUpdateNode` be the argument _operand_, instead of the defining instruction.
*/
class StoreNodeOperand extends StoreNode, TStoreNodeOperand {
ArgumentOperand operand;
StoreNodeOperand() { this = TStoreNodeOperand(operand) }
override predicate flowInto(Instruction addr) { this.getOperand().getDef() = addr }
/** Gets the underlying operand. */
Operand getOperand() { result = operand }
override Function getFunction() { result = operand.getDef().getEnclosingFunction() }
override IRType getType() { result = operand.getIRType() }
override Location getLocation() { result = operand.getLocation() }
override string toString() { result = operandNode(this.getOperand()).toString() + " [store]" }
override WriteSideEffectInstruction getStoreInstruction() {
Ssa::explicitWrite(_, result, operand.getDef())
}
/**
* The result of `StoreNodeOperand.getInner` is the `StoreNodeInstr` representation the instruction
* that defines this operand. This means the graph of `getInner` looks like this:
* ```
* I---I---I
* \ \ \
* O O O
* ```
* where each `StoreNodeOperand` "hooks" into the chain computed by `StoreNodeInstr.getInner`.
* This means that the chain of `getInner` calls on the argument `&o.f` on an expression
* like `func(&o.f)` is:
* ```
* r4---r3---r2
* \
* 0:r4
* ```
* where the IR for `func(&o.f)` looks like (simplified):
* ```
* r1(glval<unknown>) = FunctionAddress[func] :
* r2(glval<O>) = VariableAddress[o] :
* r3(glval<int>) = FieldAddress[f] : r2
* r4(int *) = CopyValue : r3
* v1(void) = Call[func] : func:r1, 0:r4
* ```
*/
override StoreNodeInstr getInner() { operand.getDef() = result.getInstruction() }
}
/**
* INTERNAL: do not use.
*
* A `ReadNode` is a node that has been (or is about to be) the
* source or target of a `readStep`.
*/
class ReadNode extends Node, TReadNode {
Instruction i;
ReadNode() { this = TReadNode(i) }
/** Gets the underlying instruction. */
Instruction getInstruction() { result = i }
override Declaration getEnclosingCallable() { result = this.getFunction() }
override Function getFunction() { result = this.getInstruction().getEnclosingFunction() }
override IRType getType() { result = this.getInstruction().getResultIRType() }
override Location getLocation() { result = this.getInstruction().getLocation() }
override string toString() {
result = instructionNode(this.getInstruction()).toString() + " [read]"
}
/** Gets a load instruction that uses the address computed by this read node. */
final Instruction getALoadInstruction() {
Ssa::addressFlowTC(this.getInstruction(), Ssa::getSourceAddress(result))
}
/**
* Gets a read node with an underlying instruction that is used by this
* underlying instruction to compute an address of a load instruction.
*/
final ReadNode getInner() { Ssa::addressFlow(result.getInstruction(), this.getInstruction()) }
/** The inverse of `ReadNode.getInner`. */
final ReadNode getOuter() { result.getInner() = this }
/** Holds if this read node computes a value that will not be used for any future read nodes. */
final predicate isTerminal() {
not exists(this.getOuter()) and
not readStep(this, _, _)
}
/** Holds if this read node computes a value that has not yet been used for any read operations. */
final predicate isInitial() {
not exists(this.getInner()) and
not readStep(_, _, this)
}
}
/**
* INTERNAL: do not use.
*
* A phi node produced by the shared SSA library, viewed as a node in a data flow graph.
*/
class SsaPhiNode extends Node, TSsaPhiNode {
Ssa::PhiNode phi;
SsaPhiNode() { this = TSsaPhiNode(phi) }
/* Get the phi node associated with this node. */
Ssa::PhiNode getPhiNode() { result = phi }
override Declaration getEnclosingCallable() { result = this.getFunction() }
override Function getFunction() { result = phi.getBasicBlock().getEnclosingFunction() }
override IRType getType() { result instanceof IRVoidType }
override Location getLocation() { result = phi.getBasicBlock().getLocation() }
/** Holds if this phi node has input from the `rnk`'th write operation in block `block`. */
final predicate hasInputAtRankInBlock(IRBlock block, int rnk) {
this.hasInputAtRankInBlock(block, rnk, _)
}
/**
* Holds if this phi node has input from the definition `input` (which is the `rnk`'th write
* operation in block `block`).
*/
cached
final predicate hasInputAtRankInBlock(IRBlock block, int rnk, Ssa::Definition input) {
Ssa::phiHasInputFromBlock(phi, input, _) and input.definesAt(_, block, rnk)
}
override string toString() { result = "Phi" }
}
/**
* An expression, viewed as a node in a data flow graph.
*/
@@ -600,14 +313,15 @@ deprecated class UninitializedNode extends Node {
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ClassInstanceExpr`,
* which represents the value after the constructor has run.
*
* This class exists to match the interface used by Java. There are currently no non-abstract
* classes that extend it. When we implement field flow, we can revisit this.
*/
abstract class PostUpdateNode extends Node {
abstract class PostUpdateNode extends InstructionNode {
/**
* Gets the node before the state update.
*/
abstract Node getPreUpdateNode();
override string toString() { result = this.getPreUpdateNode() + " [post update]" }
}
/**
@@ -618,7 +332,7 @@ abstract class PostUpdateNode extends Node {
* value, but does not necessarily replace it entirely. For example:
* ```
* x.y = 1; // a partial definition of the object `x`.
* x.y.z = 1; // a partial definition of the object `x.y` and `x`.
* x.y.z = 1; // a partial definition of the object `x.y`.
* x.setY(1); // a partial definition of the object `x`.
* setY(&x); // a partial definition of the object `x`.
* ```
@@ -627,34 +341,135 @@ abstract private class PartialDefinitionNode extends PostUpdateNode {
abstract Expr getDefinedExpr();
}
private class FieldPartialDefinitionNode extends PartialDefinitionNode, StoreNodeInstr {
FieldPartialDefinitionNode() {
this.getInstruction() = any(FieldAddressInstruction fai).getObjectAddress()
private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
override ChiInstruction instr;
StoreInstruction store;
ExplicitFieldStoreQualifierNode() {
not instr.isResultConflated() and
instr.getPartial() = store and
(
instr.getUpdatedInterval(_, _) or
store.getDestinationAddress() instanceof FieldAddressInstruction
)
}
override Node getPreUpdateNode() { result.asInstruction() = this.getInstruction() }
override Expr getDefinedExpr() { result = this.getInstruction().getUnconvertedResultExpression() }
override string toString() { result = PartialDefinitionNode.super.toString() }
}
private class NonPartialDefinitionPostUpdate extends PostUpdateNode, StoreNodeInstr {
NonPartialDefinitionPostUpdate() { not this instanceof PartialDefinitionNode }
override Node getPreUpdateNode() { result.asInstruction() = this.getInstruction() }
override string toString() { result = PostUpdateNode.super.toString() }
}
private class ArgumentPostUpdateNode extends PartialDefinitionNode, StoreNodeOperand {
override ArgumentNode getPreUpdateNode() { result.asOperand() = operand }
// By using an operand as the result of this predicate we avoid the dataflow inconsistency errors
// caused by having multiple nodes sharing the same pre update node. This inconsistency error can cause
// a tuple explosion in the big step dataflow relation since it can make many nodes be the entry node
// into a big step.
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
override Expr getDefinedExpr() {
result = this.getOperand().getDef().getUnconvertedResultExpression()
result =
store
.getDestinationAddress()
.(FieldAddressInstruction)
.getObjectAddress()
.getUnconvertedResultExpression()
}
}
/**
* Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to.
* For instance, an update to a field of a struct containing only one field. Even if the store does
* have a chi instruction, a subsequent use of the result of the store may be linked directly to the
* result of the store as an inexact definition if the store totally overlaps the use. For these
* cases we attach the PostUpdateNode to the store instruction. There's no obvious pre update node
* for this case (as the entire memory is updated), so `getPreUpdateNode` is implemented as
* `none()`.
*/
private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode {
override StoreInstruction instr;
ExplicitSingleFieldStoreQualifierNode() {
(
instr.getAUse().isDefinitionInexact()
or
not exists(ChiInstruction chi | chi.getPartial() = instr)
) and
// Without this condition any store would create a `PostUpdateNode`.
instr.getDestinationAddress() instanceof FieldAddressInstruction
}
override string toString() { result = PartialDefinitionNode.super.toString() }
override Node getPreUpdateNode() { none() }
override Expr getDefinedExpr() {
result =
instr
.getDestinationAddress()
.(FieldAddressInstruction)
.getObjectAddress()
.getUnconvertedResultExpression()
}
}
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
result = instr or
result = instr.(CopyValueInstruction).getUnary()
}
/**
* The target of a `fieldStoreStepAfterArraySuppression` store step, which is used to convert
* an `ArrayContent` to a `FieldContent` when the `WriteSideEffect` instruction stores
* into a field. See the QLDoc for `suppressArrayRead` for an example of where such a conversion
* is inserted.
*/
private class WriteSideEffectFieldStoreQualifierNode extends PartialDefinitionNode {
override ChiInstruction instr;
WriteSideEffectInstruction write;
FieldAddressInstruction field;
WriteSideEffectFieldStoreQualifierNode() {
not instr.isResultConflated() and
instr.getPartial() = write and
field = getFieldInstruction(write.getDestinationAddress())
}
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
override Expr getDefinedExpr() {
result = field.getObjectAddress().getUnconvertedResultExpression()
}
}
/**
* The `PostUpdateNode` that is the target of a `arrayStoreStepChi` store step. The overriden
* `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.
*/
private class ArrayStoreNode extends PartialDefinitionNode {
override ChiInstruction instr;
PointerAddInstruction add;
ArrayStoreNode() {
not instr.isResultConflated() and
exists(StoreInstruction store |
instr.getPartial() = store and
add = store.getDestinationAddress()
)
}
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
override Expr getDefinedExpr() { result = add.getLeft().getUnconvertedResultExpression() }
}
/**
* The `PostUpdateNode` that is the target of a `arrayStoreStepChi` store step. The overriden
* `ChiInstruction` corresponds to the instruction represented by `node2` in `arrayStoreStepChi`.
*/
private class PointerStoreNode extends PostUpdateNode {
override ChiInstruction instr;
PointerStoreNode() {
not instr.isResultConflated() and
exists(StoreInstruction store |
instr.getPartial() = store and
store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction
)
}
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
}
/**
@@ -733,11 +548,6 @@ class VariableNode extends Node, TVariableNode {
*/
InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr }
/**
* Gets the node corresponding to `operand`.
*/
OperandNode operandNode(Operand operand) { result.getOperand() = operand }
/**
* DEPRECATED: use `definitionByReferenceNodeFromArgument` instead.
*
@@ -804,174 +614,59 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
or
// Instruction -> Operand flow
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
or
// Flow into, through, and out of store nodes
StoreNodeFlow::flowInto(nodeFrom.asInstruction(), nodeTo)
or
StoreNodeFlow::flowThrough(nodeFrom, nodeTo)
or
StoreNodeFlow::flowOutOf(nodeFrom, nodeTo)
or
// Flow into, through, and out of read nodes
ReadNodeFlow::flowInto(nodeFrom, nodeTo)
or
ReadNodeFlow::flowThrough(nodeFrom, nodeTo)
or
ReadNodeFlow::flowOutOf(nodeFrom, nodeTo)
or
// Adjacent-def-use and adjacent-use-use flow
adjacentDefUseFlow(nodeFrom, nodeTo)
}
private predicate adjacentDefUseFlow(Node nodeFrom, Node nodeTo) {
// Flow that isn't already covered by field flow out of store/read nodes.
not nodeFrom.asInstruction() = any(StoreNode pun).getStoreInstruction() and
not nodeFrom.asInstruction() = any(ReadNode pun).getALoadInstruction() and
(
//Def-use flow
Ssa::ssaFlow(nodeFrom, nodeTo)
or
// Use-use flow through stores.
exists(Instruction loadAddress, Node store |
loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) and
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
Ssa::ssaFlow(store, nodeTo)
)
pragma[noinline]
private predicate getFieldSizeOfClass(Class c, Type type, int size) {
exists(Field f |
f.getDeclaringType() = c and
f.getUnderlyingType() = type and
type.getSize() = size
)
}
/**
* INTERNAL: Do not use.
*/
module ReadNodeFlow {
/** Holds if the read node `nodeTo` should receive flow from `nodeFrom`. */
predicate flowInto(Node nodeFrom, ReadNode nodeTo) {
nodeTo.isInitial() and
(
// If we entered through an address operand.
nodeFrom.asOperand().getDef() = nodeTo.getInstruction()
or
// If we entered flow through a memory-producing instruction.
// This can happen if we have flow to an `InitializeParameterIndirection` through
// a `ReadSideEffectInstruction`.
exists(Instruction load, Instruction def |
def = nodeFrom.asInstruction() and
def = Ssa::getSourceValueOperand(load).getAnyDef() and
not def = any(StoreNode store).getStoreInstruction() and
pragma[only_bind_into](nodeTo).getALoadInstruction() = load
)
)
}
/**
* Holds if the read node `nodeTo` should receive flow from the read node `nodeFrom`.
*
* This happens when `readFrom` is _not_ the source of a `readStep`, and `nodeTo` is
* the `ReadNode` that represents an address that directly depends on `nodeFrom`.
*/
predicate flowThrough(ReadNode nodeFrom, ReadNode nodeTo) {
not readStep(nodeFrom, _, _) and
nodeFrom.getOuter() = nodeTo
}
/**
* Holds if flow should leave the read node `nFrom` and enter the node `nodeTo`.
* This happens either because there is use-use flow from one of the variables used in
* the read operation, or because we have traversed all the field dereferences in the
* read operation.
*/
predicate flowOutOf(ReadNode nFrom, Node nodeTo) {
// Use-use flow to another use of the same variable instruction
Ssa::ssaFlow(nFrom, nodeTo)
or
not exists(nFrom.getInner()) and
exists(Node store |
Ssa::explicitWrite(_, store.asInstruction(), nFrom.getInstruction()) and
Ssa::ssaFlow(store, nodeTo)
)
or
// Flow out of read nodes and into memory instructions if we cannot move any further through
// read nodes.
nFrom.isTerminal() and
(
exists(Instruction load |
load = nodeTo.asInstruction() and
Ssa::getSourceAddress(load) = nFrom.getInstruction()
)
or
exists(CallInstruction call, int i |
call.getArgument(i) = nodeTo.asInstruction() and
call.getArgument(i) = nFrom.getInstruction()
)
)
}
}
/**
* INTERNAL: Do not use.
*/
module StoreNodeFlow {
/** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */
predicate flowInto(Instruction instrFrom, StoreNode nodeTo) {
nodeTo.flowInto(Ssa::getDestinationAddress(instrFrom))
}
/**
* Holds if the store node `nodeTo` should receive flow from `nodeFom`.
*
* This happens when `nodeFrom` is _not_ the source of a `storeStep`, and `nodeFrom` is
* the `Storenode` that represents an address that directly depends on `nodeTo`.
*/
predicate flowThrough(StoreNode nodeFrom, StoreNode nodeTo) {
// Flow through a post update node that doesn't need a store step.
not storeStep(nodeFrom, _, _) and
nodeTo.getOuter() = nodeFrom
}
/**
* Holds if flow should leave the store node `nodeFrom` and enter the node `nodeTo`.
* This happens because we have traversed an entire chain of field dereferences
* after a store operation.
*/
predicate flowOutOf(StoreNodeInstr nFrom, Node nodeTo) {
nFrom.isTerminal() and
Ssa::ssaFlow(nFrom, nodeTo)
}
private predicate isSingleFieldClass(Type type, Operand op) {
exists(int size, Class c |
c = op.getType().getUnderlyingType() and
c.getSize() = size and
getFieldSizeOfClass(c, type, size)
)
}
private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
// Propagate flow from an instruction to its exact uses.
// We do this for all instruction/operand pairs, except when the operand is the
// side effect operand of a ReturnIndirectionInstruction, or the load operand of a LoadInstruction.
// This is because we get these flows through the shared SSA library already, and including this
// flow here will create multiple dataflow paths which creates a blowup in stage 3 of dataflow.
(
not any(ReturnIndirectionInstruction ret).getSideEffectOperand() = opTo and
not any(LoadInstruction load).getSourceValueOperand() = opTo and
not any(ReturnValueInstruction ret).getReturnValueOperand() = opTo
) and
opTo.getDef() = iFrom
}
pragma[noinline]
private predicate getAddressType(LoadInstruction load, Type t) {
exists(Instruction address |
address = load.getSourceAddress() and
t = address.getResultType()
or
opTo = any(ReadSideEffectInstruction read).getSideEffectOperand() and
not iFrom.isResultConflated() and
iFrom = opTo.getAnyDef()
or
// Loading a single `int` from an `int *` parameter is not an exact load since
// the parameter may point to an entire array rather than a single `int`. The
// following rule ensures that any flow going into the
// `InitializeIndirectionInstruction`, even if it's for a different array
// element, will propagate to a load of the first element.
//
// Since we're linking `InitializeIndirectionInstruction` and
// `LoadInstruction` together directly, this rule will break if there's any
// reassignment of the parameter indirection, including a conditional one that
// leads to a phi node.
exists(InitializeIndirectionInstruction init |
iFrom = init and
opTo.(LoadOperand).getAnyDef() = init and
// Check that the types match. Otherwise we can get flow from an object to
// its fields, which leads to field conflation when there's flow from other
// fields to the object elsewhere.
init.getParameter().getType().getUnspecifiedType().(DerivedType).getBaseType() =
opTo.getType().getUnspecifiedType()
)
or
// Flow from stores to structs with a single field to a load of that field.
exists(LoadInstruction load |
load.getSourceValueOperand() = opTo and
opTo.getAnyDef() = iFrom and
isSingleFieldClass(pragma[only_bind_out](pragma[only_bind_out](iFrom).getResultType()), opTo)
)
}
/**
* Like the AST dataflow library, we want to conflate the address and value of a reference. This class
* represents the `LoadInstruction` that is generated from a reference dereference.
*/
private class ReferenceDereferenceInstruction extends LoadInstruction {
ReferenceDereferenceInstruction() {
exists(ReferenceType ref |
getAddressType(this, ref) and
this.getResultType() = ref.getBaseType()
)
}
}
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
@@ -986,8 +681,40 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
or
iTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom
or
// Conflate references and values like in AST dataflow.
iTo.(ReferenceDereferenceInstruction).getSourceAddressOperand() = opFrom
// A chi instruction represents a point where a new value (the _partial_
// operand) may overwrite an old value (the _total_ operand), but the alias
// analysis couldn't determine that it surely will overwrite every bit of it or
// that it surely will overwrite no bit of it.
//
// By allowing flow through the total operand, we ensure that flow is not lost
// due to shortcomings of the alias analysis. We may get false flow in cases
// where the data is indeed overwritten.
//
// Flow through the partial operand belongs in the taint-tracking libraries
// for now.
iTo.getAnOperand().(ChiTotalOperand) = opFrom
or
// Add flow from write side-effects to non-conflated chi instructions through their
// partial operands. From there, a `readStep` will find subsequent reads of that field.
// Consider the following example:
// ```
// void setX(Point* p, int new_x) {
// p->x = new_x;
// }
// ...
// setX(&p, taint());
// ```
// Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
// `setX`, which will be melded into `p` through a chi instruction.
exists(ChiInstruction chi | chi = iTo |
opFrom.getAnyDef() instanceof WriteSideEffectInstruction and
chi.getPartialOperand() = opFrom and
not chi.isResultConflated() and
// In a call such as `set_value(&x->val);` we don't want the memory representing `x` to receive
// dataflow by a simple step. Instead, this is handled by field flow. If we add a simple step here
// we can get field-to-object flow.
not chi.isPartialUpdate()
)
or
// Flow through modeled functions
modelFlow(opFrom, iTo)
@@ -1061,14 +788,25 @@ predicate localInstructionFlow(Instruction e1, Instruction e2) {
*/
predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) }
/**
* Gets a field corresponding to the bit range `[startBit..endBit)` of class `c`, if any.
*/
private Field getAField(Class c, int startBit, int endBit) {
result.getDeclaringType() = c and
startBit = 8 * result.getByteOffset() and
endBit = 8 * result.getType().getSize() + startBit
or
exists(Field f, Class cInner |
f = c.getAField() and
cInner = f.getUnderlyingType() and
result = getAField(cInner, startBit - 8 * f.getByteOffset(), endBit - 8 * f.getByteOffset())
)
}
private newtype TContent =
TFieldContent(Field f) {
// As reads and writes to union fields can create flow even though the reads and writes
// target different fields, we don't want a read (write) to create a read (write) step.
not f.getDeclaringType() instanceof Union
} or
TCollectionContent() or // Not used in C/C++
TArrayContent() // Not used in C/C++.
TFieldContent(Class c, int startBit, int endBit) { exists(getAField(c, startBit, endBit)) } or
TCollectionContent() or
TArrayContent()
/**
* A description of the way data may be stored inside an object. Examples
@@ -1086,13 +824,18 @@ class Content extends TContent {
/** A reference through an instance field. */
class FieldContent extends Content, TFieldContent {
Field f;
Class c;
int startBit;
int endBit;
FieldContent() { this = TFieldContent(f) }
FieldContent() { this = TFieldContent(c, startBit, endBit) }
override string toString() { result = f.toString() }
// Ensure that there's just 1 result for `toString`.
override string toString() { result = min(Field f | f = this.getAField() | f.toString()) }
Field getField() { result = f }
predicate hasOffset(Class cl, int start, int end) { cl = c and start = startBit and end = endBit }
Field getAField() { result = getAField(c, startBit, endBit) }
}
/** A reference through an array. */

View File

@@ -1,662 +0,0 @@
/**
* Provides a language-independent implementation of static single assignment
* (SSA) form.
*/
private import SsaImplSpecific
private BasicBlock getABasicBlockPredecessor(BasicBlock bb) { getABasicBlockSuccessor(result) = bb }
/**
* Liveness analysis (based on source variables) to restrict the size of the
* SSA representation.
*/
private module Liveness {
/**
* A classification of variable references into reads (of a given kind) and
* (certain or uncertain) writes.
*/
private newtype TRefKind =
Read(boolean certain) { certain in [false, true] } or
Write(boolean certain) { certain in [false, true] }
private class RefKind extends TRefKind {
string toString() {
exists(boolean certain | this = Read(certain) and result = "read (" + certain + ")")
or
exists(boolean certain | this = Write(certain) and result = "write (" + certain + ")")
}
int getOrder() {
this = Read(_) and
result = 0
or
this = Write(_) and
result = 1
}
}
/**
* Holds if the `i`th node of basic block `bb` is a reference to `v` of kind `k`.
*/
private predicate ref(BasicBlock bb, int i, SourceVariable v, RefKind k) {
exists(boolean certain | variableRead(bb, i, v, certain) | k = Read(certain))
or
exists(boolean certain | variableWrite(bb, i, v, certain) | k = Write(certain))
}
private newtype OrderedRefIndex =
MkOrderedRefIndex(int i, int tag) {
exists(RefKind rk | ref(_, i, _, rk) | tag = rk.getOrder())
}
private OrderedRefIndex refOrd(BasicBlock bb, int i, SourceVariable v, RefKind k, int ord) {
ref(bb, i, v, k) and
result = MkOrderedRefIndex(i, ord) and
ord = k.getOrder()
}
/**
* Gets the (1-based) rank of the reference to `v` at the `i`th node of
* basic block `bb`, which has the given reference kind `k`.
*
* Reads are considered before writes when they happen at the same index.
*/
private int refRank(BasicBlock bb, int i, SourceVariable v, RefKind k) {
refOrd(bb, i, v, k, _) =
rank[result](int j, int ord, OrderedRefIndex res |
res = refOrd(bb, j, v, _, ord)
|
res order by j, ord
)
}
private int maxRefRank(BasicBlock bb, SourceVariable v) {
result = refRank(bb, _, v, _) and
not result + 1 = refRank(bb, _, v, _)
}
/**
* Gets the (1-based) rank of the first reference to `v` inside basic block `bb`
* that is either a read or a certain write.
*/
private int firstReadOrCertainWrite(BasicBlock bb, SourceVariable v) {
result =
min(int r, RefKind k |
r = refRank(bb, _, v, k) and
k != Write(false)
|
r
)
}
/**
* Holds if source variable `v` is live at the beginning of basic block `bb`.
*/
predicate liveAtEntry(BasicBlock bb, SourceVariable v) {
// The first read or certain write to `v` inside `bb` is a read
refRank(bb, _, v, Read(_)) = firstReadOrCertainWrite(bb, v)
or
// There is no certain write to `v` inside `bb`, but `v` is live at entry
// to a successor basic block of `bb`
not exists(firstReadOrCertainWrite(bb, v)) and
liveAtExit(bb, v)
}
/**
* Holds if source variable `v` is live at the end of basic block `bb`.
*/
predicate liveAtExit(BasicBlock bb, SourceVariable v) {
liveAtEntry(getABasicBlockSuccessor(bb), v)
}
/**
* Holds if variable `v` is live in basic block `bb` at index `i`.
* The rank of `i` is `rnk` as defined by `refRank()`.
*/
private predicate liveAtRank(BasicBlock bb, int i, SourceVariable v, int rnk) {
exists(RefKind kind | rnk = refRank(bb, i, v, kind) |
rnk = maxRefRank(bb, v) and
liveAtExit(bb, v)
or
ref(bb, i, v, kind) and
kind = Read(_)
or
exists(RefKind nextKind |
liveAtRank(bb, _, v, rnk + 1) and
rnk + 1 = refRank(bb, _, v, nextKind) and
nextKind != Write(true)
)
)
}
/**
* Holds if variable `v` is live after the (certain or uncertain) write at
* index `i` inside basic block `bb`.
*/
predicate liveAfterWrite(BasicBlock bb, int i, SourceVariable v) {
exists(int rnk | rnk = refRank(bb, i, v, Write(_)) | liveAtRank(bb, i, v, rnk))
}
}
private import Liveness
/**
* Holds if `df` is in the dominance frontier of `bb`.
*
* This is equivalent to:
*
* ```ql
* bb = getImmediateBasicBlockDominator*(getABasicBlockPredecessor(df)) and
* not bb = getImmediateBasicBlockDominator+(df)
* ```
*/
private predicate inDominanceFrontier(BasicBlock bb, BasicBlock df) {
bb = getABasicBlockPredecessor(df) and not bb = getImmediateBasicBlockDominator(df)
or
exists(BasicBlock prev | inDominanceFrontier(prev, df) |
bb = getImmediateBasicBlockDominator(prev) and
not bb = getImmediateBasicBlockDominator(df)
)
}
/**
* Holds if `bb` is in the dominance frontier of a block containing a
* definition of `v`.
*/
pragma[noinline]
private predicate inDefDominanceFrontier(BasicBlock bb, SourceVariable v) {
exists(BasicBlock defbb, Definition def |
def.definesAt(v, defbb, _) and
inDominanceFrontier(defbb, bb)
)
}
cached
newtype TDefinition =
TWriteDef(SourceVariable v, BasicBlock bb, int i) {
variableWrite(bb, i, v, _) and
liveAfterWrite(bb, i, v)
} or
TPhiNode(SourceVariable v, BasicBlock bb) {
inDefDominanceFrontier(bb, v) and
liveAtEntry(bb, v)
}
private module SsaDefReaches {
newtype TSsaRefKind =
SsaRead() or
SsaDef()
/**
* A classification of SSA variable references into reads and definitions.
*/
class SsaRefKind extends TSsaRefKind {
string toString() {
this = SsaRead() and
result = "SsaRead"
or
this = SsaDef() and
result = "SsaDef"
}
int getOrder() {
this = SsaRead() and
result = 0
or
this = SsaDef() and
result = 1
}
}
/**
* Holds if the `i`th node of basic block `bb` is a reference to `v`,
* either a read (when `k` is `SsaRead()`) or an SSA definition (when `k`
* is `SsaDef()`).
*
* Unlike `Liveness::ref`, this includes `phi` nodes.
*/
predicate ssaRef(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
variableRead(bb, i, v, _) and
k = SsaRead()
or
exists(Definition def | def.definesAt(v, bb, i)) and
k = SsaDef()
}
private newtype OrderedSsaRefIndex =
MkOrderedSsaRefIndex(int i, SsaRefKind k) { ssaRef(_, i, _, k) }
private OrderedSsaRefIndex ssaRefOrd(BasicBlock bb, int i, SourceVariable v, SsaRefKind k, int ord) {
ssaRef(bb, i, v, k) and
result = MkOrderedSsaRefIndex(i, k) and
ord = k.getOrder()
}
/**
* Gets the (1-based) rank of the reference to `v` at the `i`th node of basic
* block `bb`, which has the given reference kind `k`.
*
* For example, if `bb` is a basic block with a phi node for `v` (considered
* to be at index -1), reads `v` at node 2, and defines it at node 5, we have:
*
* ```ql
* ssaRefRank(bb, -1, v, SsaDef()) = 1 // phi node
* ssaRefRank(bb, 2, v, Read()) = 2 // read at node 2
* ssaRefRank(bb, 5, v, SsaDef()) = 3 // definition at node 5
* ```
*
* Reads are considered before writes when they happen at the same index.
*/
int ssaRefRank(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
ssaRefOrd(bb, i, v, k, _) =
rank[result](int j, int ord, OrderedSsaRefIndex res |
res = ssaRefOrd(bb, j, v, _, ord)
|
res order by j, ord
)
}
int maxSsaRefRank(BasicBlock bb, SourceVariable v) {
result = ssaRefRank(bb, _, v, _) and
not result + 1 = ssaRefRank(bb, _, v, _)
}
/**
* Holds if the SSA definition `def` reaches rank index `rnk` in its own
* basic block `bb`.
*/
predicate ssaDefReachesRank(BasicBlock bb, Definition def, int rnk, SourceVariable v) {
exists(int i |
rnk = ssaRefRank(bb, i, v, SsaDef()) and
def.definesAt(v, bb, i)
)
or
ssaDefReachesRank(bb, def, rnk - 1, v) and
rnk = ssaRefRank(bb, _, v, SsaRead())
}
/**
* Holds if the SSA definition of `v` at `def` reaches index `i` in the same
* basic block `bb`, without crossing another SSA definition of `v`.
*/
predicate ssaDefReachesReadWithinBlock(SourceVariable v, Definition def, BasicBlock bb, int i) {
exists(int rnk |
ssaDefReachesRank(bb, def, rnk, v) and
rnk = ssaRefRank(bb, i, v, SsaRead())
)
}
/**
* Holds if the SSA definition of `v` at `def` reaches uncertain SSA definition
* `redef` in the same basic block, without crossing another SSA definition of `v`.
*/
predicate ssaDefReachesUncertainDefWithinBlock(
SourceVariable v, Definition def, UncertainWriteDefinition redef
) {
exists(BasicBlock bb, int rnk, int i |
ssaDefReachesRank(bb, def, rnk, v) and
rnk = ssaRefRank(bb, i, v, SsaDef()) - 1 and
redef.definesAt(v, bb, i)
)
}
/**
* Same as `ssaRefRank()`, but restricted to a particular SSA definition `def`.
*/
int ssaDefRank(Definition def, SourceVariable v, BasicBlock bb, int i, SsaRefKind k) {
v = def.getSourceVariable() and
result = ssaRefRank(bb, i, v, k) and
(
ssaDefReachesRead(_, def, bb, i)
or
def.definesAt(_, bb, i)
)
}
/**
* Holds if the reference to `def` at index `i` in basic block `bb` is the
* last reference to `v` inside `bb`.
*/
pragma[noinline]
predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) {
ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v)
}
predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) {
exists(ssaDefRank(def, v, bb, _, _))
}
pragma[noinline]
private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) {
ssaDefReachesEndOfBlock(bb, def, _) and
not defOccursInBlock(_, bb, def.getSourceVariable())
}
/**
* Holds if `def` is accessed in basic block `bb1` (either a read or a write),
* `bb2` is a transitive successor of `bb1`, `def` is live at the end of `bb1`,
* and the underlying variable for `def` is neither read nor written in any block
* on the path between `bb1` and `bb2`.
*/
predicate varBlockReaches(Definition def, BasicBlock bb1, BasicBlock bb2) {
defOccursInBlock(def, bb1, _) and
bb2 = getABasicBlockSuccessor(bb1)
or
exists(BasicBlock mid |
varBlockReaches(def, bb1, mid) and
ssaDefReachesThroughBlock(def, mid) and
bb2 = getABasicBlockSuccessor(mid)
)
}
/**
* Holds if `def` is accessed in basic block `bb1` (either a read or a write),
* `def` is read at index `i2` in basic block `bb2`, `bb2` is in a transitive
* successor block of `bb1`, and `def` is neither read nor written in any block
* on a path between `bb1` and `bb2`.
*/
predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) {
varBlockReaches(def, bb1, bb2) and
ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1
}
}
private import SsaDefReaches
pragma[nomagic]
predicate liveThrough(BasicBlock bb, SourceVariable v) {
liveAtExit(bb, v) and
not ssaRef(bb, _, v, SsaDef())
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if the SSA definition of `v` at `def` reaches the end of basic
* block `bb`, at which point it is still live, without crossing another
* SSA definition of `v`.
*/
pragma[nomagic]
predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable v) {
exists(int last | last = maxSsaRefRank(bb, v) |
ssaDefReachesRank(bb, def, last, v) and
liveAtExit(bb, v)
)
or
// The construction of SSA form ensures that each read of a variable is
// dominated by its definition. An SSA definition therefore reaches a
// control flow node if it is the _closest_ SSA definition that dominates
// the node. If two definitions dominate a node then one must dominate the
// other, so therefore the definition of _closest_ is given by the dominator
// tree. Thus, reaching definitions can be calculated in terms of dominance.
ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and
liveThrough(bb, pragma[only_bind_into](v))
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if `inp` is an input to the phi node `phi` along the edge originating in `bb`.
*/
pragma[nomagic]
predicate phiHasInputFromBlock(PhiNode phi, Definition inp, BasicBlock bb) {
exists(SourceVariable v, BasicBlock bbDef |
phi.definesAt(v, bbDef, _) and
getABasicBlockPredecessor(bbDef) = bb and
ssaDefReachesEndOfBlock(bb, inp, v)
)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if the SSA definition of `v` at `def` reaches a read at index `i` in
* basic block `bb`, without crossing another SSA definition of `v`. The read
* is of kind `rk`.
*/
pragma[nomagic]
predicate ssaDefReachesRead(SourceVariable v, Definition def, BasicBlock bb, int i) {
ssaDefReachesReadWithinBlock(v, def, bb, i)
or
variableRead(bb, i, v, _) and
ssaDefReachesEndOfBlock(getABasicBlockPredecessor(bb), def, v) and
not ssaDefReachesReadWithinBlock(v, _, bb, i)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if `def` is accessed at index `i1` in basic block `bb1` (either a read
* or a write), `def` is read at index `i2` in basic block `bb2`, and there is a
* path between them without any read of `def`.
*/
pragma[nomagic]
predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
exists(int rnk |
rnk = ssaDefRank(def, _, bb1, i1, _) and
rnk + 1 = ssaDefRank(def, _, bb1, i2, SsaRead()) and
variableRead(bb1, i2, _, _) and
bb2 = bb1
)
or
lastSsaRef(def, _, bb1, i1) and
defAdjacentRead(def, bb1, bb2, i2)
}
pragma[noinline]
private predicate adjacentDefRead(
Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v
) {
adjacentDefRead(def, bb1, i1, bb2, i2) and
v = def.getSourceVariable()
}
private predicate adjacentDefReachesRead(
Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) |
ssaRef(bb1, i1, v, SsaDef())
or
variableRead(bb1, i1, v, true)
)
or
exists(BasicBlock bb3, int i3 |
adjacentDefReachesRead(def, bb1, i1, bb3, i3) and
variableRead(bb3, i3, _, false) and
adjacentDefRead(def, bb3, i3, bb2, i2)
)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Same as `adjacentDefRead`, but ignores uncertain reads.
*/
pragma[nomagic]
predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
adjacentDefReachesRead(def, bb1, i1, bb2, i2) and
variableRead(bb2, i2, _, true)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if the node at index `i` in `bb` is a last reference to SSA definition
* `def`. The reference is last because it can reach another write `next`,
* without passing through another read or write.
*/
pragma[nomagic]
predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) {
exists(SourceVariable v |
// Next reference to `v` inside `bb` is a write
exists(int rnk, int j |
rnk = ssaDefRank(def, v, bb, i, _) and
next.definesAt(v, bb, j) and
rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
)
or
// Can reach a write using one or more steps
lastSsaRef(def, v, bb, i) and
exists(BasicBlock bb2 |
varBlockReaches(def, bb, bb2) and
1 = ssaDefRank(next, v, bb2, _, SsaDef())
)
)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if `inp` is an immediately preceding definition of uncertain definition
* `def`. Since `def` is uncertain, the value from the preceding definition might
* still be valid.
*/
pragma[nomagic]
predicate uncertainWriteDefinitionInput(UncertainWriteDefinition def, Definition inp) {
lastRefRedef(inp, _, _, def)
}
private predicate adjacentDefReachesUncertainRead(
Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
adjacentDefReachesRead(def, bb1, i1, bb2, i2) and
variableRead(bb2, i2, _, false)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Same as `lastRefRedef`, but ignores uncertain reads.
*/
pragma[nomagic]
predicate lastRefRedefNoUncertainReads(Definition def, BasicBlock bb, int i, Definition next) {
lastRefRedef(def, bb, i, next) and
not variableRead(bb, i, def.getSourceVariable(), false)
or
exists(BasicBlock bb0, int i0 |
lastRefRedef(def, bb0, i0, next) and
adjacentDefReachesUncertainRead(def, bb, i, bb0, i0)
)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Holds if the node at index `i` in `bb` is a last reference to SSA
* definition `def`.
*
* That is, the node can reach the end of the enclosing callable, or another
* SSA definition for the underlying source variable, without passing through
* another read.
*/
pragma[nomagic]
predicate lastRef(Definition def, BasicBlock bb, int i) {
lastRefRedef(def, bb, i, _)
or
lastSsaRef(def, _, bb, i) and
(
// Can reach exit directly
bb instanceof ExitBasicBlock
or
// Can reach a block using one or more steps, where `def` is no longer live
exists(BasicBlock bb2 | varBlockReaches(def, bb, bb2) |
not defOccursInBlock(def, bb2, _) and
not ssaDefReachesEndOfBlock(bb2, def, _)
)
)
}
/**
* NB: If this predicate is exposed, it should be cached.
*
* Same as `lastRefRedef`, but ignores uncertain reads.
*/
pragma[nomagic]
predicate lastRefNoUncertainReads(Definition def, BasicBlock bb, int i) {
lastRef(def, bb, i) and
not variableRead(bb, i, def.getSourceVariable(), false)
or
exists(BasicBlock bb0, int i0 |
lastRef(def, bb0, i0) and
adjacentDefReachesUncertainRead(def, bb, i, bb0, i0)
)
}
/** A static single assignment (SSA) definition. */
class Definition extends TDefinition {
/** Gets the source variable underlying this SSA definition. */
SourceVariable getSourceVariable() { this.definesAt(result, _, _) }
/**
* Holds if this SSA definition defines `v` at index `i` in basic block `bb`.
* Phi nodes are considered to be at index `-1`, while normal variable writes
* are at the index of the control flow node they wrap.
*/
final predicate definesAt(SourceVariable v, BasicBlock bb, int i) {
this = TWriteDef(v, bb, i)
or
this = TPhiNode(v, bb) and i = -1
}
/** Gets the basic block to which this SSA definition belongs. */
final BasicBlock getBasicBlock() { this.definesAt(_, result, _) }
/** Gets a textual representation of this SSA definition. */
string toString() { none() }
}
/** An SSA definition that corresponds to a write. */
class WriteDefinition extends Definition, TWriteDef {
private SourceVariable v;
private BasicBlock bb;
private int i;
WriteDefinition() { this = TWriteDef(v, bb, i) }
override string toString() { result = "WriteDef" }
}
/** A phi node. */
class PhiNode extends Definition, TPhiNode {
override string toString() { result = "Phi" }
}
/**
* An SSA definition that represents an uncertain update of the underlying
* source variable.
*/
class UncertainWriteDefinition extends WriteDefinition {
UncertainWriteDefinition() {
exists(SourceVariable v, BasicBlock bb, int i |
this.definesAt(v, bb, i) and
variableWrite(bb, i, v, false)
)
}
}
/** Provides a set of consistency queries. */
module Consistency {
abstract class RelevantDefinition extends Definition {
abstract predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
);
}
query predicate nonUniqueDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
ssaDefReachesRead(v, def, bb, i) and
not exists(unique(Definition def0 | ssaDefReachesRead(v, def0, bb, i)))
}
query predicate readWithoutDef(SourceVariable v, BasicBlock bb, int i) {
variableRead(bb, i, v, _) and
not ssaDefReachesRead(v, _, bb, i)
}
query predicate deadDef(RelevantDefinition def, SourceVariable v) {
v = def.getSourceVariable() and
not ssaDefReachesRead(_, def, _, _) and
not phiHasInputFromBlock(_, def, _) and
not uncertainWriteDefinitionInput(_, def)
}
}

View File

@@ -1,18 +0,0 @@
private import semmle.code.cpp.ir.IR
private import SsaInternals as Ssa
class BasicBlock = IRBlock;
class SourceVariable = Ssa::SourceVariable;
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result.immediatelyDominates(bb) }
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
class ExitBasicBlock extends IRBlock {
ExitBasicBlock() { this.getLastInstruction() instanceof ExitFunctionInstruction }
}
predicate variableWrite = Ssa::variableWrite/4;
predicate variableRead = Ssa::variableRead/4;

View File

@@ -1,639 +0,0 @@
import SsaImplCommon
private import cpp as Cpp
private import semmle.code.cpp.ir.IR
private import DataFlowUtil
private import DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.cpp.models.interfaces.Allocation as Alloc
private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow
private module SourceVariables {
private newtype TSourceVariable =
TSourceIRVariable(IRVariable var) or
TSourceIRVariableIndirection(InitializeIndirectionInstruction init)
abstract class SourceVariable extends TSourceVariable {
IRVariable var;
abstract string toString();
}
class SourceIRVariable extends SourceVariable, TSourceIRVariable {
SourceIRVariable() { this = TSourceIRVariable(var) }
IRVariable getIRVariable() { result = var }
override string toString() { result = this.getIRVariable().toString() }
}
class SourceIRVariableIndirection extends SourceVariable, TSourceIRVariableIndirection {
InitializeIndirectionInstruction init;
SourceIRVariableIndirection() {
this = TSourceIRVariableIndirection(init) and var = init.getIRVariable()
}
IRVariable getUnderlyingIRVariable() { result = var }
override string toString() { result = "*" + this.getUnderlyingIRVariable().toString() }
}
}
import SourceVariables
cached
private newtype TDefOrUse =
TExplicitDef(Instruction store) { explicitWrite(_, store, _) } or
TInitializeParam(Instruction instr) {
instr instanceof InitializeParameterInstruction
or
instr instanceof InitializeIndirectionInstruction
} or
TExplicitUse(Operand op) { isExplicitUse(op) } or
TReturnParamIndirection(Operand op) { returnParameterIndirection(op, _) }
pragma[nomagic]
private int getRank(DefOrUse defOrUse, IRBlock block) {
defOrUse =
rank[result](int i, DefOrUse cand |
block.getInstruction(i) = toInstruction(cand)
|
cand order by i
)
}
private class DefOrUse extends TDefOrUse {
/** Gets the instruction associated with this definition, if any. */
Instruction asDef() { none() }
/** Gets the operand associated with this use, if any. */
Operand asUse() { none() }
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the block of this definition or use. */
abstract IRBlock getBlock();
/** Holds if this definition or use has rank `rank` in block `block`. */
cached
final predicate hasRankInBlock(IRBlock block, int rnk) { rnk = getRank(this, block) }
/** Gets the location of this element. */
abstract Cpp::Location getLocation();
}
private Instruction toInstruction(DefOrUse defOrUse) {
result = defOrUse.asDef()
or
result = defOrUse.asUse().getUse()
}
abstract class Def extends DefOrUse {
Instruction store;
/** Gets the instruction of this definition. */
Instruction getInstruction() { result = store }
/** Gets the variable that is defined by this definition. */
abstract SourceVariable getSourceVariable();
/** Holds if this definition is guaranteed to happen. */
abstract predicate isCertain();
override Instruction asDef() { result = this.getInstruction() }
override string toString() { result = "Def" }
override IRBlock getBlock() { result = this.getInstruction().getBlock() }
override Cpp::Location getLocation() { result = store.getLocation() }
}
private class ExplicitDef extends Def, TExplicitDef {
ExplicitDef() { this = TExplicitDef(store) }
override SourceVariable getSourceVariable() {
exists(VariableInstruction var |
explicitWrite(_, this.getInstruction(), var) and
result.(SourceIRVariable).getIRVariable() = var.getIRVariable()
)
}
override predicate isCertain() { explicitWrite(true, this.getInstruction(), _) }
}
private class ParameterDef extends Def, TInitializeParam {
ParameterDef() { this = TInitializeParam(store) }
override SourceVariable getSourceVariable() {
result.(SourceIRVariable).getIRVariable() =
store.(InitializeParameterInstruction).getIRVariable()
or
result.(SourceIRVariableIndirection).getUnderlyingIRVariable() =
store.(InitializeIndirectionInstruction).getIRVariable()
}
override predicate isCertain() { any() }
}
abstract class Use extends DefOrUse {
Operand use;
override Operand asUse() { result = use }
/** Gets the underlying operand of this use. */
Operand getOperand() { result = use }
override string toString() { result = "Use" }
/** Gets the variable that is used by this use. */
abstract SourceVariable getSourceVariable();
override IRBlock getBlock() { result = use.getUse().getBlock() }
override Cpp::Location getLocation() { result = use.getLocation() }
}
private class ExplicitUse extends Use, TExplicitUse {
ExplicitUse() { this = TExplicitUse(use) }
override SourceVariable getSourceVariable() {
exists(VariableInstruction var |
use.getDef() = var and
if use.getUse() instanceof ReadSideEffectInstruction
then result.(SourceIRVariableIndirection).getUnderlyingIRVariable() = var.getIRVariable()
else result.(SourceIRVariable).getIRVariable() = var.getIRVariable()
)
}
}
private class ReturnParameterIndirection extends Use, TReturnParamIndirection {
ReturnParameterIndirection() { this = TReturnParamIndirection(use) }
override SourceVariable getSourceVariable() {
exists(ReturnIndirectionInstruction ret |
returnParameterIndirection(use, ret) and
result.(SourceIRVariableIndirection).getUnderlyingIRVariable() = ret.getIRVariable()
)
}
}
private predicate isExplicitUse(Operand op) {
op.getDef() instanceof VariableAddressInstruction and
not exists(LoadInstruction load |
load.getSourceAddressOperand() = op and
load.getAUse().getUse() instanceof InitializeIndirectionInstruction
)
}
private predicate returnParameterIndirection(Operand op, ReturnIndirectionInstruction ret) {
ret.getSourceAddressOperand() = op
}
/**
* Holds if `iFrom` computes an address that is used by `iTo`.
*/
predicate addressFlow(Instruction iFrom, Instruction iTo) {
iTo.(CopyValueInstruction).getSourceValue() = iFrom
or
iTo.(ConvertInstruction).getUnary() = iFrom
or
iTo.(CheckedConvertOrNullInstruction).getUnary() = iFrom
or
iTo.(InheritanceConversionInstruction).getUnary() = iFrom
or
iTo.(PointerArithmeticInstruction).getLeft() = iFrom
or
iTo.(FieldAddressInstruction).getObjectAddress() = iFrom
or
// We traverse `LoadInstruction`s since we want to conclude that the
// destination of the store operation `*x = source()` is derived from `x`.
iTo.(LoadInstruction).getSourceAddress() = iFrom
or
// We want to include `ReadSideEffectInstruction`s for the same reason that we include
// `LoadInstruction`s, but only when a `WriteSideEffectInstruction` for the same index exists as well
// (as otherwise we know that the callee won't override the data). However, given an index `i`, the
// destination of the `WriteSideEffectInstruction` for `i` is identical to the source address of the
// `ReadSideEffectInstruction` for `i`. So we don't have to talk about the `ReadSideEffectInstruction`
// at all.
exists(WriteSideEffectInstruction write |
write.getPrimaryInstruction() = iTo and
write.getDestinationAddress() = iFrom
)
}
/**
* The reflexive, transitive closure of `addressFlow` that ends as the address of a
* store or read operation.
*/
cached
predicate addressFlowTC(Instruction iFrom, Instruction iTo) {
iTo = [getDestinationAddress(_), getSourceAddress(_)] and
addressFlow*(iFrom, iTo)
}
/**
* Gets the destination address of `instr` if it is a `StoreInstruction` or
* a `WriteSideEffectInstruction`.
*/
Instruction getDestinationAddress(Instruction instr) {
result =
[
instr.(StoreInstruction).getDestinationAddress(),
instr.(WriteSideEffectInstruction).getDestinationAddress()
]
}
/** Gets the source address of `instr` if it is an instruction that behaves like a `LoadInstruction`. */
Instruction getSourceAddress(Instruction instr) { result = getSourceAddressOperand(instr).getDef() }
/**
* Gets the operand that represents the source address of `instr` if it is an
* instruction that behaves like a `LoadInstruction`.
*/
Operand getSourceAddressOperand(Instruction instr) {
result =
[
instr.(LoadInstruction).getSourceAddressOperand(),
instr.(ReadSideEffectInstruction).getArgumentOperand()
]
}
/**
* Gets the source address of `node` if it's an instruction or operand that
* behaves like a `LoadInstruction`.
*/
Instruction getSourceAddressFromNode(Node node) {
result = getSourceAddress(node.asInstruction())
or
result = getSourceAddress(node.asOperand().(SideEffectOperand).getUse())
}
/** Gets the source value of `instr` if it's an instruction that behaves like a `LoadInstruction`. */
Instruction getSourceValue(Instruction instr) { result = getSourceValueOperand(instr).getDef() }
/**
* Gets the operand that represents the source value of `instr` if it's an instruction
* that behaves like a `LoadInstruction`.
*/
Operand getSourceValueOperand(Instruction instr) {
result = instr.(LoadInstruction).getSourceValueOperand()
or
result = instr.(ReadSideEffectInstruction).getSideEffectOperand()
}
/**
* Holds if `instr` is a `StoreInstruction` or a `WriteSideEffectInstruction` that writes to an address.
* The addresses is computed using `address`, and `certain` is `true` if the write is guaranteed to overwrite
* the entire variable.
*/
cached
predicate explicitWrite(boolean certain, Instruction instr, Instruction address) {
exists(StoreInstruction store |
store = instr and addressFlowTC(address, store.getDestinationAddress())
|
// Set `certain = false` if the address is derived from any instructions that prevents us from
// concluding that the entire variable is overridden.
if
addressFlowTC(any(Instruction i |
i instanceof FieldAddressInstruction or
i instanceof PointerArithmeticInstruction or
i instanceof LoadInstruction or
i instanceof InheritanceConversionInstruction
), store.getDestinationAddress())
then certain = false
else certain = true
)
or
addressFlowTC(address, instr.(WriteSideEffectInstruction).getDestinationAddress()) and
certain = false
}
cached
private module Cached {
private predicate defUseFlow(Node nodeFrom, Node nodeTo) {
exists(IRBlock bb1, int i1, IRBlock bb2, int i2, DefOrUse defOrUse, Use use |
defOrUse.hasRankInBlock(bb1, i1) and
use.hasRankInBlock(bb2, i2) and
adjacentDefRead(_, bb1, i1, bb2, i2) and
nodeFrom.asInstruction() = toInstruction(defOrUse) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
)
}
private predicate fromStoreNode(StoreNodeInstr nodeFrom, Node nodeTo) {
// Def-use flow from a `StoreNode`.
exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Def def, Use use |
nodeFrom.isTerminal() and
def.getInstruction() = nodeFrom.getStoreInstruction() and
def.hasRankInBlock(bb1, i1) and
adjacentDefRead(_, bb1, i1, bb2, i2) and
use.hasRankInBlock(bb2, i2) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
)
or
// This final case is a bit annoying. The write side effect on an expression like `a = new A;` writes
// to a fresh address returned by `operator new`, and there's no easy way to use the shared SSA
// library to hook that up to the assignment to `a`. So instead we flow to the _first_ use of the
// value computed by `operator new` that occurs after `nodeFrom` (to avoid a loop in the
// dataflow graph).
exists(WriteSideEffectInstruction write, IRBlock bb, int i1, int i2, Operand op |
nodeFrom.getInstruction().(CallInstruction).getStaticCallTarget() instanceof
Alloc::OperatorNewAllocationFunction and
write = nodeFrom.getStoreInstruction() and
bb.getInstruction(i1) = write and
bb.getInstruction(i2) = op.getUse() and
// Flow to an instruction that occurs later in the block.
conversionFlow*(nodeFrom.getInstruction(), op.getDef()) and
nodeTo.asOperand() = op and
i2 > i1 and
// There is no previous instruction that also occurs after `nodeFrom`.
not exists(Instruction instr, int i |
bb.getInstruction(i) = instr and
conversionFlow(instr, op.getDef()) and
i1 < i and
i < i2
)
)
}
private predicate fromReadNode(ReadNode nodeFrom, Node nodeTo) {
exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Use use1, Use use2 |
use1.hasRankInBlock(bb1, i1) and
use2.hasRankInBlock(bb2, i2) and
use1.getOperand().getDef() = nodeFrom.getInstruction() and
adjacentDefRead(_, bb1, i1, bb2, i2) and
flowOutOfAddressStep(use2.getOperand(), nodeTo)
)
}
private predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) {
exists(PhiNode phi, Use use, IRBlock block, int rnk |
phi = nodeFrom.getPhiNode() and
adjacentDefRead(phi, _, _, block, rnk) and
use.hasRankInBlock(block, rnk) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
)
}
private predicate toPhiNode(Node nodeFrom, SsaPhiNode nodeTo) {
// Flow to phi nodes
exists(Def def, IRBlock block, int rnk |
def.hasRankInBlock(block, rnk) and
nodeTo.hasInputAtRankInBlock(block, rnk)
|
exists(StoreNodeInstr storeNode |
storeNode = nodeFrom and
storeNode.isTerminal() and
def.getInstruction() = storeNode.getStoreInstruction()
)
or
def.getInstruction() = nodeFrom.asInstruction()
)
or
// Phi -> phi flow
nodeTo.hasInputAtRankInBlock(_, _, nodeFrom.(SsaPhiNode).getPhiNode())
}
/**
* Holds if `nodeFrom` is a read or write, and `nTo` is the next subsequent read of the variable
* written (or read) by `storeOrRead`.
*/
cached
predicate ssaFlow(Node nodeFrom, Node nodeTo) {
// Def-use/use-use flow from an `InstructionNode`.
defUseFlow(nodeFrom, nodeTo)
or
// Def-use flow from a `StoreNode`.
fromStoreNode(nodeFrom, nodeTo)
or
// Use-use flow from a `ReadNode`.
fromReadNode(nodeFrom, nodeTo)
or
fromPhiNode(nodeFrom, nodeTo)
or
toPhiNode(nodeFrom, nodeTo)
or
// When we want to transfer flow out of a `StoreNode` we perform two steps:
// 1. Find the next use of the address being stored to
// 2. Find the `LoadInstruction` that loads the address
// When the address being stored into doesn't have a `LoadInstruction` associated with it because it's
// passed into a `CallInstruction` we transfer flow to the `ReadSideEffect`, which will then flow into
// the callee. We then pickup the flow from the `InitializeIndirectionInstruction` and use the shared
// SSA library to determine where the next use of the address that received the flow is.
exists(Node init, Node mid |
nodeFrom.asInstruction().(InitializeIndirectionInstruction).getIRVariable() =
init.asInstruction().(InitializeParameterInstruction).getIRVariable() and
// No need for the flow if the next use is the instruction that returns the flow out of the callee.
not mid.asInstruction() instanceof ReturnIndirectionInstruction and
// Find the next use of the address
ssaFlow(init, mid) and
// And flow to the next load of that address
flowOutOfAddressStep([mid.asInstruction().getAUse(), mid.asOperand()], nodeTo)
)
}
/**
* Holds if `iTo` is a conversion-like instruction that copies
* the value computed by `iFrom`.
*
* This predicate is used by `fromStoreNode` to find the next use of a pointer that
* points to freshly allocated memory.
*/
private predicate conversionFlow(Instruction iFrom, Instruction iTo) {
iTo.(CopyValueInstruction).getSourceValue() = iFrom
or
iTo.(ConvertInstruction).getUnary() = iFrom
or
iTo.(CheckedConvertOrNullInstruction).getUnary() = iFrom
or
iTo.(InheritanceConversionInstruction).getUnary() = iFrom
}
pragma[noinline]
private predicate callTargetHasInputOutput(
CallInstruction call, DataFlow::FunctionInput input, DataFlow::FunctionOutput output
) {
exists(DataFlow::DataFlowFunction func |
call.getStaticCallTarget() = func and
func.hasDataFlow(input, output)
)
}
/**
* The role of `flowOutOfAddressStep` is to select the node for which we want dataflow to end up in
* after the shared SSA library's `adjacentDefRead` predicate has determined that `operand` is the
* next use of some variable.
*
* More precisely, this predicate holds if `operand` is an operand that represents an address, and:
* - `nodeTo` is the next load of that address, or
* - `nodeTo` is a `ReadNode` that uses the definition of `operand` to start a sequence of reads, or
* - `nodeTo` is the outer-most `StoreNode` that uses the address represented by `operand`. We obtain
* use-use flow in this case since `StoreNodeFlow::flowOutOf` will then provide flow to the next of
* of `operand`.
*
* There is one final (slightly annoying) case: When `operand` is a an argument to a modeled function
* without any `ReadSideEffect` (such as `std::move`). Here, the address flows from the argument to
* the return value, which might then be read later.
*/
private predicate flowOutOfAddressStep(Operand operand, Node nodeTo) {
// Flow into a read node
exists(ReadNode readNode | readNode = nodeTo |
readNode.isInitial() and
operand.getDef() = readNode.getInstruction()
)
or
exists(StoreNodeInstr storeNode, Instruction def |
storeNode = nodeTo and
def = operand.getDef()
|
storeNode.isTerminal() and
not addressFlow(def, _) and
// Only transfer flow to a store node if it doesn't immediately overwrite the address
// we've just written to.
explicitWrite(false, storeNode.getStoreInstruction(), def)
)
or
// The destination of a store operation has undergone lvalue-to-rvalue conversion and is now a
// right-hand-side of a store operation.
// Find the next use of the variable in that store operation, and recursively find the load of that
// pointer. For example, consider this case:
//
// ```cpp
// int x = source();
// int* p = &x;
// sink(*p);
// ```
//
// if we want to find the load of the address of `x`, we see that the pointer is stored into `p`,
// and we then need to recursively look for the load of `p`.
exists(
Def def, StoreInstruction store, IRBlock block1, int rnk1, Use use, IRBlock block2, int rnk2
|
store = def.getInstruction() and
store.getSourceValueOperand() = operand and
def.hasRankInBlock(block1, rnk1) and
use.hasRankInBlock(block2, rnk2) and
adjacentDefRead(_, block1, rnk1, block2, rnk2)
|
// The shared SSA library has determined that `use` is the next use of the operand
// so we find the next load of that use (but only if there is no `PostUpdateNode`) we
// need to flow into first.
not StoreNodeFlow::flowInto(store, _) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
or
// It may also be the case that `store` gives rise to another store step. So let's make sure that
// we also take those into account.
StoreNodeFlow::flowInto(store, nodeTo)
)
or
// As we find the next load of an address, we might come across another use of the same variable.
// In that case, we recursively find the next use of _that_ operand, and continue searching for
// the next load of that operand. For example, consider this case:
//
// ```cpp
// int x = source();
// use(&x);
// int* p = &x;
// sink(*p);
// ```
//
// The next use of `x` after its definition is `use(&x)`, but there is a later load of the address
// of `x` that we want to flow to. So we use the shared SSA library to find the next load.
not operand = getSourceAddressOperand(_) and
exists(Use use1, Use use2, IRBlock block1, int rnk1, IRBlock block2, int rnk2 |
use1.getOperand() = operand and
use1.hasRankInBlock(block1, rnk1) and
// Don't flow to the next use if this use is part of a store operation that totally
// overrides a variable.
not explicitWrite(true, _, use1.getOperand().getDef()) and
adjacentDefRead(_, block1, rnk1, block2, rnk2) and
use2.hasRankInBlock(block2, rnk2) and
flowOutOfAddressStep(use2.getOperand(), nodeTo)
)
or
operand = getSourceAddressOperand(nodeTo.asInstruction())
or
exists(ReturnIndirectionInstruction ret |
ret.getSourceAddressOperand() = operand and
ret = nodeTo.asInstruction()
)
or
exists(ReturnValueInstruction ret |
ret.getReturnAddressOperand() = operand and
nodeTo.asInstruction() = ret
)
or
exists(CallInstruction call, int index, ReadSideEffectInstruction read |
call.getArgumentOperand(index) = operand and
read = getSideEffectFor(call, index) and
nodeTo.asOperand() = read.getSideEffectOperand()
)
or
exists(CopyInstruction copy |
not exists(getSourceAddressOperand(copy)) and
copy.getSourceValueOperand() = operand and
flowOutOfAddressStep(copy.getAUse(), nodeTo)
)
or
exists(ConvertInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nodeTo)
)
or
exists(CheckedConvertOrNullInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nodeTo)
)
or
exists(InheritanceConversionInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nodeTo)
)
or
exists(PointerArithmeticInstruction arith |
arith.getLeftOperand() = operand and
flowOutOfAddressStep(arith.getAUse(), nodeTo)
)
or
// Flow through a modeled function that has parameter -> return value flow.
exists(
CallInstruction call, int index, DataFlow::FunctionInput input,
DataFlow::FunctionOutput output
|
callTargetHasInputOutput(call, input, output) and
call.getArgumentOperand(index) = operand and
not getSideEffectFor(call, index) instanceof ReadSideEffectInstruction and
input.isParameter(index) and
output.isReturnValue() and
flowOutOfAddressStep(call.getAUse(), nodeTo)
)
}
}
import Cached
/**
* Holds if the `i`'th write in block `bb` writes to the variable `v`.
* `certain` is `true` if the write is guaranteed to overwrite the entire variable.
*/
predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) {
DataFlowImplCommon::forceCachingInSameStage() and
exists(Def def |
def.hasRankInBlock(bb, i) and
v = def.getSourceVariable() and
(if def.isCertain() then certain = true else certain = false)
)
}
/**
* Holds if the `i`'th read in block `bb` reads to the variable `v`.
* `certain` is `true` if the read is guaranteed. For C++, this is always the case.
*/
predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) {
exists(Use use |
use.hasRankInBlock(bb, i) and
v = use.getSourceVariable() and
certain = true
)
}

View File

@@ -44,6 +44,8 @@ private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand t
fromInstr = readInstr.getArgumentDef() and
toOperand = readInstr.getSideEffectOperand()
)
or
toOperand.(LoadOperand).getAnyDef() = fromInstr
}
/**
@@ -82,6 +84,8 @@ private predicate operandToInstructionTaintStep(Operand opFrom, Instruction inst
instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
)
or
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
or
// Flow from an element to an array or union that contains it.
instrTo.(ChiInstruction).getPartialOperand() = opFrom and
not instrTo.isResultConflated() and

View File

@@ -762,21 +762,11 @@ class ReturnValueInstruction extends ReturnInstruction {
*/
final LoadOperand getReturnValueOperand() { result = this.getAnOperand() }
/**
* Gets the operand that provides the address of the value being returned by the function.
*/
final AddressOperand getReturnAddressOperand() { result = this.getAnOperand() }
/**
* Gets the instruction whose result provides the value being returned by the function, if an
* exact definition is available.
*/
final Instruction getReturnValue() { result = this.getReturnValueOperand().getDef() }
/**
* Gets the instruction whose result provides the address of the value being returned by the function.
*/
final Instruction getReturnAddress() { result = this.getReturnAddressOperand().getDef() }
}
/**

View File

@@ -20,14 +20,6 @@ private import internal.OperandInternal
private class TStageOperand =
TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
/**
* A known location. Testing `loc instanceof KnownLocation` will account for non existing locations, as
* opposed to testing `not loc isntanceof UnknownLocation`
*/
private class KnownLocation extends Language::Location {
KnownLocation() { not this instanceof Language::UnknownLocation }
}
/**
* An operand of an `Instruction`. The operand represents a use of the result of one instruction
* (the defining instruction) in another instruction (the use instruction)
@@ -53,10 +45,8 @@ class Operand extends TStageOperand {
/**
* Gets the location of the source code for this operand.
* By default this is where the operand is used, but some subclasses may override this
* using `getAnyDef()` if it makes more sense.
*/
Language::Location getLocation() { result = this.getUse().getLocation() }
final Language::Location getLocation() { result = this.getUse().getLocation() }
/**
* Gets the function that contains this operand.
@@ -279,10 +269,6 @@ class RegisterOperand extends NonPhiOperand, TRegisterOperand {
final override string toString() { result = tag.toString() }
// most `RegisterOperands` have a more meaningful location at the definition
// the only exception are specific cases of `ThisArgumentOperand`
override Language::Location getLocation() { result = this.getAnyDef().getLocation() }
final override Instruction getAnyDef() { result = defInstr }
final override Overlap getDefinitionOverlap() {
@@ -307,7 +293,7 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOpe
final override string toString() { result = tag.toString() }
final override Instruction getAnyDef() {
result = unique(Instruction defInstr | this.hasDefinition(defInstr, _))
result = unique(Instruction defInstr | hasDefinition(defInstr, _))
}
final override Overlap getDefinitionOverlap() { this.hasDefinition(_, result) }
@@ -415,19 +401,11 @@ class ArgumentOperand extends RegisterOperand {
}
/**
* An operand representing the implicit `this` argument to a member function
* An operand representing the implicit 'this' argument to a member function
* call.
*/
class ThisArgumentOperand extends ArgumentOperand {
override ThisArgumentOperandTag tag;
// in most cases the def location makes more sense, but in some corner cases it
// has an unknown location: in those cases we fall back to the use location
override Language::Location getLocation() {
if this.getAnyDef().getLocation() instanceof KnownLocation
then result = this.getAnyDef().getLocation()
else result = this.getUse().getLocation()
}
}
/**

View File

@@ -762,21 +762,11 @@ class ReturnValueInstruction extends ReturnInstruction {
*/
final LoadOperand getReturnValueOperand() { result = this.getAnOperand() }
/**
* Gets the operand that provides the address of the value being returned by the function.
*/
final AddressOperand getReturnAddressOperand() { result = this.getAnOperand() }
/**
* Gets the instruction whose result provides the value being returned by the function, if an
* exact definition is available.
*/
final Instruction getReturnValue() { result = this.getReturnValueOperand().getDef() }
/**
* Gets the instruction whose result provides the address of the value being returned by the function.
*/
final Instruction getReturnAddress() { result = this.getReturnAddressOperand().getDef() }
}
/**

View File

@@ -20,14 +20,6 @@ private import internal.OperandInternal
private class TStageOperand =
TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
/**
* A known location. Testing `loc instanceof KnownLocation` will account for non existing locations, as
* opposed to testing `not loc isntanceof UnknownLocation`
*/
private class KnownLocation extends Language::Location {
KnownLocation() { not this instanceof Language::UnknownLocation }
}
/**
* An operand of an `Instruction`. The operand represents a use of the result of one instruction
* (the defining instruction) in another instruction (the use instruction)
@@ -53,10 +45,8 @@ class Operand extends TStageOperand {
/**
* Gets the location of the source code for this operand.
* By default this is where the operand is used, but some subclasses may override this
* using `getAnyDef()` if it makes more sense.
*/
Language::Location getLocation() { result = this.getUse().getLocation() }
final Language::Location getLocation() { result = this.getUse().getLocation() }
/**
* Gets the function that contains this operand.
@@ -279,10 +269,6 @@ class RegisterOperand extends NonPhiOperand, TRegisterOperand {
final override string toString() { result = tag.toString() }
// most `RegisterOperands` have a more meaningful location at the definition
// the only exception are specific cases of `ThisArgumentOperand`
override Language::Location getLocation() { result = this.getAnyDef().getLocation() }
final override Instruction getAnyDef() { result = defInstr }
final override Overlap getDefinitionOverlap() {
@@ -307,7 +293,7 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOpe
final override string toString() { result = tag.toString() }
final override Instruction getAnyDef() {
result = unique(Instruction defInstr | this.hasDefinition(defInstr, _))
result = unique(Instruction defInstr | hasDefinition(defInstr, _))
}
final override Overlap getDefinitionOverlap() { this.hasDefinition(_, result) }
@@ -415,19 +401,11 @@ class ArgumentOperand extends RegisterOperand {
}
/**
* An operand representing the implicit `this` argument to a member function
* An operand representing the implicit 'this' argument to a member function
* call.
*/
class ThisArgumentOperand extends ArgumentOperand {
override ThisArgumentOperandTag tag;
// in most cases the def location makes more sense, but in some corner cases it
// has an unknown location: in those cases we fall back to the use location
override Language::Location getLocation() {
if this.getAnyDef().getLocation() instanceof KnownLocation
then result = this.getAnyDef().getLocation()
else result = this.getUse().getLocation()
}
}
/**

View File

@@ -762,21 +762,11 @@ class ReturnValueInstruction extends ReturnInstruction {
*/
final LoadOperand getReturnValueOperand() { result = this.getAnOperand() }
/**
* Gets the operand that provides the address of the value being returned by the function.
*/
final AddressOperand getReturnAddressOperand() { result = this.getAnOperand() }
/**
* Gets the instruction whose result provides the value being returned by the function, if an
* exact definition is available.
*/
final Instruction getReturnValue() { result = this.getReturnValueOperand().getDef() }
/**
* Gets the instruction whose result provides the address of the value being returned by the function.
*/
final Instruction getReturnAddress() { result = this.getReturnAddressOperand().getDef() }
}
/**

View File

@@ -20,14 +20,6 @@ private import internal.OperandInternal
private class TStageOperand =
TRegisterOperand or TNonSSAMemoryOperand or TPhiOperand or TChiOperand;
/**
* A known location. Testing `loc instanceof KnownLocation` will account for non existing locations, as
* opposed to testing `not loc isntanceof UnknownLocation`
*/
private class KnownLocation extends Language::Location {
KnownLocation() { not this instanceof Language::UnknownLocation }
}
/**
* An operand of an `Instruction`. The operand represents a use of the result of one instruction
* (the defining instruction) in another instruction (the use instruction)
@@ -53,10 +45,8 @@ class Operand extends TStageOperand {
/**
* Gets the location of the source code for this operand.
* By default this is where the operand is used, but some subclasses may override this
* using `getAnyDef()` if it makes more sense.
*/
Language::Location getLocation() { result = this.getUse().getLocation() }
final Language::Location getLocation() { result = this.getUse().getLocation() }
/**
* Gets the function that contains this operand.
@@ -279,10 +269,6 @@ class RegisterOperand extends NonPhiOperand, TRegisterOperand {
final override string toString() { result = tag.toString() }
// most `RegisterOperands` have a more meaningful location at the definition
// the only exception are specific cases of `ThisArgumentOperand`
override Language::Location getLocation() { result = this.getAnyDef().getLocation() }
final override Instruction getAnyDef() { result = defInstr }
final override Overlap getDefinitionOverlap() {
@@ -307,7 +293,7 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, TNonPhiMemoryOpe
final override string toString() { result = tag.toString() }
final override Instruction getAnyDef() {
result = unique(Instruction defInstr | this.hasDefinition(defInstr, _))
result = unique(Instruction defInstr | hasDefinition(defInstr, _))
}
final override Overlap getDefinitionOverlap() { this.hasDefinition(_, result) }
@@ -415,19 +401,11 @@ class ArgumentOperand extends RegisterOperand {
}
/**
* An operand representing the implicit `this` argument to a member function
* An operand representing the implicit 'this' argument to a member function
* call.
*/
class ThisArgumentOperand extends ArgumentOperand {
override ThisArgumentOperandTag tag;
// in most cases the def location makes more sense, but in some corner cases it
// has an unknown location: in those cases we fall back to the use location
override Language::Location getLocation() {
if this.getAnyDef().getLocation() instanceof KnownLocation
then result = this.getAnyDef().getLocation()
else result = this.getUse().getLocation()
}
}
/**

View File

@@ -34,16 +34,6 @@ private class IteratorByTraits extends Iterator {
IteratorByTraits() { exists(IteratorTraits it | it.getIteratorType() = this) }
}
/**
* The C++ standard includes an `std::iterator_traits` specialization for pointer types. When
* this specialization is included in the database, a pointer type `T*` will be an instance
* of the `IteratorByTraits` class. However, if the `T*` specialization is not in the database,
* we need to explicitly include them with this class.
*/
private class IteratorByPointer extends Iterator instanceof PointerType {
IteratorByPointer() { not this instanceof IteratorByTraits }
}
/**
* A type which has the typedefs expected for an iterator.
*/

View File

@@ -18,12 +18,12 @@ class SuppressionComment extends Comment {
(
this instanceof CppStyleComment and
// strip the beginning slashes
text = this.getContents().suffix(2)
text = getContents().suffix(2)
or
this instanceof CStyleComment and
// strip both the beginning /* and the end */ the comment
exists(string text0 |
text0 = this.getContents().suffix(2) and
text0 = getContents().suffix(2) and
text = text0.prefix(text0.length() - 2)
) and
// The /* */ comment must be a single-line comment

View File

@@ -153,12 +153,12 @@ class ExtClass extends Class {
}
predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
if this.hasOneVariableGroup()
if hasOneVariableGroup()
then
exists(VariableDeclarationGroup vdg | vdg.getClass() = this |
vdg.hasLocationInfo(path, startline, startcol, endline, endcol)
)
else this.getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
else getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
}
}

View File

@@ -50,16 +50,6 @@ predicate reachableThing(Thing t) {
exists(Thing mid | reachableThing(mid) and mid.callsOrAccesses() = t)
}
pragma[nomagic]
predicate callsOrAccessesPlus(Thing thing1, FunctionToRemove thing2) {
thing1.callsOrAccesses() = thing2
or
exists(Thing mid |
thing1.callsOrAccesses() = mid and
callsOrAccessesPlus(mid, thing2)
)
}
class Thing extends Locatable {
Thing() {
this instanceof Function or
@@ -91,7 +81,7 @@ class FunctionToRemove extends Function {
}
Thing getOther() {
callsOrAccessesPlus(result, this) and
result.callsOrAccesses+() = this and
this != result and
// We will already be reporting the enclosing function of a
// local variable, so don't also report the variable

View File

@@ -1,12 +0,0 @@
## 0.0.5
### New Queries
* A new query `cpp/certificate-not-checked` has been added for C/C++. The query flags unsafe use of OpenSSL and similar libraries.
* A new query `cpp/certificate-result-conflation` has been added for C/C++. The query flags unsafe use of OpenSSL and similar libraries.
## 0.0.4
### New Queries
* A new query `cpp/non-https-url` has been added for C/C++. The query flags uses of `http` URLs that might be better replaced with `https`.

Some files were not shown because too many files have changed in this diff Show More