From 645db5bc908d1aea36e94ab06578e8efa01da5fb Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Thu, 12 Sep 2024 17:44:56 +0100 Subject: [PATCH] PS: Add SSA library. --- powershell/ql/lib/qlpack.yml | 1 + .../ql/lib/semmle/code/powershell/Ast.qll | 3 + .../semmle/code/powershell/dataflow/Ssa.qll | 148 ++++++++ .../powershell/dataflow/internal/SsaImpl.qll | 338 ++++++++++++++++++ 4 files changed, 490 insertions(+) create mode 100644 powershell/ql/lib/semmle/code/powershell/dataflow/Ssa.qll create mode 100644 powershell/ql/lib/semmle/code/powershell/dataflow/internal/SsaImpl.qll diff --git a/powershell/ql/lib/qlpack.yml b/powershell/ql/lib/qlpack.yml index 7c5c017b4bb..8accccc6390 100644 --- a/powershell/ql/lib/qlpack.yml +++ b/powershell/ql/lib/qlpack.yml @@ -10,5 +10,6 @@ upgrades: upgrades dependencies: codeql/controlflow: ${workspace} codeql/dataflow: ${workspace} + codeql/ssa: ${workspace} codeql/util: ${workspace} warnOnImplicitThis: true \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/Ast.qll b/powershell/ql/lib/semmle/code/powershell/Ast.qll index 4db429a36a8..b9c1bc29518 100644 --- a/powershell/ql/lib/semmle/code/powershell/Ast.qll +++ b/powershell/ql/lib/semmle/code/powershell/Ast.qll @@ -1,4 +1,5 @@ import powershell +private import semmle.code.powershell.controlflow.internal.Scope class Ast extends @ast { string toString() { none() } @@ -6,4 +7,6 @@ class Ast extends @ast { Ast getParent() { parent(this, result) } Location getLocation() { none() } + + final Scope getEnclosingScope() { result = scopeOf(this) } } diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/Ssa.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/Ssa.qll new file mode 100644 index 00000000000..dc8851f885a --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/Ssa.qll @@ -0,0 +1,148 @@ +/** + * Provides the module `Ssa` for working with static single assignment (SSA) form. + */ + +/** + * Provides classes for working with static single assignment (SSA) form. + */ +module Ssa { + private import semmle.code.powershell.Cfg + private import powershell + private import internal.SsaImpl as SsaImpl + private import CfgNodes::ExprNodes + + /** A static single assignment (SSA) definition. */ + class Definition extends SsaImpl::Definition { + /** + * Gets the control flow node of this SSA definition, if any. Phi nodes are + * examples of SSA definitions without a control flow node, as they are + * modeled at index `-1` in the relevant basic block. + */ + final CfgNode getControlFlowNode() { + exists(BasicBlock bb, int i | this.definesAt(_, bb, i) | result = bb.getNode(i)) + } + + /** Gets a control-flow node that reads the value of this SSA definition. */ + final VarReadAccessCfgNode getARead() { result = SsaImpl::getARead(this) } + + /** + * Gets a first control-flow node that reads the value of this SSA definition. + * That is, a read that can be reached from this definition without passing + * through other reads. + */ + final VarReadAccessCfgNode getAFirstRead() { SsaImpl::firstRead(this, result) } + + /** + * Gets a last control-flow node that reads the value of this SSA definition. + * That is, a read that can reach the end of the enclosing CFG scope, or another + * SSA definition for the source variable, without passing through any other read. + */ + final VarReadAccessCfgNode getALastRead() { SsaImpl::lastRead(this, result) } + + /** + * Holds if `read1` and `read2` are adjacent reads of this SSA definition. + * That is, `read2` can be reached from `read1` without passing through + * another read. + */ + final predicate hasAdjacentReads( + VarReadAccessCfgNode read1, VarReadAccessCfgNode read2 + ) { + SsaImpl::adjacentReadPair(this, read1, read2) + } + + /** + * Gets an SSA definition whose value can flow to this one in one step. This + * includes inputs to phi nodes and the prior definitions of uncertain writes. + */ + private Definition getAPhiInputOrPriorDefinition() { result = this.(PhiNode).getAnInput() } + + /** + * Gets a definition that ultimately defines this SSA definition and is + * not itself a phi node. + */ + final Definition getAnUltimateDefinition() { + result = this.getAPhiInputOrPriorDefinition*() and + not result instanceof PhiNode + } + + override string toString() { result = this.getControlFlowNode().toString() } + + /** Gets the scope of this SSA definition. */ + CfgScope getScope() { result = this.getBasicBlock().getScope() } + } + + /** + * An SSA definition that corresponds to a write. + */ + class WriteDefinition extends Definition, SsaImpl::WriteDefinition { + private VariableWriteAccessCfgNode write; + + WriteDefinition() { + exists(BasicBlock bb, int i, Variable v | + this.definesAt(v, bb, i) and + SsaImpl::variableWriteActual(bb, i, v, write) + ) + } + + /** Gets the underlying write access. */ + final VariableWriteAccessCfgNode getWriteAccess() { result = write } + + /** + * Holds if this SSA definition assigns `value` to the underlying variable. + */ + predicate assigns(CfgNodes::StmtCfgNode value) { + exists(CfgNodes::StmtNodes::AssignStmtCfgNode a, BasicBlock bb, int i | + this.definesAt(_, bb, i) and + a = bb.getNode(i) and + value = a.getRightHandSide() + ) + } + + final override string toString() { result = write.toString() } + + final override Location getLocation() { result = write.getLocation() } + } + + /** + * An SSA definition inserted at the beginning of a scope to represent an + * uninitialized local variable. + */ + class UninitializedDefinition extends Definition, SsaImpl::WriteDefinition { + private Variable v; + + UninitializedDefinition() { + exists(BasicBlock bb, int i | + this.definesAt(v, bb, i) and + SsaImpl::uninitializedWrite(bb, i, v) + ) + } + + final override string toString() { result = " " + v } + + final override Location getLocation() { result = this.getBasicBlock().getLocation() } + } + + /** A phi node. */ + class PhiNode extends Definition, SsaImpl::PhiNode { + /** Gets an input of this phi node. */ + final Definition getAnInput() { this.hasInputFromBlock(result, _) } + + /** Holds if `inp` is an input to this phi node along the edge originating in `bb`. */ + predicate hasInputFromBlock(Definition inp, BasicBlock bb) { + inp = SsaImpl::phiHasInputFromBlock(this, bb) + } + + override string toString() { result = "phi" } + + /** + * The location of a phi node is the same as the location of the first node + * in the basic block in which it is defined. + * + * Strictly speaking, the node is *before* the first node, but such a location + * does not exist in the source program. + */ + final override Location getLocation() { + result = this.getBasicBlock().getFirstNode().getLocation() + } + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/SsaImpl.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/SsaImpl.qll new file mode 100644 index 00000000000..1d140e09a71 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/SsaImpl.qll @@ -0,0 +1,338 @@ +private import codeql.ssa.Ssa as SsaImplCommon +private import powershell +private import semmle.code.powershell.Cfg as Cfg +private import semmle.code.powershell.controlflow.internal.ControlFlowGraphImpl as ControlFlowGraphImpl +private import semmle.code.powershell.dataflow.Ssa +private import Cfg::CfgNodes::ExprNodes + +module SsaInput implements SsaImplCommon::InputSig { + private import semmle.code.powershell.controlflow.ControlFlowGraph as Cfg + private import semmle.code.powershell.controlflow.BasicBlocks as BasicBlocks + + class BasicBlock = BasicBlocks::BasicBlock; + + class ControlFlowNode = Cfg::CfgNode; + + BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() } + + BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() } + + class ExitBasicBlock extends BasicBlock, BasicBlocks::ExitBasicBlock { } + + class SourceVariable = LocalVariable; + + /** + * Holds if the statement at index `i` of basic block `bb` contains a write to variable `v`. + * `certain` is true if the write definitely occurs. + */ + predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) { + ( + uninitializedWrite(bb, i, v) + or + variableWriteActual(bb, i, v, _) + ) and + certain = true + } + + predicate variableRead(BasicBlock bb, int i, LocalVariable v, boolean certain) { + variableReadActual(bb, i, v) and + certain = true + } +} + +import SsaImplCommon::Make as Impl + +class Definition = Impl::Definition; + +class WriteDefinition = Impl::WriteDefinition; + +class UncertainWriteDefinition = Impl::UncertainWriteDefinition; + +class PhiNode = Impl::PhiNode; + +module Consistency = Impl::Consistency; + +/** Holds if `v` is uninitialized at index `i` in entry block `bb`. */ +predicate uninitializedWrite(Cfg::EntryBasicBlock bb, int i, LocalVariable v) { + v.getDeclaringScope() = bb.getScope() and + i = -1 +} + +/** Holds if `v` is read at index `i` in basic block `bb`. */ +private predicate variableReadActual(Cfg::BasicBlock bb, int i, LocalVariable v) { + exists(VarReadAccess read | + read.getVariable() = v and + read = bb.getNode(i).getAstNode() + ) +} + +pragma[noinline] +private predicate adjacentDefRead( + Definition def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2, + SsaInput::SourceVariable v +) { + Impl::adjacentDefRead(def, bb1, i1, bb2, i2) and + v = def.getSourceVariable() +} + +pragma[noinline] +private predicate adjacentDefReadExt( + DefinitionExt def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2, + SsaInput::SourceVariable v +) { + Impl::adjacentDefReadExt(def, _, bb1, i1, bb2, i2) and + v = def.getSourceVariable() +} + +private predicate adjacentDefReachesRead( + Definition def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2 +) { + exists(SsaInput::SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) | + def.definesAt(v, bb1, i1) + or + SsaInput::variableRead(bb1, i1, v, true) + ) + or + exists(SsaInput::BasicBlock bb3, int i3 | + adjacentDefReachesRead(def, bb1, i1, bb3, i3) and + SsaInput::variableRead(bb3, i3, _, false) and + Impl::adjacentDefRead(def, bb3, i3, bb2, i2) + ) +} + +private predicate adjacentDefReachesReadExt( + DefinitionExt def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2 +) { + exists(SsaInput::SourceVariable v | adjacentDefReadExt(def, bb1, i1, bb2, i2, v) | + def.definesAt(v, bb1, i1, _) + or + SsaInput::variableRead(bb1, i1, v, true) + ) + or + exists(SsaInput::BasicBlock bb3, int i3 | + adjacentDefReachesReadExt(def, bb1, i1, bb3, i3) and + SsaInput::variableRead(bb3, i3, _, false) and + Impl::adjacentDefReadExt(def, _, bb3, i3, bb2, i2) + ) +} + +/** Same as `adjacentDefRead`, but skips uncertain reads. */ +pragma[nomagic] +private predicate adjacentDefSkipUncertainReads( + Definition def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2 +) { + adjacentDefReachesRead(def, bb1, i1, bb2, i2) and + SsaInput::variableRead(bb2, i2, _, true) +} + +private predicate adjacentDefReachesUncertainReadExt( + DefinitionExt def, SsaInput::BasicBlock bb1, int i1, SsaInput::BasicBlock bb2, int i2 +) { + adjacentDefReachesReadExt(def, bb1, i1, bb2, i2) and + SsaInput::variableRead(bb2, i2, _, false) +} + +/** Same as `lastRefRedef`, but skips uncertain reads. */ +pragma[nomagic] +private predicate lastRefSkipUncertainReadsExt(DefinitionExt def, SsaInput::BasicBlock bb, int i) { + Impl::lastRef(def, bb, i) and + not SsaInput::variableRead(bb, i, def.getSourceVariable(), false) + or + exists(SsaInput::BasicBlock bb0, int i0 | + Impl::lastRef(def, bb0, i0) and + adjacentDefReachesUncertainReadExt(def, bb, i, bb0, i0) + ) +} + +cached +private module Cached { + /** + * Holds if `v` is written at index `i` in basic block `bb`, and the corresponding + * AST write access is `write`. + */ + cached + predicate variableWriteActual( + Cfg::BasicBlock bb, int i, LocalVariable v, VariableWriteAccessCfgNode write + ) { + exists(Cfg::CfgNode n | + write.getVariable() = v and + n = bb.getNode(i) + | + write.isExplicitWrite(n) + ) + } + + cached + VarReadAccessCfgNode getARead(Definition def) { + exists(LocalVariable v, Cfg::BasicBlock bb, int i | + Impl::ssaDefReachesRead(v, def, bb, i) and + variableReadActual(bb, i, v) and + result = bb.getNode(i) + ) + } + + private import semmle.code.powershell.dataflow.Ssa + + cached + Definition phiHasInputFromBlock(PhiNode phi, Cfg::BasicBlock bb) { + Impl::phiHasInputFromBlock(phi, result, bb) + } + + /** + * Holds if the value defined at SSA definition `def` can reach a read at `read`, + * without passing through any other non-pseudo read. + */ + cached + predicate firstRead(Definition def, VarReadAccessCfgNode read) { + exists(Cfg::BasicBlock bb1, int i1, Cfg::BasicBlock bb2, int i2 | + def.definesAt(_, bb1, i1) and + adjacentDefSkipUncertainReads(def, bb1, i1, bb2, i2) and + read = bb2.getNode(i2) + ) + } + + /** + * Holds if the read at `read2` is a read of the same SSA definition `def` + * as the read at `read1`, and `read2` can be reached from `read1` without + * passing through another non-pseudo read. + */ + cached + predicate adjacentReadPair(Definition def, VarReadAccessCfgNode read1, VarReadAccessCfgNode read2) { + exists(Cfg::BasicBlock bb1, int i1, Cfg::BasicBlock bb2, int i2 | + read1 = bb1.getNode(i1) and + variableReadActual(bb1, i1, _) and + adjacentDefSkipUncertainReads(def, bb1, i1, bb2, i2) and + read2 = bb2.getNode(i2) + ) + } + + /** + * Holds if the read of `def` at `read` may be a last read. That is, `read` + * can either reach another definition of the underlying source variable or + * the end of the CFG scope, without passing through another non-pseudo read. + */ + cached + predicate lastRead(Definition def, VarReadAccessCfgNode read) { + exists(Cfg::BasicBlock bb, int i | + lastRefSkipUncertainReadsExt(def, bb, i) and + variableReadActual(bb, i, _) and + read = bb.getNode(i) + ) + } + + cached + Definition uncertainWriteDefinitionInput(UncertainWriteDefinition def) { + Impl::uncertainWriteDefinitionInput(def, result) + } + + cached + module DataFlowIntegration { + import DataFlowIntegrationImpl + + cached + predicate localFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo, boolean isUseStep) { + DataFlowIntegrationImpl::localFlowStep(def, nodeFrom, nodeTo, isUseStep) + } + + cached + predicate localMustFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo) { + DataFlowIntegrationImpl::localMustFlowStep(def, nodeFrom, nodeTo) + } + + signature predicate guardChecksSig(Cfg::CfgNodes::AstCfgNode g, Cfg::CfgNode e, boolean branch); + + cached // nothing is actually cached + module BarrierGuard { + private Node getABarrierNodeImpl() { + none() // TODO + } + + predicate getABarrierNode = getABarrierNodeImpl/0; + } + } +} + +import Cached + +/** + * An extended static single assignment (SSA) definition. + * + * This is either a normal SSA definition (`Definition`) or a + * phi-read node (`PhiReadNode`). + * + * Only intended for internal use. + */ +class DefinitionExt extends Impl::DefinitionExt { + VarReadAccessCfgNode getARead() { result = getARead(this) } + + override string toString() { result = this.(Ssa::Definition).toString() } + + override Location getLocation() { result = this.(Ssa::Definition).getLocation() } +} + +/** + * A phi-read node. + * + * Only intended for internal use. + */ +class PhiReadNode extends DefinitionExt, Impl::PhiReadNode { + override string toString() { result = "SSA phi read(" + this.getSourceVariable() + ")" } + + override Location getLocation() { result = Impl::PhiReadNode.super.getLocation() } +} + +abstract class NormalParameter extends Parameter { } + +/** Gets the SSA definition node corresponding to parameter `p`. */ +pragma[nomagic] +DefinitionExt getParameterDef(Parameter p) { + none() // TODO +} + +private newtype TParameterExt = TNormalParameter(NormalParameter p) + +/** A normal parameter or an implicit `self` parameter. */ +class ParameterExt extends TParameterExt { + NormalParameter asParameter() { this = TNormalParameter(result) } + + predicate isInitializedBy(WriteDefinition def) { def = getParameterDef(this.asParameter()) } + + string toString() { result = this.asParameter().toString() } + + Location getLocation() { result = this.asParameter().getLocation() } +} + +private module DataFlowIntegrationInput implements Impl::DataFlowIntegrationInputSig { + class Parameter = ParameterExt; + + class Expr extends Cfg::CfgNodes::ExprCfgNode { + predicate hasCfgNode(SsaInput::BasicBlock bb, int i) { this = bb.getNode(i) } + } + + Expr getARead(Definition def) { result = Cached::getARead(def) } + + predicate ssaDefAssigns(WriteDefinition def, Expr value) { + def.(Ssa::WriteDefinition).assigns(value) + } + + predicate ssaDefInitializesParam(WriteDefinition def, Parameter p) { p.isInitializedBy(def) } + + class Guard extends Cfg::CfgNodes::AstCfgNode { + predicate hasCfgNode(SsaInput::BasicBlock bb, int i) { this = bb.getNode(i) } + } + + /** Holds if the guard `guard` controls block `bb` upon evaluating to `branch`. */ + predicate guardControlsBlock(Guard guard, SsaInput::BasicBlock bb, boolean branch) { + none() // TODO + } + + /** Gets an immediate conditional successor of basic block `bb`, if any. */ + SsaInput::BasicBlock getAConditionalBasicBlockSuccessor(SsaInput::BasicBlock bb, boolean branch) { + exists(Cfg::SuccessorTypes::ConditionalSuccessor s | + result = bb.getASuccessor(s) and + s.getValue() = branch + ) + } +} + +private module DataFlowIntegrationImpl = Impl::DataFlowIntegration;