diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll index ad652063f19..8297a9fe6d3 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -92,16 +92,14 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { * ``` * In this case, the sink pair identified by the product flow library (without any additional barriers) * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic - * instruction `pai` such that: - * 1. The left-hand of `pai` flows from the allocation, and - * 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`) + * instruction `pai = a + b` such that: + * 1. the allocation flows to `a`, and + * 2. `b <= n` where `n` is the `n` in `p[n]` * but because there's a strict comparison that compares `n` against the size of the allocation this * snippet is fine. */ -module Barrier2 { - private class FlowState2 = int; - - private module BarrierConfig2 implements DataFlow::ConfigSig { +module SizeBarrier { + private module SizeBarrierConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { // The sources is the same as in the sources for the second // projection in the `AllocToInvalidPointerConfig` module. @@ -109,19 +107,19 @@ module Barrier2 { } additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state, - boolean testIsTrue + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue ) { - // The sink is any "large" side of a relational comparison. - g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + // The sink is any "large" side of a relational comparison, i.e., the `right` expression + // in a guard such as `left < right + k`. + g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue) } predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } } - private import DataFlow::Global<BarrierConfig2> + private import DataFlow::Global<SizeBarrierConfig> - private FlowState2 getAFlowStateForNode(DataFlow::Node node) { + private int getAFlowStateForNode(DataFlow::Node node) { exists(DataFlow::Node source | flow(source, node) and hasSize(_, source, result) @@ -129,14 +127,14 @@ module Barrier2 { } private predicate operandGuardChecks( - IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge + IRGuardCondition g, Operand left, Operand right, int state, boolean edge ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 | + exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k | nRight.asOperand() = right and nLeft.asOperand() = left and - BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and + SizeBarrierConfig::isSink(nLeft, nRight, g, k, edge) and state = getAFlowStateForNode(nRight) and - state0 <= state + k <= state ) } @@ -144,7 +142,7 @@ module Barrier2 { * Gets an instruction that is guarded by a guard condition which ensures that * the value of the instruction is upper-bounded by size of some allocation. 
*/ - Instruction getABarrierInstruction(FlowState2 state) { + Instruction getABarrierInstruction(int state) { exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | use = value.getAUse() and operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, @@ -158,7 +156,7 @@ module Barrier2 { * Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that * the value of the node is upper-bounded by size of some allocation. */ - DataFlow::Node getABarrierNode(FlowState2 state) { + DataFlow::Node getABarrierNode(int state) { result.asOperand() = getABarrierInstruction(state).getAUse() } @@ -166,9 +164,7 @@ module Barrier2 { * Gets the block of a node that is guarded (see `getABarrierInstruction` or * `getABarrierNode` for the definition of what it means to be guarded). */ - IRBlock getABarrierBlock(FlowState2 state) { - result.getAnInstruction() = getABarrierInstruction(state) - } + IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) } } private module InterestingPointerAddInstruction { @@ -201,8 +197,8 @@ private module InterestingPointerAddInstruction { } /** - * A product-flow configuration for flow from an `(allocation, size)` pair to a pointer- - * arithmetic instruction that is non-strictly upper-bounded by `allocation + size`. + * A product-flow configuration for flow from an `(allocation, size)` pair to a + * pointer-arithmetic operation `pai` such that `pai <= allocation + size`. */ private module Config implements ProductFlow::StateConfigSig { class FlowState1 = Unit; @@ -210,7 +206,7 @@ private module Config implements ProductFlow::StateConfigSig { class FlowState2 = int; predicate isSourcePair( - DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2 + DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 sizeAddend ) { // In the case of an allocation like // ```cpp @@ -218,21 +214,21 @@ private module Config implements ProductFlow::StateConfigSig { // ``` // we use `state2` to remember that there was an offset (in this case an offset of `1`) added // to the size of the allocation. This state is then checked in `isSinkPair`. - exists(state1) and - hasSize(source1.asConvertedExpr(), source2, state2) + exists(unit) and + hasSize(allocSource.asConvertedExpr(), sizeSource, sizeAddend) } predicate isSinkPair( - DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2 + DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 sizeAddend ) { - exists(state1) and + exists(unit) and // We check that the delta computed by the range analysis matches the // state value that we set in `isSourcePair`. - pointerAddInstructionHasBounds0(_, sink1, sink2, state2) + pointerAddInstructionHasBounds0(_, allocSink, sizeSink, sizeAddend) } predicate isBarrier2(DataFlow::Node node, FlowState2 state) { - node = Barrier2::getABarrierNode(state) + node = SizeBarrier::getABarrierNode(state) } predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) } @@ -245,7 +241,7 @@ private module Config implements ProductFlow::StateConfigSig { private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; /** - * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the + * Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink` is the * left operand of the pointer-arithmetic operation. 
* * For example in, @@ -254,37 +250,37 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; * ``` * We will have: * - `pai` is `p + (size + 1)`, - * - `sink1` is `p` - * - `sink2` is `size` + * - `allocSink` is `p` + * - `sizeSink` is `size` * - `delta` is `1`. */ pragma[nomagic] private predicate pointerAddInstructionHasBounds0( - PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta + PointerAddInstruction pai, DataFlow::Node allocSink, DataFlow::Node sizeSink, int delta ) { InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and - exists(Instruction right, Instruction instr2 | + exists(Instruction right, Instruction sizeInstr | pai.getRight() = right and - pai.getLeft() = sink1.asInstruction() and - instr2 = sink2.asInstruction() and - // pai.getRight() <= sink2 + delta - bounded1(right, instr2, delta) and - not right = Barrier2::getABarrierInstruction(delta) and - not instr2 = Barrier2::getABarrierInstruction(delta) + pai.getLeft() = allocSink.asInstruction() and + sizeInstr = sizeSink.asInstruction() and + // pai.getRight() <= sizeSink + delta + bounded1(right, sizeInstr, delta) and + not right = SizeBarrier::getABarrierInstruction(delta) and + not sizeInstr = SizeBarrier::getABarrierInstruction(delta) ) } /** - * Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand - * side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai` - * is non-strictly upper bounded by the size of `alllocation` + `delta`. + * Holds if `allocation` flows to `allocSink` and `allocSink` represents the left operand + * of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and + * `b <= allocation + delta`. */ pragma[nomagic] predicate pointerAddInstructionHasBounds( - DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta + DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node allocSink, int delta ) { - exists(DataFlow::Node sink2 | - AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and - pointerAddInstructionHasBounds0(pai, sink1, sink2, delta) + exists(DataFlow::Node sizeSink | + AllocToInvalidPointerFlow::flow(allocation, _, allocSink, sizeSink) and + pointerAddInstructionHasBounds0(pai, allocSink, sizeSink, delta) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll index 078c8c92c57..48a90ab28b5 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll @@ -84,10 +84,10 @@ private module InvalidPointerToDerefBarrier { } additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int state, boolean testIsTrue + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue ) { // The sink is any "large" side of a relational comparison. 
- g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue) } predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } @@ -105,12 +105,12 @@ private module InvalidPointerToDerefBarrier { private predicate operandGuardChecks( IRGuardCondition g, Operand left, Operand right, int state, boolean edge ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, int state0 | + exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k | nRight.asOperand() = right and nLeft.asOperand() = left and - BarrierConfig::isSink(nLeft, nRight, g, state0, edge) and + BarrierConfig::isSink(nLeft, nRight, g, k, edge) and state = getInvalidPointerToDerefSourceDelta(nRight) and - state0 <= state + k <= state ) } @@ -150,47 +150,50 @@ private module InvalidPointerToDerefConfig implements DataFlow::ConfigSig { private import DataFlow::Global<InvalidPointerToDerefConfig> /** - * Holds if `source1` is dataflow node that represents an allocation that flows to the - * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with - * a pointer-value that is non-strictly upper bounded by `pai + delta`. + * Holds if `allocSource` is a dataflow node that represents an allocation that flows to the + * left-hand side of the pointer-arithmetic `pai`, and `derefSource <= pai + deltaDerefSourceAndPai`. * * For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such * as `(p + size) + 1` and `derefSource` is the node representing `(p + size) + 1`. In this - * case `delta` is 1. + * case `deltaDerefSourceAndPai` is 1. */ private predicate invalidPointerToDerefSource( - DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta + DataFlow::Node allocSource, PointerArithmeticInstruction pai, DataFlow::Node derefSource, + int deltaDerefSourceAndPai ) { - exists(int delta0 | - // Note that `delta` is not necessarily equal to `delta0`: - // `delta0` is the constant offset added to the size of the allocation, and - // delta is the constant difference between the pointer-arithmetic instruction + exists(int rhsSizeDelta | + // Note that `deltaDerefSourceAndPai` is not necessarily equal to `rhsSizeDelta`: + // `rhsSizeDelta` is the constant offset added to the size of the allocation, and + // `deltaDerefSourceAndPai` is the constant difference between the pointer-arithmetic instruction // and the instruction computing the address for which we will search for a dereference. - AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and - bounded2(derefSource.asInstruction(), pai, delta) and - delta >= 0 and + AllocToInvalidPointer::pointerAddInstructionHasBounds(allocSource, pai, _, rhsSizeDelta) and + // pai <= derefSource + deltaDerefSourceAndPai and deltaDerefSourceAndPai <= 0 is equivalent to + // derefSource >= pai + deltaDerefSourceAndPai and deltaDerefSourceAndPai >= 0 + bounded1(pai, derefSource.asInstruction(), deltaDerefSourceAndPai) and + deltaDerefSourceAndPai <= 0 and // TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2` // private to `AllocationToInvalidPointer.qll`. 
- not derefSource.getBasicBlock() = AllocToInvalidPointer::Barrier2::getABarrierBlock(delta0) + not derefSource.getBasicBlock() = + AllocToInvalidPointer::SizeBarrier::getABarrierBlock(rhsSizeDelta) ) } /** * Holds if `sink` is a sink for `InvalidPointerToDerefConfig` and `i` is a `StoreInstruction` that - * writes to an address that non-strictly upper-bounds `sink`, or `i` is a `LoadInstruction` that - * reads from an address that non-strictly upper-bounds `sink`. + * writes to an address `addr` such that `addr <= sink`, or `i` is a `LoadInstruction` that + * reads from an address `addr` such that `addr <= sink`. */ pragma[inline] private predicate isInvalidPointerDerefSink( - DataFlow::Node sink, Instruction i, string operation, int delta + DataFlow::Node sink, Instruction i, string operation, int deltaDerefSinkAndDerefAddress ) { exists(AddressOperand addr, Instruction s, IRBlock b | s = sink.asInstruction() and - bounded(addr.getDef(), s, delta) and - delta >= 0 and + bounded(addr.getDef(), s, deltaDerefSinkAndDerefAddress) and + deltaDerefSinkAndDerefAddress >= 0 and i.getAnOperand() = addr and b = i.getBlock() and - not b = InvalidPointerToDerefBarrier::getABarrierBlock(delta) + not b = InvalidPointerToDerefBarrier::getABarrierBlock(deltaDerefSinkAndDerefAddress) | i instanceof StoreInstruction and operation = "write" @@ -230,13 +233,13 @@ private predicate paiForDereferenceSink(PointerArithmeticInstruction pai, DataFl */ private predicate derefSinkToOperation( DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation, - string description, int delta + string description, int deltaDerefSinkAndDerefAddress ) { - exists(Instruction i | + exists(Instruction operationInstr | paiForDereferenceSink(pai, pragma[only_bind_into](derefSink)) and - isInvalidPointerDerefSink(derefSink, i, description, delta) and - i = getASuccessor(derefSink.asInstruction()) and - operation.asInstruction() = i + isInvalidPointerDerefSink(derefSink, operationInstr, description, deltaDerefSinkAndDerefAddress) and + operationInstr = getASuccessor(derefSink.asInstruction()) and + operation.asInstruction() = operationInstr ) } diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll index 12bb50321fa..c15dc4d1d29 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll @@ -35,14 +35,5 @@ bindingset[i] pragma[inline_late] predicate bounded1(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } -/** - * Holds if `i <= b + delta`. - * - * This predicate enforces a join-order that ensures that `b` has already been bound. - */ -bindingset[b] -pragma[inline_late] -predicate bounded2(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } - /** Holds if `i <= b + delta`. 
*/ predicate bounded = boundedImpl/3; diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected index 881a1d8383d..666434bc659 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/InvalidPointerDeref.expected @@ -132,8 +132,6 @@ edges | test.cpp:304:15:304:26 | new[] | test.cpp:308:5:308:29 | ... = ... | | test.cpp:355:14:355:27 | new[] | test.cpp:356:15:356:23 | ... + ... | | test.cpp:355:14:355:27 | new[] | test.cpp:356:15:356:23 | ... + ... | -| test.cpp:355:14:355:27 | new[] | test.cpp:357:24:357:30 | ... + ... | -| test.cpp:355:14:355:27 | new[] | test.cpp:357:24:357:30 | ... + ... | | test.cpp:355:14:355:27 | new[] | test.cpp:358:14:358:26 | * ... | | test.cpp:355:14:355:27 | new[] | test.cpp:359:14:359:32 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:356:15:356:23 | ... + ... | @@ -141,85 +139,45 @@ edges | test.cpp:356:15:356:23 | ... + ... | test.cpp:358:14:358:26 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:359:14:359:32 | * ... | | test.cpp:356:15:356:23 | ... + ... | test.cpp:359:14:359:32 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:357:24:357:30 | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:358:14:358:26 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:358:14:358:26 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:359:14:359:32 | * ... | -| test.cpp:357:24:357:30 | ... + ... | test.cpp:359:14:359:32 | * ... | | test.cpp:377:14:377:27 | new[] | test.cpp:378:15:378:23 | ... + ... | | test.cpp:377:14:377:27 | new[] | test.cpp:378:15:378:23 | ... + ... | -| test.cpp:377:14:377:27 | new[] | test.cpp:381:5:381:9 | ... ++ | -| test.cpp:377:14:377:27 | new[] | test.cpp:381:5:381:9 | ... ++ | | test.cpp:377:14:377:27 | new[] | test.cpp:384:13:384:16 | * ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:378:15:378:23 | ... + ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:384:13:384:16 | * ... | | test.cpp:378:15:378:23 | ... + ... | test.cpp:384:13:384:16 | * ... | -| test.cpp:381:5:381:9 | ... ++ | test.cpp:381:5:381:9 | ... ++ | -| test.cpp:381:5:381:9 | ... ++ | test.cpp:384:13:384:16 | * ... | | test.cpp:410:14:410:27 | new[] | test.cpp:411:15:411:23 | & ... | | test.cpp:410:14:410:27 | new[] | test.cpp:411:15:411:23 | & ... | -| test.cpp:410:14:410:27 | new[] | test.cpp:413:5:413:8 | ... ++ | -| test.cpp:410:14:410:27 | new[] | test.cpp:413:5:413:8 | ... ++ | | test.cpp:410:14:410:27 | new[] | test.cpp:415:7:415:15 | ... = ... | | test.cpp:411:15:411:23 | & ... | test.cpp:411:15:411:23 | & ... | | test.cpp:411:15:411:23 | & ... | test.cpp:415:7:415:15 | ... = ... | | test.cpp:411:15:411:23 | & ... | test.cpp:415:7:415:15 | ... = ... | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:413:5:413:8 | ... ++ | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:415:7:415:15 | ... = ... | -| test.cpp:413:5:413:8 | ... ++ | test.cpp:415:7:415:15 | ... = ... | | test.cpp:421:14:421:27 | new[] | test.cpp:422:15:422:23 | & ... | | test.cpp:421:14:421:27 | new[] | test.cpp:422:15:422:23 | & ... | -| test.cpp:421:14:421:27 | new[] | test.cpp:424:5:424:8 | ... ++ | -| test.cpp:421:14:421:27 | new[] | test.cpp:424:5:424:8 | ... ++ | | test.cpp:421:14:421:27 | new[] | test.cpp:426:7:426:15 | ... = ... 
| | test.cpp:422:15:422:23 | & ... | test.cpp:422:15:422:23 | & ... | | test.cpp:422:15:422:23 | & ... | test.cpp:426:7:426:15 | ... = ... | | test.cpp:422:15:422:23 | & ... | test.cpp:426:7:426:15 | ... = ... | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:424:5:424:8 | ... ++ | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:426:7:426:15 | ... = ... | -| test.cpp:424:5:424:8 | ... ++ | test.cpp:426:7:426:15 | ... = ... | | test.cpp:432:14:432:27 | new[] | test.cpp:433:15:433:23 | & ... | | test.cpp:432:14:432:27 | new[] | test.cpp:433:15:433:23 | & ... | -| test.cpp:432:14:432:27 | new[] | test.cpp:436:5:436:8 | ... ++ | -| test.cpp:432:14:432:27 | new[] | test.cpp:436:5:436:8 | ... ++ | | test.cpp:432:14:432:27 | new[] | test.cpp:438:7:438:15 | ... = ... | | test.cpp:433:15:433:23 | & ... | test.cpp:433:15:433:23 | & ... | | test.cpp:433:15:433:23 | & ... | test.cpp:438:7:438:15 | ... = ... | | test.cpp:433:15:433:23 | & ... | test.cpp:438:7:438:15 | ... = ... | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:436:5:436:8 | ... ++ | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:438:7:438:15 | ... = ... | -| test.cpp:436:5:436:8 | ... ++ | test.cpp:438:7:438:15 | ... = ... | | test.cpp:444:14:444:27 | new[] | test.cpp:445:15:445:23 | & ... | | test.cpp:444:14:444:27 | new[] | test.cpp:445:15:445:23 | & ... | -| test.cpp:444:14:444:27 | new[] | test.cpp:448:5:448:8 | ... ++ | -| test.cpp:444:14:444:27 | new[] | test.cpp:448:5:448:8 | ... ++ | | test.cpp:444:14:444:27 | new[] | test.cpp:450:7:450:15 | ... = ... | | test.cpp:445:15:445:23 | & ... | test.cpp:445:15:445:23 | & ... | | test.cpp:445:15:445:23 | & ... | test.cpp:450:7:450:15 | ... = ... | | test.cpp:445:15:445:23 | & ... | test.cpp:450:7:450:15 | ... = ... | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:448:5:448:8 | ... ++ | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:450:7:450:15 | ... = ... | -| test.cpp:448:5:448:8 | ... ++ | test.cpp:450:7:450:15 | ... = ... | | test.cpp:480:14:480:27 | new[] | test.cpp:481:15:481:23 | & ... | | test.cpp:480:14:480:27 | new[] | test.cpp:481:15:481:23 | & ... | -| test.cpp:480:14:480:27 | new[] | test.cpp:484:5:484:8 | ... ++ | -| test.cpp:480:14:480:27 | new[] | test.cpp:484:5:484:8 | ... ++ | | test.cpp:480:14:480:27 | new[] | test.cpp:486:7:486:15 | ... = ... | | test.cpp:481:15:481:23 | & ... | test.cpp:481:15:481:23 | & ... | | test.cpp:481:15:481:23 | & ... | test.cpp:486:7:486:15 | ... = ... | | test.cpp:481:15:481:23 | & ... | test.cpp:486:7:486:15 | ... = ... | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:484:5:484:8 | ... ++ | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:486:7:486:15 | ... = ... | -| test.cpp:484:5:484:8 | ... ++ | test.cpp:486:7:486:15 | ... = ... | | test.cpp:543:14:543:27 | new[] | test.cpp:548:5:548:19 | ... = ... | | test.cpp:554:14:554:27 | new[] | test.cpp:559:5:559:19 | ... = ... | | test.cpp:642:14:642:31 | new[] | test.cpp:647:5:647:19 | ... = ... | -| test.cpp:652:14:652:27 | new[] | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:652:14:652:27 | new[] | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:652:14:652:27 | new[] | test.cpp:662:3:662:11 | ... = ... | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:656:3:656:6 | ... ++ | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | -| test.cpp:656:3:656:6 | ... ++ | test.cpp:662:3:662:11 | ... = ... | | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... 
| nodes | test.cpp:4:15:4:20 | call to malloc | semmle.label | call to malloc | @@ -318,45 +276,31 @@ nodes | test.cpp:355:14:355:27 | new[] | semmle.label | new[] | | test.cpp:356:15:356:23 | ... + ... | semmle.label | ... + ... | | test.cpp:356:15:356:23 | ... + ... | semmle.label | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | semmle.label | ... + ... | -| test.cpp:357:24:357:30 | ... + ... | semmle.label | ... + ... | | test.cpp:358:14:358:26 | * ... | semmle.label | * ... | | test.cpp:359:14:359:32 | * ... | semmle.label | * ... | | test.cpp:377:14:377:27 | new[] | semmle.label | new[] | | test.cpp:378:15:378:23 | ... + ... | semmle.label | ... + ... | | test.cpp:378:15:378:23 | ... + ... | semmle.label | ... + ... | -| test.cpp:381:5:381:9 | ... ++ | semmle.label | ... ++ | -| test.cpp:381:5:381:9 | ... ++ | semmle.label | ... ++ | | test.cpp:384:13:384:16 | * ... | semmle.label | * ... | | test.cpp:410:14:410:27 | new[] | semmle.label | new[] | | test.cpp:411:15:411:23 | & ... | semmle.label | & ... | | test.cpp:411:15:411:23 | & ... | semmle.label | & ... | -| test.cpp:413:5:413:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:413:5:413:8 | ... ++ | semmle.label | ... ++ | | test.cpp:415:7:415:15 | ... = ... | semmle.label | ... = ... | | test.cpp:421:14:421:27 | new[] | semmle.label | new[] | | test.cpp:422:15:422:23 | & ... | semmle.label | & ... | | test.cpp:422:15:422:23 | & ... | semmle.label | & ... | -| test.cpp:424:5:424:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:424:5:424:8 | ... ++ | semmle.label | ... ++ | | test.cpp:426:7:426:15 | ... = ... | semmle.label | ... = ... | | test.cpp:432:14:432:27 | new[] | semmle.label | new[] | | test.cpp:433:15:433:23 | & ... | semmle.label | & ... | | test.cpp:433:15:433:23 | & ... | semmle.label | & ... | -| test.cpp:436:5:436:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:436:5:436:8 | ... ++ | semmle.label | ... ++ | | test.cpp:438:7:438:15 | ... = ... | semmle.label | ... = ... | | test.cpp:444:14:444:27 | new[] | semmle.label | new[] | | test.cpp:445:15:445:23 | & ... | semmle.label | & ... | | test.cpp:445:15:445:23 | & ... | semmle.label | & ... | -| test.cpp:448:5:448:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:448:5:448:8 | ... ++ | semmle.label | ... ++ | | test.cpp:450:7:450:15 | ... = ... | semmle.label | ... = ... | | test.cpp:480:14:480:27 | new[] | semmle.label | new[] | | test.cpp:481:15:481:23 | & ... | semmle.label | & ... | | test.cpp:481:15:481:23 | & ... | semmle.label | & ... | -| test.cpp:484:5:484:8 | ... ++ | semmle.label | ... ++ | -| test.cpp:484:5:484:8 | ... ++ | semmle.label | ... ++ | | test.cpp:486:7:486:15 | ... = ... | semmle.label | ... = ... | | test.cpp:543:14:543:27 | new[] | semmle.label | new[] | | test.cpp:548:5:548:19 | ... = ... | semmle.label | ... = ... | @@ -364,10 +308,6 @@ nodes | test.cpp:559:5:559:19 | ... = ... | semmle.label | ... = ... | | test.cpp:642:14:642:31 | new[] | semmle.label | new[] | | test.cpp:647:5:647:19 | ... = ... | semmle.label | ... = ... | -| test.cpp:652:14:652:27 | new[] | semmle.label | new[] | -| test.cpp:656:3:656:6 | ... ++ | semmle.label | ... ++ | -| test.cpp:656:3:656:6 | ... ++ | semmle.label | ... ++ | -| test.cpp:662:3:662:11 | ... = ... | semmle.label | ... = ... | | test.cpp:667:14:667:31 | new[] | semmle.label | new[] | | test.cpp:675:7:675:23 | ... = ... | semmle.label | ... = ... | subpaths @@ -403,5 +343,4 @@ subpaths | test.cpp:548:5:548:19 | ... = ... | test.cpp:543:14:543:27 | new[] | test.cpp:548:5:548:19 | ... = ... 
| This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:543:14:543:27 | new[] | new[] | test.cpp:548:8:548:14 | src_pos | src_pos | | test.cpp:559:5:559:19 | ... = ... | test.cpp:554:14:554:27 | new[] | test.cpp:559:5:559:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:554:14:554:27 | new[] | new[] | test.cpp:559:8:559:14 | src_pos | src_pos | | test.cpp:647:5:647:19 | ... = ... | test.cpp:642:14:642:31 | new[] | test.cpp:647:5:647:19 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:642:14:642:31 | new[] | new[] | test.cpp:647:8:647:14 | src_pos | src_pos | -| test.cpp:662:3:662:11 | ... = ... | test.cpp:652:14:652:27 | new[] | test.cpp:662:3:662:11 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@ + 1. | test.cpp:652:14:652:27 | new[] | new[] | test.cpp:653:19:653:22 | size | size | | test.cpp:675:7:675:23 | ... = ... | test.cpp:667:14:667:31 | new[] | test.cpp:675:7:675:23 | ... = ... | This write might be out of bounds, as the pointer might be equal to $@ + $@. | test.cpp:667:14:667:31 | new[] | new[] | test.cpp:675:10:675:18 | ... ++ | ... ++ | diff --git a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp index cfcbb9de9a5..f1d1b983c1f 100644 --- a/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp +++ b/cpp/ql/test/experimental/query-tests/Security/CWE/CWE-193/pointer-deref/test.cpp @@ -17,7 +17,7 @@ void test2(int size) { char* q = p + size - 1; // $ alloc=L16 char a = *q; // GOOD char b = *(q - 1); // GOOD - char c = *(q + 1); // $ deref=L20 // BAD + char c = *(q + 1); // $ deref=L17->L20 // BAD char d = *(q + size); // BAD [NOT DETECTED] char e = *(q - size); // GOOD char f = *(q + size + 1); // BAD [NOT DETECTED] @@ -198,7 +198,7 @@ void test12(unsigned len, unsigned index) { return; } - p[index] = '\0'; // $ deref=L201 // BAD + p[index] = '\0'; // $ deref=L195->L201 deref=L197->L201 // BAD } void test13(unsigned len, unsigned index) { @@ -210,7 +210,7 @@ void test13(unsigned len, unsigned index) { return; } - *q = '\0'; // $ deref=L213 // BAD + *q = '\0'; // $ deref=L206->L213 deref=L209->L213 // BAD } bool unknown(); @@ -261,7 +261,7 @@ void test17(unsigned len) int *end = xs + len; // $ alloc=L260 for (int *x = xs; x <= end; x++) { - int i = *x; // $ deref=L264 // BAD + int i = *x; // $ deref=L261->L264 deref=L262->L264 // BAD } } @@ -271,7 +271,7 @@ void test18(unsigned len) int *end = xs + len; // $ alloc=L270 for (int *x = xs; x <= end; x++) { - *x = 0; // $ deref=L274 // BAD + *x = 0; // $ deref=L271->L274 deref=L272->L274 // BAD } } @@ -355,8 +355,8 @@ void test25(unsigned size) { char *xs = new char[size]; char *end = xs + size; // $ alloc=L355 char *end_plus_one = end + 1; - int val1 = *end_plus_one; // $ deref=L358+1 // BAD - int val2 = *(end_plus_one + 1); // $ deref=L359+2 // BAD + int val1 = *end_plus_one; // $ deref=L356->L358+1 deref=L357->L358+1 // BAD + int val2 = *(end_plus_one + 1); // $ deref=L356->L359+2 deref=L357->L359+2 // BAD } void test26(unsigned size) { @@ -381,7 +381,7 @@ void test27(unsigned size, bool b) { end++; } - int val = *end; // $ deref=L384+1 // BAD + int val = *end; // $ deref=L378->L384+1 deref=L381->L384+1 // BAD } void test28(unsigned size) { @@ -412,7 +412,7 @@ void test28_simple2(unsigned size) { if (xs < end) { 
xs++; if (xs < end + 1) { - xs[0] = 0; // $ deref=L415 // BAD + xs[0] = 0; // $ deref=L411->L415 deref=L412->L415 deref=L414->L415 // BAD } } } @@ -423,7 +423,7 @@ void test28_simple3(unsigned size) { if (xs < end) { xs++; if (xs - 1 < end) { - xs[0] = 0; // $ deref=L426 // BAD + xs[0] = 0; // $ deref=L422->L426 deref=L423->L426 deref=L425->L426 // BAD } } } @@ -435,7 +435,7 @@ void test28_simple4(unsigned size) { end++; xs++; if (xs < end) { - xs[0] = 0; // $ deref=L438 // BAD + xs[0] = 0; // $ deref=L433->L438 deref=L434->L438 deref=L435->L438 // BAD } } } @@ -447,7 +447,7 @@ void test28_simple5(unsigned size) { if (xs < end) { xs++; if (xs < end) { - xs[0] = 0; // $ deref=L450 // BAD + xs[0] = 0; // $ deref=L445->L450 deref=L446->L450 // BAD } } } @@ -483,7 +483,7 @@ void test28_simple8(unsigned size) { if (xs < end) { xs++; if (xs < end - 1) { - xs[0] = 0; // $ deref=L486+498 // BAD + xs[0] = 0; // $ deref=L481->L486+498 deref=L482->L486+498 // BAD } } } @@ -659,7 +659,7 @@ void test32(unsigned size) { xs++; if (xs >= end) return; - xs[0] = 0; // $ deref=L656->L662+1 deref=L657->L662+1 GOOD [FALSE POSITIVE] + xs[0] = 0; // $ GOOD } void test33(unsigned size, unsigned src_pos) @@ -689,4 +689,15 @@ void test_missing_call_context_2(unsigned size) { int* p = new int[size]; int* end_minus_one = pointer_arithmetic(p, size - 1); *end_minus_one = '0'; // $ deref=L680->L690->L691 // GOOD +} + +void test34(unsigned size) { + char *p = new char[size]; + char *end = p + size + 1; // $ alloc=L695 + if (p + 1 < end) { + p += 1; + } + if (p + 1 < end) { + int val = *p; // GOOD + } } \ No newline at end of file diff --git a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected index ebb54018d31..a357821fcf5 100644 --- a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected @@ -2157,3 +2157,46 @@ ssa.cpp: # 431| v431_9(void) = ReturnValue : &:r431_8, m435_4 # 431| v431_10(void) = AliasedUse : m431_3 # 431| v431_11(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| m438_2(unknown) = AliasedDefinition : +# 438| m438_3(unknown) = InitializeNonLocal : +# 438| m438_4(unknown) = Chi : total:m438_2, partial:m438_3 +# 438| r438_5(glval) = VariableAddress[a] : +# 438| m438_6(bool) = InitializeParameter[a] : &:r438_5 +# 438| r438_7(glval) = VariableAddress[x] : +# 438| m438_8(int) = InitializeParameter[x] : &:r438_7 +# 438| r438_9(glval) = VariableAddress[y] : +# 438| m438_10(int) = InitializeParameter[y] : &:r438_9 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_6 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_11(void) = ReturnVoid : +# 438| v438_12(void) = AliasedUse : m438_3 +# 438| v438_13(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_8 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 
439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_10 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected index a2390ac28e7..f9eb5b53828 100644 --- a/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected +++ b/cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir_unsound.expected @@ -2146,3 +2146,46 @@ ssa.cpp: # 431| v431_9(void) = ReturnValue : &:r431_8, m435_4 # 431| v431_10(void) = AliasedUse : m431_3 # 431| v431_11(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| m438_2(unknown) = AliasedDefinition : +# 438| m438_3(unknown) = InitializeNonLocal : +# 438| m438_4(unknown) = Chi : total:m438_2, partial:m438_3 +# 438| r438_5(glval) = VariableAddress[a] : +# 438| m438_6(bool) = InitializeParameter[a] : &:r438_5 +# 438| r438_7(glval) = VariableAddress[x] : +# 438| m438_8(int) = InitializeParameter[x] : &:r438_7 +# 438| r438_9(glval) = VariableAddress[y] : +# 438| m438_10(int) = InitializeParameter[y] : &:r438_9 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_6 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_11(void) = ReturnVoid : +# 438| v438_12(void) = AliasedUse : m438_3 +# 438| v438_13(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_8 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_10 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/ssa.cpp b/cpp/ql/test/library-tests/ir/ssa/ssa.cpp index fdeeb4ec2ba..56caf9de3b6 100644 --- a/cpp/ql/test/library-tests/ir/ssa/ssa.cpp +++ b/cpp/ql/test/library-tests/ir/ssa/ssa.cpp @@ -434,3 +434,7 @@ int noreturnTest2(int x) { } return x; } + +void Conditional(bool a, int x, int y) { + int z = a ? x : y; +} diff --git a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected index f51e8fef7ac..96b35a76c3b 100644 --- a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected @@ -2002,3 +2002,45 @@ ssa.cpp: # 431| v431_8(void) = ReturnValue : &:r431_7, m435_4 # 431| v431_9(void) = AliasedUse : ~m? 
# 431| v431_10(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| mu438_2(unknown) = AliasedDefinition : +# 438| mu438_3(unknown) = InitializeNonLocal : +# 438| r438_4(glval) = VariableAddress[a] : +# 438| m438_5(bool) = InitializeParameter[a] : &:r438_4 +# 438| r438_6(glval) = VariableAddress[x] : +# 438| m438_7(int) = InitializeParameter[x] : &:r438_6 +# 438| r438_8(glval) = VariableAddress[y] : +# 438| m438_9(int) = InitializeParameter[y] : &:r438_8 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_5 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_10(void) = ReturnVoid : +# 438| v438_11(void) = AliasedUse : ~m? +# 438| v438_12(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_7 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_9 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected index f51e8fef7ac..96b35a76c3b 100644 --- a/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected +++ b/cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir_unsound.expected @@ -2002,3 +2002,45 @@ ssa.cpp: # 431| v431_8(void) = ReturnValue : &:r431_7, m435_4 # 431| v431_9(void) = AliasedUse : ~m? # 431| v431_10(void) = ExitFunction : + +# 438| void Conditional(bool, int, int) +# 438| Block 0 +# 438| v438_1(void) = EnterFunction : +# 438| mu438_2(unknown) = AliasedDefinition : +# 438| mu438_3(unknown) = InitializeNonLocal : +# 438| r438_4(glval) = VariableAddress[a] : +# 438| m438_5(bool) = InitializeParameter[a] : &:r438_4 +# 438| r438_6(glval) = VariableAddress[x] : +# 438| m438_7(int) = InitializeParameter[x] : &:r438_6 +# 438| r438_8(glval) = VariableAddress[y] : +# 438| m438_9(int) = InitializeParameter[y] : &:r438_8 +# 439| r439_1(glval) = VariableAddress[z] : +# 439| r439_2(glval) = VariableAddress[a] : +# 439| r439_3(bool) = Load[a] : &:r439_2, m438_5 +# 439| v439_4(void) = ConditionalBranch : r439_3 +#-----| False -> Block 3 +#-----| True -> Block 2 + +# 439| Block 1 +# 439| m439_5(int) = Phi : from 2:m439_12, from 3:m439_16 +# 439| r439_6(glval) = VariableAddress[#temp439:13] : +# 439| r439_7(int) = Load[#temp439:13] : &:r439_6, m439_5 +# 439| m439_8(int) = Store[z] : &:r439_1, r439_7 +# 440| v440_1(void) = NoOp : +# 438| v438_10(void) = ReturnVoid : +# 438| v438_11(void) = AliasedUse : ~m? 
+# 438| v438_12(void) = ExitFunction : + +# 439| Block 2 +# 439| r439_9(glval) = VariableAddress[x] : +# 439| r439_10(int) = Load[x] : &:r439_9, m438_7 +# 439| r439_11(glval) = VariableAddress[#temp439:13] : +# 439| m439_12(int) = Store[#temp439:13] : &:r439_11, r439_10 +#-----| Goto -> Block 1 + +# 439| Block 3 +# 439| r439_13(glval) = VariableAddress[y] : +# 439| r439_14(int) = Load[y] : &:r439_13, m438_9 +# 439| r439_15(glval) = VariableAddress[#temp439:13] : +# 439| m439_16(int) = Store[#temp439:13] : &:r439_15, r439_14 +#-----| Goto -> Block 1 diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst index 911c930458e..92af2679dc2 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp-new.rst @@ -168,74 +168,61 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration`` as follows: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.new.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + The following predicates are defined in the configuration: - ``isSource``—defines where data may flow from - ``isSink``—defines where data may flow to - ``isBarrier``—optional, restricts the data flow -- ``isBarrierGuard``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration`` as follows: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global`` as follows: .. 
code-block:: ql import semmle.code.cpp.dataflow.new.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -The following predicates are defined in the configuration: + module MyFlow = TaintTracking::Global<MyFlowConfiguration>; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isSanitizerGuard``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class. -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ @@ -247,17 +234,15 @@ The following data flow configuration tracks data flow from environment variable import cpp import semmle.code.cpp.dataflow.new.DataFlow - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Function getenv | source.asIndirectExpr(1).(FunctionCall).getTarget() = getenv and getenv.hasGlobalName("getenv") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasGlobalName("fopen") @@ -265,16 +250,17 @@ The following data flow configuration tracks data flow from environment variable } } + module EnvironmentToFileFlow = DataFlow::Global<EnvironmentToFileConfiguration>; + from - Expr getenv, Expr fopen, EnvironmentToFileConfiguration config, DataFlow::Node source, - DataFlow::Node sink + Expr getenv, Expr fopen, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = getenv and sink.asIndirectExpr(1) = fopen and - config.hasFlow(source, sink) + EnvironmentToFileFlow::flow(source, sink) select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'" -The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes. +The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isBarrier`` to prevent taint from propagating through them. It also uses ``isAdditionalFlowStep`` to add flow from loop bounds to loop indexes. .. 
code-block:: ql import cpp import semmle.code.cpp.controlflow.Guards import semmle.code.cpp.dataflow.new.TaintTracking - class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration { - NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" } - - override predicate isSource(DataFlow::Node node) { + module NetworkToBufferSizeConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl") } - override predicate isSink(DataFlow::Node node) { + predicate isSink(DataFlow::Node node) { exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset()) } - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists(Loop loop, LoopCounter lc | loop = lc.getALoop() and loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr() @@ -302,7 +286,7 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` ) } - override predicate isSanitizer(DataFlow::Node node) { + predicate isBarrier(DataFlow::Node node) { exists(GuardCondition gc, Variable v | gc.getAChild*() = v.getAnAccess() and node.asExpr() = v.getAnAccess() and @@ -312,8 +296,10 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` } } - from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf - where conf.hasFlow(ntohl, offset) + module NetworkToBufferSizeFlow = TaintTracking::Global<NetworkToBufferSizeConfiguration>; + + from DataFlow::Node ntohl, DataFlow::Node offset + where NetworkToBufferSizeFlow::flow(ntohl, offset) select offset, "This array offset may be influenced by $@.", ntohl, "converted data from the network" @@ -353,14 +339,12 @@ Exercise 2 import cpp import semmle.code.cpp.dataflow.new.DataFlow - class LiteralToGethostbynameConfiguration extends DataFlow::Configuration { - LiteralToGethostbynameConfiguration() { this = "LiteralToGethostbynameConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module LiteralToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asIndirectExpr(1) instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname") @@ -368,13 +352,14 @@ Exercise 2 } } + module LiteralToGethostbynameFlow = DataFlow::Global<LiteralToGethostbynameConfiguration>; + from - StringLiteral sl, FunctionCall fc, LiteralToGethostbynameConfiguration cfg, DataFlow::Node source, - DataFlow::Node sink + StringLiteral sl, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = sl and sink.asIndirectExpr(1) = fc.getArgument(0) and - cfg.hasFlow(source, sink) + LiteralToGethostbynameFlow::flow(source, sink) select sl, fc Exercise 3 ~~~~~~~~~~ @@ -401,12 +386,10 @@ Exercise 4 GetenvSource() { this.asIndirectExpr(1).(FunctionCall).getTarget().hasGlobalName("getenv") } } - class GetenvToGethostbynameConfiguration extends DataFlow::Configuration { - GetenvToGethostbynameConfiguration() { this = "GetenvToGethostbynameConfiguration" } + module GetenvToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate 
isSource(DataFlow::Node source) { source instanceof GetenvSource } - - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(FunctionCall fc | sink.asIndirectExpr(1) = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname") @@ -414,13 +397,14 @@ Exercise 4 } } + module GetenvToGethostbynameFlow = DataFlow::Global; + from - Expr getenv, FunctionCall fc, GetenvToGethostbynameConfiguration cfg, DataFlow::Node source, - DataFlow::Node sink + Expr getenv, FunctionCall fc, DataFlow::Node source, DataFlow::Node sink where source.asIndirectExpr(1) = getenv and sink.asIndirectExpr(1) = fc.getArgument(0) and - cfg.hasFlow(source, sink) + GetenvToGethostbynameFlow::flow(source, sink) select getenv, fc Further reading @@ -430,4 +414,4 @@ Further reading .. include:: ../reusables/cpp-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst index 0776834d243..3440f3129e4 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-cpp.rst @@ -152,74 +152,62 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration`` as follows: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + + The following predicates are defined in the configuration: - ``isSource``—defines where data may flow from - ``isSink``—defines where data may flow to - ``isBarrier``—optional, restricts the data flow -- ``isBarrierGuard``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. 
The global taint tracking library is used by extending the class ``TaintTracking::Configuration`` as follows: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global`` as follows: .. code-block:: ql import semmle.code.cpp.dataflow.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -The following predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isSanitizerGuard``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps - -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class. - -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Examples ~~~~~~~~ @@ -230,17 +218,15 @@ The following data flow configuration tracks data flow from environment variable import semmle.code.cpp.dataflow.DataFlow - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists (Function getenv | source.asExpr().(FunctionCall).getTarget() = getenv and getenv.hasGlobalName("getenv") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasGlobalName("fopen") @@ -248,12 +234,14 @@ The following data flow configuration tracks data flow from environment variable } } - from Expr getenv, Expr fopen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(getenv), DataFlow::exprNode(fopen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr getenv, Expr fopen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(getenv), DataFlow::exprNode(fopen)) select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'" -The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes. +The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. 
It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isBarrier`` to prevent taint from propagating through them. It also uses ``isAdditionalFlowStep`` to add flow from loop bounds to loop indexes. .. code-block:: ql @@ -261,18 +249,16 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` import semmle.code.cpp.controlflow.Guards import semmle.code.cpp.dataflow.TaintTracking - class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration { - NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" } - - override predicate isSource(DataFlow::Node node) { + module NetworkToBufferSizeConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node node) { node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl") } - override predicate isSink(DataFlow::Node node) { + predicate isSink(DataFlow::Node node) { exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset()) } - override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists(Loop loop, LoopCounter lc | loop = lc.getALoop() and loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr() | @@ -280,7 +266,7 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` ) } - override predicate isSanitizer(DataFlow::Node node) { + predicate isBarrier(DataFlow::Node node) { exists(GuardCondition gc, Variable v | gc.getAChild*() = v.getAnAccess() and node.asExpr() = v.getAnAccess() and @@ -289,8 +275,10 @@ The following taint-tracking configuration tracks data from a call to ``ntohl`` } } - from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf - where conf.hasFlow(ntohl, offset) + module NetworkToBufferSizeFlow = TaintTracking::Global; + + from DataFlow::Node ntohl, DataFlow::Node offset + where NetworkToBufferSizeFlow::flow(ntohl, offset) select offset, "This array offset may be influenced by $@.", ntohl, "converted data from the network" @@ -327,24 +315,22 @@ Exercise 2 import semmle.code.cpp.dataflow.DataFlow - class LiteralToGethostbynameConfiguration extends DataFlow::Configuration { - LiteralToGethostbynameConfiguration() { - this = "LiteralToGethostbynameConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module LiteralToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname")) } } - from StringLiteral sl, FunctionCall fc, LiteralToGethostbynameConfiguration cfg - where cfg.hasFlow(DataFlow::exprNode(sl), DataFlow::exprNode(fc.getArgument(0))) + module LiteralToGethostbynameFlow = DataFlow::Global; + + from StringLiteral sl, FunctionCall fc + where LiteralToGethostbynameFlow::flow(DataFlow::exprNode(sl), DataFlow::exprNode(fc.getArgument(0))) select sl, fc Exercise 3 @@ -373,24 +359,22 @@ Exercise 4 } } - class GetenvToGethostbynameConfiguration extends DataFlow::Configuration { - GetenvToGethostbynameConfiguration() { - this = "GetenvToGethostbynameConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module GetenvToGethostbynameConfiguration implements DataFlow::ConfigSig { + predicate 
isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists (FunctionCall fc | sink.asExpr() = fc.getArgument(0) and fc.getTarget().hasName("gethostbyname")) } } - from DataFlow::Node getenv, FunctionCall fc, GetenvToGethostbynameConfiguration cfg - where cfg.hasFlow(getenv, DataFlow::exprNode(fc.getArgument(0))) + module GetenvToGethostbynameFlow = DataFlow::Global; + + from DataFlow::Node getenv, FunctionCall fc + where GetenvToGethostbynameFlow::flow(getenv, DataFlow::exprNode(fc.getArgument(0))) select getenv.asExpr(), fc Further reading @@ -400,4 +384,4 @@ Further reading .. include:: ../reusables/cpp-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst index 4d715b7313c..5bfc939aa76 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-csharp.rst @@ -146,24 +146,24 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration``: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import csharp - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -171,45 +171,36 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow what local taint tracking is to local data flow. 
That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import csharp - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. -- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ @@ -228,12 +219,8 @@ This query shows a data flow configuration that uses all public API parameters a import csharp import semmle.code.csharp.dataflow.flowsources.PublicCallableParameter - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { - this = "..." - } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof PublicCallableParameterFlowSource } @@ -243,7 +230,6 @@ This query shows a data flow configuration that uses all public API parameters a Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data flow node. - ``DataFlow::ExprNode`` - an expression behaving as a data flow node. @@ -261,8 +247,6 @@ Class hierarchy - ``WcfRemoteFlowSource`` - data flow from a WCF web service. - ``AspNetServiceRemoteFlowSource`` - data flow from an ASP.NET web service. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. 
- Examples ~~~~~~~~ @@ -272,17 +256,15 @@ This data flow configuration tracks data flow from environment variables to open import csharp - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "Environment opening files" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Method m | m = source.asExpr().(MethodCall).getTarget() and m.hasQualifiedName("System.Environment.GetEnvironmentVariable") ) } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(MethodCall mc | mc.getTarget().hasQualifiedName("System.IO.File.Open") and sink.asExpr() = mc.getArgument(0) @@ -290,8 +272,10 @@ This data flow configuration tracks data flow from environment variables to open } } - from Expr environment, Expr fileOpen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr environment, Expr fileOpen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) select fileOpen, "This 'File.Open' uses data from $@.", environment, "call to 'GetEnvironmentVariable'" @@ -435,21 +419,21 @@ Exercise 2 import csharp - class Configuration extends DataFlow::Configuration { - Configuration() { this="String to System.Uri" } - - override predicate isSource(DataFlow::Node src) { + module StringToUriConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src.asExpr().hasValue() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call c | c.getTarget().(Constructor).getDeclaringType().hasQualifiedName("System.Uri") and sink.asExpr()=c.getArgument(0)) } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module StringToUriFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where StringToUriFlow::flow(src, sink) select src, "This string constructs a 'System.Uri' $@.", sink, "here" Exercise 3 @@ -476,21 +460,21 @@ Exercise 4 } } - class Configuration extends DataFlow::Configuration { - Configuration() { this="Environment to System.Uri" } - - override predicate isSource(DataFlow::Node src) { + module EnvironmentToUriConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src instanceof EnvironmentVariableFlowSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call c | c.getTarget().(Constructor).getDeclaringType().hasQualifiedName("System.Uri") and sink.asExpr()=c.getArgument(0)) } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module EnvironmentToUriFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where EnvironmentToUriFlow::flow(src, sink) select src, "This environment variable constructs a 'System.Uri' $@.", sink, "here" Exercise 5 diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst index 2eccdf5e103..dc08ae11e79 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-java.rst @@ -160,24 +160,24 @@ Global data flow 
tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -You use the global data flow library by extending the class ``DataFlow::Configuration``: +You use the global data flow library by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import semmle.code.java.dataflow.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "MyDataFlowConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource``—defines where data may flow from @@ -185,47 +185,36 @@ These predicates are defined in the configuration: - ``isBarrier``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps -The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be a unique name, for example, the name of your class. - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguration dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Data flow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. You use the global taint tracking library by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow as local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. You use the global taint tracking library by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import semmle.code.java.dataflow.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "MyTaintTrackingConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource``—defines where taint may flow from -- ``isSink``—defines where taint may flow to -- ``isSanitizer``—optional, restricts the taint flow -- ``isAdditionalTaintStep``—optional, adds additional taint steps - -Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration. 
- -The taint tracking analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Flow sources ~~~~~~~~~~~~ @@ -242,18 +231,16 @@ This query shows a taint-tracking configuration that uses remote user input as d import java import semmle.code.java.dataflow.FlowSources - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { - this = "..." - } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } ... } + module MyTaintFlow = TaintTracking::Global; + Exercises ~~~~~~~~~ @@ -287,16 +274,12 @@ Exercise 2 import semmle.code.java.dataflow.DataFlow - class Configuration extends DataFlow::Configuration { - Configuration() { - this = "LiteralToURL Configuration" - } - - override predicate isSource(DataFlow::Node source) { + module LiteralToURLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call call | sink.asExpr() = call.getArgument(0) and call.getCallee().(Constructor).getDeclaringType().hasQualifiedName("java.net", "URL") @@ -304,8 +287,10 @@ Exercise 2 } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module LiteralToURLFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where LiteralToURLFlow::flow(src, sink) select src, "This string constructs a URL $@.", sink, "here" Exercise 3 @@ -340,16 +325,12 @@ Exercise 4 } } - class GetenvToURLConfiguration extends DataFlow::Configuration { - GetenvToURLConfiguration() { - this = "GetenvToURLConfiguration" - } - - override predicate isSource(DataFlow::Node source) { + module GetenvToURLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof GetenvSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(Call call | sink.asExpr() = call.getArgument(0) and call.getCallee().(Constructor).getDeclaringType().hasQualifiedName("java.net", "URL") @@ -357,8 +338,10 @@ Exercise 4 } } - from DataFlow::Node src, DataFlow::Node sink, GetenvToURLConfiguration config - where config.hasFlow(src, sink) + module GetenvToURLFlow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where GetenvToURLFlow::flow(src, sink) select src, "This environment variable constructs a URL $@.", sink, "here" Further reading @@ -368,4 +351,4 @@ Further reading .. include:: ../reusables/java-further-reading.rst -.. include:: ../reusables/codeql-ref-tools-further-reading.rst \ No newline at end of file +.. 
include:: ../reusables/codeql-ref-tools-further-reading.rst diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst index 2ad5fc925f0..61422153735 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-python.rst @@ -204,24 +204,24 @@ Global data flow tracks data flow throughout the entire program, and is therefor Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -The global data flow library is used by extending the class ``DataFlow::Configuration``: +The global data flow library is used by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import python - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -229,45 +229,36 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name (for instance the class name). - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import python - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. 
-- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -283,7 +274,6 @@ For global flow, it is also useful to restrict sources to instances of ``LocalSo Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data flow node. - ``DataFlow::CfgNode`` - a control-flow node behaving as a data flow node. @@ -305,8 +295,6 @@ Class hierarchy - ``Concepts::HTTP::Server::RouteSetup`` - a data-flow node that sets up a route on a server. - ``Concepts::HTTP::Server::HttpResponse`` - a data-flow node that creates a HTTP response on a server. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. - Examples ~~~~~~~~ @@ -320,20 +308,20 @@ This query shows a data flow configuration that uses all network input as data s import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.Concepts - class RemoteToFileConfiguration extends TaintTracking::Configuration { - RemoteToFileConfiguration() { this = "RemoteToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module RemoteToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fa).getAPathArgument() } } - from DataFlow::Node input, DataFlow::Node fileAccess, RemoteToFileConfiguration config - where config.hasFlow(input, fileAccess) + module RemoteToFileFlow = TaintTracking::Global; + + from DataFlow::Node input, DataFlow::Node fileAccess + where RemoteToFileFlow::flow(input, fileAccess) select fileAccess, "This file access uses data from $@.", input, "user-controllable input." 
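The flow module produced by ``TaintTracking::Global`` also exposes ``PathGraph``, ``PathNode``, and ``flowPath``, so the example above can be extended into a path query. The following is only a sketch (it is not part of the guide being changed here); it reuses the same ``RemoteToFileConfiguration`` and assumes a standalone query file with ``@kind path-problem`` metadata:

.. code-block:: ql

   /**
    * @kind path-problem
    */

   import python
   import semmle.python.dataflow.new.DataFlow
   import semmle.python.dataflow.new.TaintTracking
   import semmle.python.dataflow.new.RemoteFlowSources
   import semmle.python.Concepts

   module RemoteToFileConfiguration implements DataFlow::ConfigSig {
     predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

     predicate isSink(DataFlow::Node sink) {
       sink = any(FileSystemAccess fa).getAPathArgument()
     }
   }

   module RemoteToFileFlow = TaintTracking::Global<RemoteToFileConfiguration>;

   import RemoteToFileFlow::PathGraph

   from RemoteToFileFlow::PathNode input, RemoteToFileFlow::PathNode fileAccess
   where RemoteToFileFlow::flowPath(input, fileAccess)
   select fileAccess.getNode(), input, fileAccess,
     "This file access uses data from $@.", input.getNode(), "user-controllable input."

Here ``flowPath`` relates path nodes rather than plain nodes, which is what allows the query to report the full path from the source to the sink.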
@@ -345,14 +333,12 @@ This data flow configuration tracks data flow from environment variables to open import semmle.python.dataflow.new.TaintTracking import semmle.python.ApiGraphs - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source = API::moduleImport("os").getMember("getenv").getACall() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { exists(DataFlow::CallCfgNode call | call = API::moduleImport("os").getMember("open").getACall() and sink = call.getArg(0) @@ -360,8 +346,10 @@ This data flow configuration tracks data flow from environment variables to open } } - from Expr environment, Expr fileOpen, EnvironmentToFileConfiguration config - where config.hasFlow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) + module EnvironmentToFileFlow = DataFlow::Global; + + from Expr environment, Expr fileOpen + where EnvironmentToFileFlow::flow(DataFlow::exprNode(environment), DataFlow::exprNode(fileOpen)) select fileOpen, "This call to 'os.open' uses data from $@.", environment, "call to 'os.getenv'" diff --git a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst index b326bfa59aa..6a603229376 100644 --- a/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst +++ b/docs/codeql/codeql-language-guides/analyzing-data-flow-in-ruby.rst @@ -224,24 +224,24 @@ However, global data flow is less precise than local data flow, and the analysis Using global data flow ~~~~~~~~~~~~~~~~~~~~~~ -You can use the global data flow library by extending the class ``DataFlow::Configuration``: +You can use the global data flow library by implementing the signature ``DataFlow::ConfigSig`` and applying the module ``DataFlow::Global``: .. code-block:: ql import codeql.ruby.DataFlow - class MyDataFlowConfiguration extends DataFlow::Configuration { - MyDataFlowConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } + module MyFlow = DataFlow::Global; + These predicates are defined in the configuration: - ``isSource`` - defines where data may flow from. @@ -249,14 +249,12 @@ These predicates are defined in the configuration: - ``isBarrier`` - optionally, restricts the data flow. - ``isAdditionalFlowStep`` - optionally, adds additional flow steps. -The characteristic predicate (``MyDataFlowConfiguration()``) defines the name of the configuration, so ``"..."`` must be replaced with a unique name (for instance the class name). - -The data flow analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``: +The data flow analysis is performed using the predicate ``flow(DataFlow::Node source, DataFlow::Node sink)``: .. 
code-block:: ql - from MyDataFlowConfiguation dataflow, DataFlow::Node source, DataFlow::Node sink - where dataflow.hasFlow(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select source, "Dataflow to $@.", sink, sink.toString() Using global taint tracking @@ -264,33 +262,26 @@ Using global taint tracking Global taint tracking is to global data flow what local taint tracking is to local data flow. That is, global taint tracking extends global data flow with additional non-value-preserving steps. -The global taint tracking library is used by extending the class ``TaintTracking::Configuration``: +The global taint tracking library is used by applying the module ``TaintTracking::Global`` to your configuration instead of ``DataFlow::Global``: .. code-block:: ql import codeql.ruby.DataFlow import codeql.ruby.TaintTracking - class MyTaintTrackingConfiguration extends TaintTracking::Configuration { - MyTaintTrackingConfiguration() { this = "..." } - - override predicate isSource(DataFlow::Node source) { + module MyFlowConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { ... } } -These predicates are defined in the configuration: + module MyFlow = TaintTracking::Global; -- ``isSource`` - defines where taint may flow from. -- ``isSink`` - defines where taint may flow to. -- ``isSanitizer`` - optionally, restricts the taint flow. -- ``isAdditionalTaintStep`` - optionally, adds additional taint steps. - -Similar to global data flow, the characteristic predicate (``MyTaintTrackingConfiguration()``) defines the unique name of the configuration and the taint analysis is performed using the predicate ``hasFlow(DataFlow::Node source, DataFlow::Node sink)``. +The resulting module has an identical signature to the one obtained from ``DataFlow::Global``. Predefined sources and sinks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -306,7 +297,6 @@ The predefined sources generally do that. Class hierarchy ~~~~~~~~~~~~~~~ -- ``DataFlow::Configuration`` - base class for custom global data flow analysis. - ``DataFlow::Node`` - an element behaving as a data-flow node. - ``DataFlow::LocalSourceNode`` - a local origin of data, as a data-flow node. - ``DataFlow::ExprNode`` - an expression behaving as a data-flow node. @@ -321,13 +311,11 @@ Class hierarchy - ``Concepts::HTTP::Server::RouteSetup`` - a data-flow node that sets up a route on a server. - ``Concepts::HTTP::Server::HttpResponse`` - a data-flow node that creates an HTTP response on a server. -- ``TaintTracking::Configuration`` - base class for custom global taint tracking analysis. - Examples of global data flow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following global taint-tracking query finds path arguments in filesystem accesses that can be controlled by a remote user. - - Since this is a taint-tracking query, the configuration class extends ``TaintTracking::Configuration``. + - Since this is a taint-tracking query, the ``TaintTracking::Global`` module is used. - The ``isSource`` predicate defines sources as any data-flow nodes that are instances of ``RemoteFlowSource``. - The ``isSink`` predicate defines sinks as path arguments in any filesystem access, using ``FileSystemAccess`` from the ``Concepts`` library. 
@@ -338,22 +326,22 @@ The following global taint-tracking query finds path arguments in filesystem acc import codeql.ruby.Concepts import codeql.ruby.dataflow.RemoteFlowSources - class RemoteToFileConfiguration extends TaintTracking::Configuration { - RemoteToFileConfiguration() { this = "RemoteToFileConfiguration" } + module RemoteToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } - - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fa).getAPathArgument() } } + + module RemoteToFileFlow = TaintTracking::Global; - from DataFlow::Node input, DataFlow::Node fileAccess, RemoteToFileConfiguration config - where config.hasFlow(input, fileAccess) + from DataFlow::Node input, DataFlow::Node fileAccess + where RemoteToFileFlow::flow(input, fileAccess) select fileAccess, "This file access uses data from $@.", input, "user-controllable input." The following global data-flow query finds calls to ``File.open`` where the filename argument comes from an environment variable. - - Since this is a data-flow query, the configuration class extends ``DataFlow::Configuration``. + - Since this is a data-flow query, the ``DataFlow::Global`` module is used. - The ``isSource`` predicate defines sources as expression nodes representing lookups on the ``ENV`` hash. - The ``isSink`` predicate defines sinks as the first argument in any call to ``File.open``. @@ -363,23 +351,23 @@ The following global data-flow query finds calls to ``File.open`` where the file import codeql.ruby.controlflow.CfgNodes import codeql.ruby.ApiGraphs - class EnvironmentToFileConfiguration extends DataFlow::Configuration { - EnvironmentToFileConfiguration() { this = "EnvironmentToFileConfiguration" } - - override predicate isSource(DataFlow::Node source) { + module EnvironmentToFileConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(ExprNodes::ConstantReadAccessCfgNode env | env.getExpr().getName() = "ENV" and env = source.asExpr().(ExprNodes::ElementReferenceCfgNode).getReceiver() ) } - - override predicate isSink(DataFlow::Node sink) { + + predicate isSink(DataFlow::Node sink) { sink = API::getTopLevelMember("File").getAMethodCall("open").getArgument(0) } } + + module EnvironmentToFileFlow = DataFlow::Global; - from EnvironmentToFileConfiguration config, DataFlow::Node environment, DataFlow::Node fileOpen - where config.hasFlow(environment, fileOpen) + from DataFlow::Node environment, DataFlow::Node fileOpen + where EnvironmentToFileFlow::flow(environment, fileOpen) select fileOpen, "This call to 'File.open' uses data from $@.", environment, "an environment variable" diff --git a/docs/codeql/codeql-language-guides/codeql-library-for-go.rst b/docs/codeql/codeql-language-guides/codeql-library-for-go.rst index f1f77c5150e..33468ab7a71 100644 --- a/docs/codeql/codeql-language-guides/codeql-library-for-go.rst +++ b/docs/codeql/codeql-language-guides/codeql-library-for-go.rst @@ -494,14 +494,14 @@ which are sets of data-flow nodes. Given these three sets, CodeQL provides a gen finding paths from a source to a sink, possibly going into and out of functions and fields, but never flowing through a barrier. 
-To define a data-flow configuration, you can define a subclass of ``DataFlow::Configuration``, -overriding the member predicates ``isSource``, ``isSink``, and ``isBarrier`` to define the sets of -sources, sinks, and barriers. +To define a data-flow configuration, you can define a module implementing ``DataFlow::ConfigSig``, +including the predicates ``isSource``, ``isSink``, and ``isBarrier`` to define the sets of +sources, sinks, and barriers. Data flow is then computed by applying +``DataFlow::Global<..>`` to the configuration. Going beyond pure data flow, many security analyses need to perform more general `taint tracking`, which also considers flow through value-transforming operations such as string operations. To track -taint, you can define a subclass of ``TaintTracking::Configuration``, which works similar to -data-flow configurations. +taint, you apply ``TaintTracking::Global<..>`` to your configuration instead. A detailed exposition of global data flow and taint tracking is out of scope for this brief introduction. For a general overview of data flow and taint tracking, see ":ref:`About data flow analysis `." diff --git a/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst b/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst index 7ac699b61c2..58a16b4a464 100644 --- a/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst +++ b/docs/codeql/codeql-language-guides/using-api-graphs-in-ruby.rst @@ -161,20 +161,20 @@ is read flows into a call to ``File.write``. import codeql.ruby.DataFlow import codeql.ruby.ApiGraphs - class Configuration extends DataFlow::Configuration { - Configuration() { this = "File read/write Configuration" } - - override predicate isSource(DataFlow::Node source) { + module Configuration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source = API::getTopLevelMember("File").getMethod("read").getReturn().asSource() } - override predicate isSink(DataFlow::Node sink) { + predicate isSink(DataFlow::Node sink) { sink = API::getTopLevelMember("File").getMethod("write").getParameter(1).asSink() } } - from DataFlow::Node src, DataFlow::Node sink, Configuration config - where config.hasFlow(src, sink) + module Flow = DataFlow::Global; + + from DataFlow::Node src, DataFlow::Node sink + where Flow::flow(src, sink) select src, "The data read here flows into a $@ call.", sink, "File.write" Further reading diff --git a/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst b/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst index 5fdac594389..a2391e40332 100644 --- a/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst +++ b/docs/codeql/ql-training/cpp/global-data-flow-cpp.rst @@ -47,8 +47,8 @@ The library class ``SecurityOptions`` provides a (configurable) model of what co import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSource(DataFlow::Node source) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists (SecurityOptions opts | opts.isUserInput(source.asExpr(), _) ) @@ -70,8 +70,8 @@ Use the ``FormattingFunction`` class to fill in the definition of ``isSink``. import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { /* Fill me in */ } ... 
@@ -90,8 +90,8 @@ Use the ``FormattingFunction`` class, we can write the sink as: import semmle.code.cpp.security.Security - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { exists (FormattingFunction ff, Call c | c.getTarget() = ff and c.getArgument(ff.getFormatParameterIndex()) = sink.asExpr() @@ -117,9 +117,8 @@ Add an additional taint step that (heuristically) taints a local variable if it .. code-block:: ql - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isAdditionalTaintStep(DataFlow::Node pred, - DataFlow::Node succ) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { exists (Call c, Expr arg, LocalVariable lv | arg = c.getAnArgument() and arg = pred.asExpr() and @@ -138,8 +137,8 @@ Add a sanitizer, stopping propagation at parameters of formatting functions, to .. code-block:: ql - class TaintedFormatConfig extends TaintTracking::Configuration { - override predicate isSanitizer(DataFlow::Node nd) { + module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isBarrier(DataFlow::Node nd) { exists (FormattingFunction ff, int idx | idx = ff.getFormatParameterIndex() and nd = DataFlow::parameterNode(ff.getParameter(idx)) diff --git a/docs/codeql/ql-training/java/apache-struts-java.rst b/docs/codeql/ql-training/java/apache-struts-java.rst index 133c64a5851..24186bda48f 100644 --- a/docs/codeql/ql-training/java/apache-struts-java.rst +++ b/docs/codeql/ql-training/java/apache-struts-java.rst @@ -56,7 +56,7 @@ Finding the RCE yourself **Hint**: Use ``Method.getDeclaringType()`` and ``Type.getASupertype()`` -#. Implement a ``DataFlow::Configuration``, defining the source as the first parameter of a ``toObject`` method, and the sink as an instance of ``UnsafeDeserializationSink``. +#. Implement a ``DataFlow::ConfigSig``, defining the source as the first parameter of a ``toObject`` method, and the sink as an instance of ``UnsafeDeserializationSink``. **Hint**: Use ``Node::asParameter()`` @@ -99,13 +99,13 @@ Model answer, step 3 * Configuration that tracks the flow of taint from the first parameter of * `ContentTypeHandler.toObject` to an instance of unsafe deserialization. */ - class StrutsUnsafeDeserializationConfig extends Configuration { - StrutsUnsafeDeserializationConfig() { this = "StrutsUnsafeDeserializationConfig" } - override predicate isSource(Node source) { + module StrutsUnsafeDeserializationConfig implements ConfigSig { + predicate isSource(Node source) { source.asParameter() = any(ContentTypeHandlerDeserialization des).getParameter(0) } - override predicate isSink(Node sink) { sink instanceof UnsafeDeserializationSink } + predicate isSink(Node sink) { sink instanceof UnsafeDeserializationSink } } + module StrutsUnsafeDeserializationFlow = Global; Model answer, step 4 ==================== @@ -114,9 +114,8 @@ Model answer, step 4 import PathGraph ... 
- from PathNode source, PathNode sink, StrutsUnsafeDeserializationConfig conf - where conf.hasFlowPath(source, sink) - and sink.getNode() instanceof UnsafeDeserializationSink - select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input" + from PathNode source, PathNode sink + where StrutsUnsafeDeserializationFlow::flowPath(source, sink) + select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input" More full-featured version: https://github.com/github/securitylab/tree/main/CodeQL_Queries/java/Apache_Struts_CVE-2017-9805 diff --git a/docs/codeql/ql-training/java/global-data-flow-java.rst b/docs/codeql/ql-training/java/global-data-flow-java.rst index 940411d41a3..ddee9645d17 100644 --- a/docs/codeql/ql-training/java/global-data-flow-java.rst +++ b/docs/codeql/ql-training/java/global-data-flow-java.rst @@ -63,12 +63,12 @@ We want to look for method calls where the method name is ``getNamespace()``, an import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSource(DataFlow::Node source) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { exists(Method m | - m.getName() = "getNamespace" and - m.getDeclaringType().getName() = "ActionProxy" and - source.asExpr() = m.getAReference() + m.getName() = "getNamespace" and + m.getDeclaringType().getName() = "ActionProxy" and + source.asExpr() = m.getAReference() ) } ... @@ -90,8 +90,8 @@ Fill in the definition of ``isSink``. import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { /* Fill me in */ } ... @@ -110,9 +110,9 @@ Find a method access to ``compileAndExecute``, and mark the first argument. import semmle.code.java.security.Security - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSink(DataFlow::Node sink) { - exists(MethodAccess ma | + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isSink(DataFlow::Node sink) { + exists(MethodAccess ma | ma.getMethod().getName() = "compileAndExecute" and ma.getArgument(0) = sink.asExpr() ) @@ -133,8 +133,8 @@ A sanitizer allows us to *prevent* flow through a particular node in the graph. .. code-block:: ql - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isSanitizer(DataFlow::Node nd) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isBarrier(DataFlow::Node nd) { nd.getEnclosingCallable() .getDeclaringType() .getName() = "ValueStackShadowMap" @@ -149,9 +149,8 @@ Add an additional taint step that (heuristically) taints a local variable if it .. 
code-block:: ql - class TaintedOGNLConfig extends TaintTracking::Configuration { - override predicate isAdditionalTaintStep(DataFlow::Node node1, - DataFlow::Node node2) { + module TaintedOGNLConfig implements DataFlow::ConfigSig { + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { exists(Field f, RefType t | node1.asExpr() = f.getAnAssignedValue() and node2.asExpr() = f.getAnAccess() and diff --git a/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql b/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql index 9020da5ea2c..3d203c0e147 100644 --- a/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql +++ b/docs/codeql/ql-training/query-examples/cpp/global-data-flow-cpp-1.ql @@ -1,14 +1,14 @@ import cpp import semmle.code.cpp.dataflow.TaintTracking -class TaintedFormatConfig extends TaintTracking::Configuration { - TaintedFormatConfig() { this = "TaintedFormatConfig" } +module TaintedFormatConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { /* TBD */ } - override predicate isSource(DataFlow::Node source) { /* TBD */ } - - override predicate isSink(DataFlow::Node sink) { /* TBD */ } + predicate isSink(DataFlow::Node sink) { /* TBD */ } } -from TaintedFormatConfig cfg, DataFlow::Node source, DataFlow::Node sink -where cfg.hasFlow(source, sink) +module TaintedFormatFlow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink +where TaintedFormatFlow::flow(source, sink) select sink, "This format string may be derived from a $@.", source, "user-controlled value" diff --git a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql index 7604b20eb95..431787cac45 100644 --- a/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql +++ b/docs/codeql/ql-training/query-examples/java/global-data-flow-java-1.ql @@ -1,14 +1,14 @@ import java import semmle.code.java.dataflow.TaintTracking -class TaintedOGNLConfig extends TaintTracking::Configuration { - TaintedOGNLConfig() { this = "TaintedOGNLConfig" } +module TaintedOgnlConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { /* TBD */ } - override predicate isSource(DataFlow::Node source) { /* TBD */ } - - override predicate isSink(DataFlow::Node sink) { /* TBD */ } + predicate isSink(DataFlow::Node sink) { /* TBD */ } } -from TaintedOGNLConfig cfg, DataFlow::Node source, DataFlow::Node sink -where cfg.hasFlow(source, sink) +module TaintedOgnlFlow = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink +where TaintedOgnlFlow::flow(source, sink) select source, "This untrusted input is evaluated as an OGNL expression $@.", sink, "here" diff --git a/docs/codeql/ql-training/slide-snippets/global-data-flow.rst b/docs/codeql/ql-training/slide-snippets/global-data-flow.rst index 6cb52af0f84..f84db924e27 100644 --- a/docs/codeql/ql-training/slide-snippets/global-data-flow.rst +++ b/docs/codeql/ql-training/slide-snippets/global-data-flow.rst @@ -34,18 +34,18 @@ Global taint tracking library The ``semmle.code..dataflow.TaintTracking`` library provides a framework for implementing solvers for global taint tracking problems: - #. Subclass ``TaintTracking::Configuration`` following this template: + #. Implement ``DataFlow::ConfigSig`` and use ``TaintTracking::Global`` following this template: .. 
code-block:: ql - class Config extends TaintTracking::Configuration { - Config() { this = "" } - override predicate isSource(DataFlow::Node nd) { ... } - override predicate isSink(DataFlow::Node nd) { ... } + module Config implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node nd) { ... } + predicate isSink(DataFlow::Node nd) { ... } } + module Flow = TaintTracking::Global; - #. Use ``Config.hasFlow(source, sink)`` to find inter-procedural paths. + #. Use ``Flow::flow(source, sink)`` to find inter-procedural paths. .. note:: - In addition to the taint tracking configuration described here, there is also an equivalent *data flow* configuration in ``semmle.code..dataflow.DataFlow``, ``DataFlow::Configuration``. Data flow configurations are used to track whether the exact value produced by a source is used by a sink, whereas taint tracking configurations are used to determine whether the source may influence the value used at the sink. Whether you use taint tracking or data flow depends on the analysis problem you are trying to solve. + In addition to the taint tracking flow configuration described here, there is also an equivalent *data flow* in ``semmle.code..dataflow.DataFlow``, ``DataFlow::Global``. Data flow is used to track whether the exact value produced by a source is used by a sink, whereas taint tracking is used to determine whether the source may influence the value used at the sink. Whether you use taint tracking or data flow depends on the analysis problem you are trying to solve. diff --git a/docs/codeql/ql-training/slide-snippets/path-queries.rst b/docs/codeql/ql-training/slide-snippets/path-queries.rst index e93f7fda966..6ed8b79814d 100644 --- a/docs/codeql/ql-training/slide-snippets/path-queries.rst +++ b/docs/codeql/ql-training/slide-snippets/path-queries.rst @@ -13,12 +13,16 @@ Use this template: */ import semmle.code..dataflow.TaintTracking - import DataFlow::PathGraph + ... - from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink - where cfg.hasFlowPath(source, sink) + + module Flow = TaintTracking::Global; + import Flow::PathGraph + + from Flow::PathNode source, Flow::PathNode sink + where Flow::flowPath(source, sink) select sink, source, sink, "" .. note:: - To see the paths between the source and the sinks, we can convert the query to a path problem query. There are a few minor changes that need to be made for this to work–we need an additional import, to specify ``PathNode`` rather than ``Node``, and to add the source/sink to the query output (so that we can automatically determine the paths). \ No newline at end of file + To see the paths between the source and the sinks, we can convert the query to a path problem query. There are a few minor changes that need to be made for this to work–we need an additional import, to specify ``PathNode`` rather than ``Node``, and to add the source/sink to the query output (so that we can automatically determine the paths). 
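To make the template concrete, the following sketch fills it in for Java, reusing the string-literal-to-``java.net.URL`` source and sink definitions from the data-flow exercises earlier in this document; the module names and the alert message are illustrative only:

.. code-block:: ql

   /**
    * @kind path-problem
    */

   import java
   import semmle.code.java.dataflow.TaintTracking

   module LiteralToUrlConfig implements DataFlow::ConfigSig {
     predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral }

     predicate isSink(DataFlow::Node sink) {
       exists(Call call |
         sink.asExpr() = call.getArgument(0) and
         call.getCallee().(Constructor).getDeclaringType().hasQualifiedName("java.net", "URL")
       )
     }
   }

   module LiteralToUrlFlow = TaintTracking::Global<LiteralToUrlConfig>;

   import LiteralToUrlFlow::PathGraph

   from LiteralToUrlFlow::PathNode source, LiteralToUrlFlow::PathNode sink
   where LiteralToUrlFlow::flowPath(source, sink)
   select sink.getNode(), source, sink, "This URL may be built from $@.", source.getNode(), "a string literal"

The only structural differences from the non-path version are the ``Flow::PathGraph`` import, the ``PathNode`` types, and the extra ``source``/``sink`` columns in the ``select`` clause.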
diff --git a/docs/codeql/reusables/supported-versions-compilers.rst b/docs/codeql/reusables/supported-versions-compilers.rst index 42d4eaad956..81de83cef95 100644 --- a/docs/codeql/reusables/supported-versions-compilers.rst +++ b/docs/codeql/reusables/supported-versions-compilers.rst @@ -4,7 +4,7 @@ :stub-columns: 1 Language,Variants,Compilers,Extensions - C/C++,"C89, C99, C11, C17, C++98, C++03, C++11, C++14, C++17, C++20 [1]_","Clang (and clang-cl [2]_) extensions (up to Clang 12.0), + C/C++,"C89, C99, C11, C17, C++98, C++03, C++11, C++14, C++17, C++20 [1]_","Clang (including clang-cl [2]_ and armclang) extensions (up to Clang 12.0), GNU extensions (up to GCC 11.1), diff --git a/docs/codeql/writing-codeql-queries/creating-path-queries.rst b/docs/codeql/writing-codeql-queries/creating-path-queries.rst index fc3b18a9b95..2c5b1ff57d7 100644 --- a/docs/codeql/writing-codeql-queries/creating-path-queries.rst +++ b/docs/codeql/writing-codeql-queries/creating-path-queries.rst @@ -58,18 +58,22 @@ You should use the following template: import // For some languages (Java/C++/Python/Swift) you need to explicitly import the data flow library, such as // import semmle.code.java.dataflow.DataFlow or import codeql.swift.dataflow.DataFlow - import DataFlow::PathGraph ... - from MyConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink - where config.hasFlowPath(source, sink) + module Flow = DataFlow::Global; + import Flow::PathGraph + + from Flow::PathNode source, Flow::PathNode sink + where Flow::flowPath(source, sink) select sink.getNode(), source, sink, "" Where: -- ``DataFlow::Pathgraph`` is the path graph module you need to import from the standard CodeQL libraries. -- ``source`` and ``sink`` are nodes on the `path graph `__, and ``DataFlow::PathNode`` is their type. -- ``MyConfiguration`` is a class containing the predicates which define how data may flow between the ``source`` and the ``sink``. +- ``MyConfiguration`` is a module containing the predicates that define how data may flow between the ``source`` and the ``sink``. +- ``Flow`` is the result of the data flow computation based on ``MyConfiguration``. +- ``Flow::Pathgraph`` is the resulting data flow graph module you need to import in order to include path explanations in the query. +- ``source`` and ``sink`` are nodes in the graph as defined in the configuration, and ``Flow::PathNode`` is their type. +- ``DataFlow::Global<..>`` is an invocation of data flow. ``TaintTracking::Global<..>`` can be used instead to include a default set of additional taint steps. The following sections describe the main requirements for a valid path query. @@ -83,14 +87,14 @@ The other metadata requirements depend on how you intend to run the query. For m Generating path explanations **************************** -In order to generate path explanations, your query needs to compute a `path graph `__. +In order to generate path explanations, your query needs to compute a graph. To do this you need to define a :ref:`query predicate ` called ``edges`` in your query. This predicate defines the edge relations of the graph you are computing, and it is used to compute the paths related to each result that your query generates. You can import a predefined ``edges`` predicate from a path graph module in one of the standard data flow libraries. In addition to the path graph module, the data flow libraries contain the other ``classes``, ``predicates``, and ``modules`` that are commonly used in data flow analysis. .. 
code-block:: ql - import DataFlow::PathGraph + import MyFlow::PathGraph This statement imports the ``PathGraph`` module from the data flow library (``DataFlow.qll``), in which ``edges`` is defined. @@ -106,7 +110,7 @@ You can also define your own ``edges`` predicate in the body of your query. It s .. code-block:: ql query predicate edges(PathNode a, PathNode b) { - /** Logical conditions which hold if `(a,b)` is an edge in the data flow graph */ + /* Logical conditions which hold if `(a,b)` is an edge in the data flow graph */ } For more examples of how to define an ``edges`` predicate, visit the `standard CodeQL libraries `__ and search for ``edges``. @@ -117,14 +121,23 @@ Declaring sources and sinks You must provide information about the ``source`` and ``sink`` in your path query. These are objects that correspond to the nodes of the paths that you are exploring. The name and the type of the ``source`` and the ``sink`` must be declared in the ``from`` statement of the query, and the types must be compatible with the nodes of the graph computed by the ``edges`` predicate. -If you are querying C/C++, C#, Go, Java, JavaScript, Python, or Ruby code (and you have used ``import DataFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the ``Configuration`` class in the data flow library. You should declare all three of these objects in the ``from`` statement. +If you are querying C/C++, C#, Go, Java, JavaScript, Python, or Ruby code (and you have used ``import MyFlow::PathGraph`` in your query), the definitions of the ``source`` and ``sink`` are accessed via the module resulting from the application of the ``Global<..>`` module in the data flow library. You should declare both of these objects in the ``from`` statement. For example: .. code-block:: ql - from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink + module MyFlow = DataFlow::Global; -The configuration class is accessed by importing the data flow library. This class contains the predicates which define how data flow is treated in the query: + from MyFlow::PathNode source, MyFlow::PathNode sink + +The configuration module must be defined to include definitions of sources and sinks. For example: + +.. code-block:: ql + + module MyConfiguration implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { ... } + predicate isSink(DataFlow::Node source) { ... } + } - ``isSource()`` defines where data may flow from. - ``isSink()`` defines where data may flow to. @@ -141,11 +154,11 @@ This clause can use :ref:`aggregations `, :ref:`predicates ; + + from MyFlow::PathNode source, MyFlow::PathNode sink + where MyFlow::flowPath(source, sink) select sink.getNode(), source, sink, "Sink is reached from $@.", source.getNode(), "here" The same query can be slightly simplified by rewriting it without :ref:`path explanations `: .. code-block:: ql - from MyConfig config, DataFlow::Node source, DataFlow::Node sink - where config.hasPath(source, sink) + from DataFlow::Node source, DataFlow::Node sink + where MyFlow::flow(source, sink) select sink, "Sink is reached from $@.", source.getNode(), "here" If a data-flow query that you have written doesn't produce the results you expect it to, there may be a problem with your query. @@ -48,7 +48,7 @@ Data-flow configurations contain a parameter called ``fieldFlowBranchLimit``. If .. 
code-block:: ql - override int fieldFlowBranchLimit() { result = 5000 } + int fieldFlowBranchLimit() { result = 5000 } If there are still no results and performance is still usable, then it is best to leave this set to a high value while doing further debugging. @@ -57,7 +57,7 @@ Partial flow A naive next step could be to change the sink definition to ``any()``. This would mean that we would get a lot of flow to all the places that are reachable from the sources. While this approach may work in some cases, you might find that it produces so many results that it's very hard to explore the findings. It can also dramatically affect query performance. More importantly, you might not even see all the partial flow paths. This is because the data-flow library tries very hard to prune impossible paths and, since field stores and reads must be evenly matched along a path, we will never see paths going through a store that fail to reach a corresponding read. This can make it hard to see where flow actually stops. -To avoid these problems, a data-flow ``Configuration`` comes with a mechanism for exploring partial flow that tries to deal with these caveats. This is the ``Configuration.hasPartialFlow`` predicate: +To avoid these problems, the data-flow library comes with a mechanism for exploring partial flow that tries to deal with these caveats. This is the ``MyFlow::FlowExploration::partialFlow`` predicate: .. code-block:: ql @@ -71,25 +71,23 @@ To avoid these problems, a data-flow ``Configuration`` comes with a mechanism fo * perform poorly if the number of sources is too big and/or the exploration * limit is set too high without using barriers. * - * This predicate is disabled (has no results) by default. Override - * `explorationLimit()` with a suitable number to enable this predicate. - * * To use this in a `path-problem` query, import the module `PartialPathGraph`. */ - final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + predicate partialFlow(PartialPathNode source, PartialPathNode node, int dist) { -There is also a ``Configuration.hasPartialFlowRev`` for exploring flow backwards from a sink. +There is also a ``partialFlowRev`` predicate for exploring flow backwards from a sink. -As noted in the documentation for ``hasPartialFlow`` (for example, in the -`CodeQL for Java documentation `__) you must first enable this by adding an override of ``explorationLimit``. For example: +To get access to these predicates you must instantiate the ``MyFlow::FlowExploration`` module with an exploration limit. For example: .. code-block:: ql - override int explorationLimit() { result = 5 } + int explorationLimit() { result = 5 } -This defines the exploration radius within which ``hasPartialFlow`` returns results. + module MyPartialFlow = MyFlow::FlowExploration<explorationLimit/0>; -To get good performance when using ``hasPartialFlow`` it is important to ensure the ``isSink`` predicate of the configuration has no results. Likewise, when using ``hasPartialFlowRev`` the ``isSource`` predicate of the configuration should have no results. +This defines the exploration radius within which ``partialFlow`` returns results. + +To get good performance when using ``partialFlow`` it is important to ensure the ``isSink`` predicate of the configuration has no results. Likewise, when using ``partialFlowRev`` the ``isSource`` predicate of the configuration should have no results. It is also useful to focus on a single source at a time as the starting point for the flow exploration.
This is most easily done by adding a temporary restriction in the ``isSource`` predicate. @@ -97,9 +95,9 @@ To do quick evaluations of partial flow it is often easiest to add a predicate t .. code-block:: ql - predicate adhocPartialFlow(Callable c, PartialPathNode n, Node src, int dist) { - exists(MyConfig conf, PartialPathNode source | - conf.hasPartialFlow(source, n, dist) and + predicate adhocPartialFlow(Callable c, MyPartialFlow::PartialPathNode n, Node src, int dist) { + exists(MyPartialFlow::PartialPathNode source | + MyPartialFlow::partialFlow(source, n, dist) and src = source.getNode() and c = n.getNode().getEnclosingCallable() ) @@ -111,7 +109,7 @@ If you are focusing on a single source then the ``src`` column is superfluous. Y If you see a large number of partial flow results, you can focus them in a couple of ways: - If flow travels a long distance following an expected path, that can result in a lot of uninteresting flow being included in the exploration radius. To reduce the amount of uninteresting flow, you can replace the source definition with a suitable ``node`` that appears along the path and restart the partial flow exploration from that point. -- Creative use of barriers and sanitizers can be used to cut off flow paths that are uninteresting. This also reduces the number of partial flow results to explore while debugging. +- Creative use of barriers can be used to cut off flow paths that are uninteresting. This also reduces the number of partial flow results to explore while debugging. Further reading ---------------- diff --git a/docs/ql-design-patterns.md b/docs/ql-design-patterns.md index b6d7eab88fc..8d8d126cba2 100644 --- a/docs/ql-design-patterns.md +++ b/docs/ql-design-patterns.md @@ -72,14 +72,12 @@ Importing new files can modify the behaviour of the standard library, by introdu Therefore, unless you have good reason not to, you should ensure that all subclasses are included when the base-class is (to the extent possible). -One example where this _does not_ apply: `DataFlow::Configuration` and its variants are meant to be subclassed, but we generally do not want to import all configurations into the same scope at once. - ## Abstract classes as open or closed unions A class declared as `abstract` in QL represents a union of its direct subtypes (restricted by the intersections of its supertypes and subject to its characteristic predicate). Depending on context, we may want this union to be considered "open" or "closed". -An open union is generally used for extensibility. For example, the abstract classes suggested by the `::Range` design pattern are explicitly intended as extension hooks. As another example, the `DataFlow::Configuration` design pattern provides an abstract class that is intended to be subclassed as a configuration mechanism. +An open union is generally used for extensibility. For example, the abstract classes suggested by the `::Range` design pattern are explicitly intended as extension hooks. A closed union is a class for which we do not expect users of the library to add more values. Historically, we have occasionally modelled this as `abstract` classes in QL, but these days that would be considered an anti-pattern: Abstract classes that are intended to be closed behave in surprising ways when subclassed by library users, and importing libraries that include derived classes can invalidate compilation caches and subvert the meaning of the program. 
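As a concrete illustration of the `::Range` extension-hook idiom referred to above (the same shape as `Http::Redirect::Range`, which several of the Go changes below touch), here is a minimal sketch of the pattern. The `SystemCommandExecution` name and its `getCommand()` member are hypothetical, chosen only to show the open-union structure; they are not taken from this pull request.

```ql
import go

/** A data flow node that executes an operating-system command. */
class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range {
  /** Gets the argument that specifies the command to be executed. */
  DataFlow::Node getCommand() { result = super.getCommand() }
}

/** Provides a class for modelling new APIs that execute operating-system commands. */
module SystemCommandExecution {
  /**
   * The extension point: subclass this to teach the library about another
   * command-executing API. Existing queries pick up the new models automatically.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the command to be executed. */
    abstract DataFlow::Node getCommand();
  }
}
```

Library users then extend the open union by adding `class MyExec extends SystemCommandExecution::Range { ... }` in their own files, without touching the wrapper class that queries actually use.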
diff --git a/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md b/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md index 1eec83af074..c8cdc34f827 100644 --- a/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--add-support-for-bun-framework.md @@ -1,3 +1,4 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for the [Bun framework](https://bun.uptrace.dev/) has been added. - diff --git a/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md b/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md index cf170b1e9fb..b1e030c13e9 100644 --- a/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--add-support-for-gqlgen-framework.md @@ -1,2 +1,4 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for [gqlgen](https://github.com/99designs/gqlgen) has been added. diff --git a/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md b/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md index ab6aa93c14b..4f321faa82b 100644 --- a/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md +++ b/go/ql/lib/change-notes/2023-06-28--improve-support-for-gopg-framework.md @@ -1,3 +1,5 @@ -lgtm,codescanning +--- +category: minorAnalysis +--- * Support for the [go-pg framework](https://github.com/go-pg/pg) has been improved. diff --git a/go/ql/lib/change-notes/2023-07-19-method-call-node.md b/go/ql/lib/change-notes/2023-07-19-method-call-node.md deleted file mode 100644 index 765e8d5589e..00000000000 --- a/go/ql/lib/change-notes/2023-07-19-method-call-node.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: fix ---- -* The definition of `DataFlow::MethodCallNode` has been expanded to include `DataFlow::CallNode`s where what is being called is a variable that has a method assigned to it within the calling function. This means we can now follow data flow into the receiver of such a method call. 
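The change note deleted just above concerns calls made through a method value (for example `f := file.Close` followed by `f()`), and the Go library changes that follow move `getReceiver()` from `DataFlow::MethodCallNode` up to `DataFlow::CallNode`. As a rough, throwaway sketch of what that post-change API allows (not code from this pull request), a query listing each call together with the receiver it resolves to could look like this:

```ql
import go

// After this change, getReceiver() is available on CallNode itself, so calls
// made through a method value bound to a local variable are covered as well.
from DataFlow::CallNode call
where exists(call.getReceiver())
select call, call.getReceiver()
```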
diff --git a/go/ql/lib/semmle/go/controlflow/IR.qll b/go/ql/lib/semmle/go/controlflow/IR.qll index ce120674b4c..4df0867e4c6 100644 --- a/go/ql/lib/semmle/go/controlflow/IR.qll +++ b/go/ql/lib/semmle/go/controlflow/IR.qll @@ -744,7 +744,7 @@ module IR { override Type getResultType() { exists(CallExpr c | this.getBase() = evalExprInstruction(c) | - result = c.getCalleeType().getResultType(i) + result = c.getTarget().getResultType(i) ) or exists(Expr e | this.getBase() = evalExprInstruction(e) | diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll index 9bcfde9f9b0..ea768f54715 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll @@ -8,7 +8,7 @@ private import DataFlowPrivate private predicate isInterfaceCallReceiver( DataFlow::CallNode call, DataFlow::Node recv, InterfaceType tp, string m ) { - call.(DataFlow::MethodCallNode).getReceiver() = recv and + call.getReceiver() = recv and recv.getType().getUnderlyingType() = tp and m = call.getACalleeIncludingExternals().asFunction().getName() } @@ -149,9 +149,7 @@ predicate golangSpecificParamArgFilter( // Interface methods calls may be passed strictly to that exact method's model receiver: arg.getPosition() != -1 or - exists(Function callTarget | - callTarget = call.getNode().(DataFlow::CallNode).getACalleeIncludingExternals().asFunction() - | + exists(Function callTarget | callTarget = call.getNode().(DataFlow::CallNode).getTarget() | not isInterfaceMethod(callTarget) or callTarget = p.getCallable().asSummarizedCallable().asFunction() diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll index dc2811045ed..59224024ec3 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll @@ -439,8 +439,8 @@ module Public { CallNode getCallNode() { result = call } override Type getType() { - exists(SignatureType t | t = call.getCall().getCalleeType() | - result = t.getParameterType(t.getNumParameter() - 1) + exists(Function f | f = call.getTarget() | + result = f.getParameterType(f.getNumParameter() - 1) ) } @@ -474,11 +474,7 @@ module Public { class CallNode extends ExprNode { override CallExpr expr; - /** - * Gets the declared target of this call, if it exists. - * - * This doesn't exist when a function is called via a variable. - */ + /** Gets the declared target of this call */ Function getTarget() { result = expr.getTarget() } private DataFlow::Node getACalleeSource() { result = getACalleeSource(this) } @@ -626,40 +622,16 @@ module Public { /** Gets a result of this call. */ Node getAResult() { result = this.getResult(_) } - /** - * Gets the data flow node corresponding to the receiver of this call, if any. - * - * When a method value is assigned to a variable then when it is called it - * looks like a function call, as in the following example. - * ```go - * file, _ := os.Open("test.txt") - * f := file.Close - * f() - * ``` - * In this case we use local flow to try to find the receiver (`file` in - * the above example). - */ + /** Gets the data flow node corresponding to the receiver of this call, if any. */ Node getReceiver() { result = this.getACalleeSource().(MethodReadNode).getReceiver() } /** Holds if this call has an ellipsis after its last argument. 
*/ predicate hasEllipsis() { expr.hasEllipsis() } } - /** - * A data flow node that represents a call to a method. - * - * When a method value is assigned to a variable then when it is called it - * syntactically looks like a function call, as in the following example. - * ```go - * file, _ := os.Open("test.txt") - * f := file.Close - * f() - * ``` - * In this case we use local flow to see whether a method is assigned to the - * function. - */ + /** A data flow node that represents a call to a method. */ class MethodCallNode extends CallNode { - MethodCallNode() { getACalleeSource(this) instanceof MethodReadNode } + MethodCallNode() { expr.getTarget() instanceof Method } override Method getTarget() { result = expr.getTarget() } diff --git a/go/ql/lib/semmle/go/frameworks/Gqlgen.qll b/go/ql/lib/semmle/go/frameworks/Gqlgen.qll index 0a86dcde0b9..a4c3993d5d4 100644 --- a/go/ql/lib/semmle/go/frameworks/Gqlgen.qll +++ b/go/ql/lib/semmle/go/frameworks/Gqlgen.qll @@ -7,7 +7,7 @@ module Gqlgen { /** An autogenerated file containing gqlgen code. */ private class GqlgenGeneratedFile extends File { GqlgenGeneratedFile() { - exists(DataFlow::MethodCallNode call | + exists(DataFlow::CallNode call | call.getReceiver().getType().hasQualifiedName("github.com/99designs/gqlgen/graphql", _) and call.getFile() = this ) diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index a32f9a5486f..b3f1d075c86 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -131,7 +131,7 @@ module NetHttp { ) or stack = SummaryComponentStack::argument(-1) and - result = call.(DataFlow::MethodCallNode).getReceiver() + result = call.getReceiver() } private class ResponseBody extends Http::ResponseBody::Range { diff --git a/go/ql/lib/semmle/go/security/ExternalAPIs.qll b/go/ql/lib/semmle/go/security/ExternalAPIs.qll index 689ddba1b18..8a27ce28c2a 100644 --- a/go/ql/lib/semmle/go/security/ExternalAPIs.qll +++ b/go/ql/lib/semmle/go/security/ExternalAPIs.qll @@ -86,7 +86,7 @@ class ExternalApiDataNode extends DataFlow::Node { this = call.getArgument(i) or // Receiver to a call to a method which returns non trivial value - this = call.(DataFlow::MethodCallNode).getReceiver() and + this = call.getReceiver() and i = -1 ) and // Not defined in the code that is being analyzed diff --git a/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll b/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll index 8685b4b5562..5f0572db11e 100644 --- a/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll +++ b/go/ql/lib/semmle/go/security/SafeUrlFlowCustomizations.qll @@ -33,9 +33,7 @@ module SafeUrlFlow { /** A function model step using `UnsafeUrlMethod`, considered as a sanitizer for safe URL flow. */ private class UnsafeUrlMethodEdge extends SanitizerEdge { - UnsafeUrlMethodEdge() { - this = any(UnsafeUrlMethod um).getACall().(DataFlow::MethodCallNode).getReceiver() - } + UnsafeUrlMethodEdge() { this = any(UnsafeUrlMethod um).getACall().getReceiver() } } /** Any slicing of the URL, considered as a sanitizer for safe URL flow. 
*/ diff --git a/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql b/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql index 413753ac26d..c78a9d4a36f 100644 --- a/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql +++ b/go/ql/src/InconsistentCode/UnhandledCloseWritableHandle.ql @@ -90,7 +90,7 @@ predicate isWritableFileHandle(DataFlow::Node source, DataFlow::CallNode call) { /** * Holds if `os.File.Close` is called on `sink`. */ -predicate isCloseSink(DataFlow::Node sink, DataFlow::MethodCallNode closeCall) { +predicate isCloseSink(DataFlow::Node sink, DataFlow::CallNode closeCall) { // find calls to the os.File.Close function closeCall = any(CloseFileFun f).getACall() and // that are unhandled @@ -115,7 +115,7 @@ predicate isCloseSink(DataFlow::Node sink, DataFlow::MethodCallNode closeCall) { * Holds if `os.File.Sync` is called on `sink` and the result of the call is neither * deferred nor discarded. */ -predicate isHandledSync(DataFlow::Node sink, DataFlow::MethodCallNode syncCall) { +predicate isHandledSync(DataFlow::Node sink, DataFlow::CallNode syncCall) { // find a call of the `os.File.Sync` function syncCall = any(SyncFileFun f).getACall() and // match the sink with the object on which the method is called diff --git a/go/ql/src/Security/CWE-352/ConstantOauth2State.ql b/go/ql/src/Security/CWE-352/ConstantOauth2State.ql index 57f899cc80a..abe982f7fe5 100644 --- a/go/ql/src/Security/CWE-352/ConstantOauth2State.ql +++ b/go/ql/src/Security/CWE-352/ConstantOauth2State.ql @@ -113,7 +113,7 @@ class PrivateUrlFlowsToAuthCodeUrlCall extends DataFlow::Configuration { ) } - predicate isSinkCall(DataFlow::Node sink, DataFlow::MethodCallNode call) { + predicate isSinkCall(DataFlow::Node sink, DataFlow::CallNode call) { exists(AuthCodeUrl m | call = m.getACall() | sink = call.getReceiver()) } diff --git a/go/ql/src/experimental/CWE-1004/AuthCookie.qll b/go/ql/src/experimental/CWE-1004/AuthCookie.qll index bbc0681d192..676249c7b8c 100644 --- a/go/ql/src/experimental/CWE-1004/AuthCookie.qll +++ b/go/ql/src/experimental/CWE-1004/AuthCookie.qll @@ -189,11 +189,11 @@ class GorillaCookieStoreSaveTrackingConfiguration extends DataFlow::Configuratio } override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::MethodCallNode mcn | - mcn.getTarget() + exists(DataFlow::MethodCallNode cn | + cn.getTarget() .hasQualifiedName(package("github.com/gorilla/sessions", ""), "CookieStore", "Get") and - pred = mcn.getReceiver() and - succ = mcn.getResult(0) + pred = cn.getReceiver() and + succ = cn.getResult(0) ) } } diff --git a/go/ql/src/experimental/CWE-285/PamAuthBypass.ql b/go/ql/src/experimental/CWE-285/PamAuthBypass.ql index 0de25e94a73..3e6864c021f 100644 --- a/go/ql/src/experimental/CWE-285/PamAuthBypass.ql +++ b/go/ql/src/experimental/CWE-285/PamAuthBypass.ql @@ -41,7 +41,7 @@ class PamStartToAcctMgmtConfig extends TaintTracking::Configuration { } override predicate isSink(DataFlow::Node sink) { - exists(PamAcctMgmt p | p.getACall().(DataFlow::MethodCallNode).getReceiver() = sink) + exists(PamAcctMgmt p | p.getACall().getReceiver() = sink) } } @@ -53,7 +53,7 @@ class PamStartToAuthenticateConfig extends TaintTracking::Configuration { } override predicate isSink(DataFlow::Node sink) { - exists(PamAuthenticate p | p.getACall().(DataFlow::MethodCallNode).getReceiver() = sink) + exists(PamAuthenticate p | p.getACall().getReceiver() = sink) } } diff --git a/go/ql/src/experimental/CWE-134/DsnBad.go b/go/ql/src/experimental/CWE-74/DsnBad.go 
similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnBad.go rename to go/ql/src/experimental/CWE-74/DsnBad.go diff --git a/go/ql/src/experimental/CWE-134/DsnGood.go b/go/ql/src/experimental/CWE-74/DsnGood.go similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnGood.go rename to go/ql/src/experimental/CWE-74/DsnGood.go diff --git a/go/ql/src/experimental/CWE-134/DsnInjection.qhelp b/go/ql/src/experimental/CWE-74/DsnInjection.qhelp similarity index 100% rename from go/ql/src/experimental/CWE-134/DsnInjection.qhelp rename to go/ql/src/experimental/CWE-74/DsnInjection.qhelp diff --git a/go/ql/src/experimental/CWE-134/DsnInjection.ql b/go/ql/src/experimental/CWE-74/DsnInjection.ql similarity index 81% rename from go/ql/src/experimental/CWE-134/DsnInjection.ql rename to go/ql/src/experimental/CWE-74/DsnInjection.ql index 89bb83f9284..3197d04534b 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjection.ql +++ b/go/ql/src/experimental/CWE-74/DsnInjection.ql @@ -6,7 +6,7 @@ * @id go/dsn-injection * @tags security * experimental - * external/cwe/cwe-134 + * external/cwe/cwe-74 */ import go @@ -18,5 +18,5 @@ private class UntrustedFlowAsSource extends Source instanceof UntrustedFlowSourc from DsnInjection cfg, DataFlow::PathNode source, DataFlow::PathNode sink where cfg.hasFlowPath(source, sink) -select sink.getNode(), source, sink, "This query depends on a $@.", source.getNode(), - "user-provided value" +select sink.getNode(), source, sink, "Data-Source Name is built using $@.", source.getNode(), + "untrusted user input" diff --git a/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll b/go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll similarity index 90% rename from go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll rename to go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll index de547b8a07d..e66cb7cba73 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjectionCustomizations.qll +++ b/go/ql/src/experimental/CWE-74/DsnInjectionCustomizations.qll @@ -14,8 +14,11 @@ class DsnInjection extends TaintTracking::Configuration { override predicate isSource(DataFlow::Node node) { node instanceof Source } override predicate isSink(DataFlow::Node node) { - exists(Function f | f.hasQualifiedName("database/sql", "Open") | - node = f.getACall().getArgument(1) + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("database/sql", "Open") and + c.getArgument(0).getStringValue() = "mysql" + | + node = c.getArgument(1) ) } diff --git a/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql b/go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql similarity index 96% rename from go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql rename to go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql index 7ecd3b1cc8a..ff4142f12d4 100644 --- a/go/ql/src/experimental/CWE-134/DsnInjectionLocal.ql +++ b/go/ql/src/experimental/CWE-74/DsnInjectionLocal.ql @@ -6,7 +6,7 @@ * @id go/dsn-injection-local * @tags security * experimental - * external/cwe/cwe-134 + * external/cwe/cwe-74 */ import go diff --git a/go/ql/src/experimental/frameworks/CleverGo.qll b/go/ql/src/experimental/frameworks/CleverGo.qll index 20991dbdd03..2433ba4997a 100644 --- a/go/ql/src/experimental/frameworks/CleverGo.qll +++ b/go/ql/src/experimental/frameworks/CleverGo.qll @@ -174,7 +174,7 @@ private module CleverGo { /** * Models HTTP redirects. 
*/ - private class HttpRedirect extends Http::Redirect::Range, DataFlow::MethodCallNode { + private class HttpRedirect extends Http::Redirect::Range, DataFlow::CallNode { DataFlow::Node urlNode; HttpRedirect() { @@ -211,7 +211,7 @@ private module CleverGo { string package, string receiverName, DataFlow::Node bodyNode, string contentTypeString, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -317,7 +317,7 @@ private module CleverGo { string package, string receiverName, DataFlow::Node bodyNode, DataFlow::Node contentTypeNode, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -356,7 +356,7 @@ private module CleverGo { private predicate setsBody( string package, string receiverName, DataFlow::Node receiverNode, DataFlow::Node bodyNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -400,7 +400,7 @@ private module CleverGo { // Holds for a call that sets a header with a key-value combination. private predicate setsHeaderDynamicKeyValue( - string package, string receiverName, DataFlow::MethodCallNode headerSetterCall, + string package, string receiverName, DataFlow::CallNode headerSetterCall, DataFlow::Node headerNameNode, DataFlow::Node headerValueNode, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -446,7 +446,7 @@ private module CleverGo { // Holds for a call that sets the content-type header (implicit). private predicate setsStaticHeaderContentType( - string package, string receiverName, DataFlow::MethodCallNode setterCall, string valueString, + string package, string receiverName, DataFlow::CallNode setterCall, string valueString, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -501,8 +501,8 @@ private module CleverGo { // Holds for a call that sets the content-type header via a parameter. private predicate setsDynamicHeaderContentType( - string package, string receiverName, DataFlow::MethodCallNode setterCall, - DataFlow::Node valueNode, DataFlow::Node receiverNode + string package, string receiverName, DataFlow::CallNode setterCall, DataFlow::Node valueNode, + DataFlow::Node receiverNode ) { exists(string methodName, Method met | met.hasQualifiedName(package, receiverName, methodName) and diff --git a/go/ql/src/experimental/frameworks/Fiber.qll b/go/ql/src/experimental/frameworks/Fiber.qll index 3f94d678705..27bb9bbcd10 100644 --- a/go/ql/src/experimental/frameworks/Fiber.qll +++ b/go/ql/src/experimental/frameworks/Fiber.qll @@ -129,7 +129,7 @@ private module Fiber { /** * Models HTTP redirects. 
*/ - private class Redirect extends Http::Redirect::Range, DataFlow::MethodCallNode { + private class Redirect extends Http::Redirect::Range, DataFlow::CallNode { DataFlow::Node urlNode; Redirect() { @@ -167,7 +167,7 @@ private module Fiber { // Holds for a call that sets a header with a key-value combination. private predicate setsHeaderDynamicKeyValue( - string package, string receiverName, DataFlow::MethodCallNode headerSetterCall, + string package, string receiverName, DataFlow::CallNode headerSetterCall, DataFlow::Node headerNameNode, DataFlow::Node headerValueNode, DataFlow::Node receiverNode ) { exists(string methodName, Method met | @@ -215,7 +215,7 @@ private module Fiber { string package, string receiverName, DataFlow::Node bodyNode, string contentTypeString, DataFlow::Node receiverNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() @@ -254,7 +254,7 @@ private module Fiber { private predicate setsBody( string package, string receiverName, DataFlow::Node receiverNode, DataFlow::Node bodyNode ) { - exists(string methodName, Method met, DataFlow::MethodCallNode bodySetterCall | + exists(string methodName, Method met, DataFlow::CallNode bodySetterCall | met.hasQualifiedName(package, receiverName, methodName) and bodySetterCall = met.getACall() and receiverNode = bodySetterCall.getReceiver() diff --git a/go/ql/test/experimental/CWE-134/DsnInjection.qlref b/go/ql/test/experimental/CWE-134/DsnInjection.qlref deleted file mode 100644 index c2308280884..00000000000 --- a/go/ql/test/experimental/CWE-134/DsnInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/CWE-134/DsnInjection.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref b/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref deleted file mode 100644 index b7b7e2bdbdd..00000000000 --- a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/CWE-134/DsnInjectionLocal.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/Dsn.go b/go/ql/test/experimental/CWE-74/Dsn.go similarity index 100% rename from go/ql/test/experimental/CWE-134/Dsn.go rename to go/ql/test/experimental/CWE-74/Dsn.go diff --git a/go/ql/test/experimental/CWE-134/DsnInjection.expected b/go/ql/test/experimental/CWE-74/DsnInjection.expected similarity index 80% rename from go/ql/test/experimental/CWE-134/DsnInjection.expected rename to go/ql/test/experimental/CWE-74/DsnInjection.expected index 531bdb0ead2..2f92b181757 100644 --- a/go/ql/test/experimental/CWE-134/DsnInjection.expected +++ b/go/ql/test/experimental/CWE-74/DsnInjection.expected @@ -9,4 +9,4 @@ nodes | Dsn.go:50:29:50:33 | dbDSN | semmle.label | dbDSN | subpaths #select -| Dsn.go:50:29:50:33 | dbDSN | Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN | This query depends on a $@. | Dsn.go:47:10:47:30 | call to FormValue | user-provided value | +| Dsn.go:50:29:50:33 | dbDSN | Dsn.go:47:10:47:30 | call to FormValue | Dsn.go:50:29:50:33 | dbDSN | Data-Source Name is built using $@. 
| Dsn.go:47:10:47:30 | call to FormValue | untrusted user input | diff --git a/go/ql/test/experimental/CWE-74/DsnInjection.qlref b/go/ql/test/experimental/CWE-74/DsnInjection.qlref new file mode 100644 index 00000000000..7bd852e221f --- /dev/null +++ b/go/ql/test/experimental/CWE-74/DsnInjection.qlref @@ -0,0 +1 @@ +experimental/CWE-74/DsnInjection.ql \ No newline at end of file diff --git a/go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.expected similarity index 100% rename from go/ql/test/experimental/CWE-134/DsnInjectionLocal.expected rename to go/ql/test/experimental/CWE-74/DsnInjectionLocal.expected diff --git a/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref new file mode 100644 index 00000000000..97fa2ad022a --- /dev/null +++ b/go/ql/test/experimental/CWE-74/DsnInjectionLocal.qlref @@ -0,0 +1 @@ +experimental/CWE-74/DsnInjectionLocal.ql \ No newline at end of file diff --git a/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go b/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go index 54aa78b7a90..4e2dc169c6d 100644 --- a/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go +++ b/go/ql/test/library-tests/semmle/go/dataflow/GenericFunctionsAndTypes/generictypesandmethods.go @@ -51,7 +51,7 @@ func main() { gs1 := GenericStruct1[string]{""} gs1.Field = source() f := gs1.Getter - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs1 := GenericStruct1[string]{""} @@ -62,7 +62,7 @@ func main() { gs1 := GenericStruct1[string]{""} f := gs1.Setter f(source()) - sink(gs1.Field) // $ hasValueFlow="selection of Field" + sink(gs1.Field) // $ MISSING: hasValueFlow="selection of Field" } { @@ -87,7 +87,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} gs2.Field1 = source() f := gs2.Getter1 - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs2 := GenericStruct2[string, string]{"", ""} @@ -98,7 +98,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} f := gs2.Setter1 f(source()) - sink(gs2.Field1) // $ hasValueFlow="selection of Field1" + sink(gs2.Field1) // $ MISSING: hasValueFlow="selection of Field1" } { @@ -123,7 +123,7 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} gs2.Field2 = source() f := gs2.Getter2 - sink(f()) // $ hasValueFlow="call to f" + sink(f()) // $ MISSING: hasValueFlow="call to f" } { gs2 := GenericStruct2[string, string]{"", ""} @@ -134,6 +134,6 @@ func main() { gs2 := GenericStruct2[string, string]{"", ""} f := gs2.Setter2 f(source()) - sink(gs2.Field2) // $ hasValueFlow="selection of Field2" + sink(gs2.Field2) // $ MISSING: hasValueFlow="selection of Field2" } } diff --git a/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql b/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql index 169b334ef29..c4c0cafb50e 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql +++ b/go/ql/test/library-tests/semmle/go/frameworks/Yaml/tests.ql @@ -13,10 +13,9 @@ DataFlow::CallNode getAYamlCall() { predicate isSourceSinkPair(DataFlow::Node inNode, DataFlow::Node outNode) { exists(DataFlow::CallNode cn | cn = getAYamlCall() | - inNode = [cn.getAnArgument(), cn.(DataFlow::MethodCallNode).getReceiver()] and + inNode = [cn.getAnArgument(), cn.getReceiver()] and ( - 
outNode.(DataFlow::PostUpdateNode).getPreUpdateNode() = - [cn.getAnArgument(), cn.(DataFlow::MethodCallNode).getReceiver()] + outNode.(DataFlow::PostUpdateNode).getPreUpdateNode() = [cn.getAnArgument(), cn.getReceiver()] or outNode = cn.getAResult() ) diff --git a/java/documentation/library-coverage/coverage.csv b/java/documentation/library-coverage/coverage.csv index 1681b511d92..b0d449b810a 100644 --- a/java/documentation/library-coverage/coverage.csv +++ b/java/documentation/library-coverage/coverage.csv @@ -55,7 +55,7 @@ jakarta.ws.rs.container,,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,, jakarta.ws.rs.core,2,,149,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,,,,,,94,55 java.awt,,,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3 java.beans,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1, -java.io,50,,45,,,22,,,,,,,,,,,,,,28,,,,,,,,,,,,,,,,,,,43,2 +java.io,50,,46,,,22,,,,,,,,,,,,,,28,,,,,,,,,,,,,,,,,,,44,2 java.lang,31,,93,,13,,,,,,,,,,,,8,,,5,,,4,,,1,,,,,,,,,,,,,57,36 java.net,13,3,23,,,,,,,,,,,,,,,,,,,,,,,,,,13,,,,,,,,,3,23, java.nio,53,,36,,,5,,,,,,,,,,,,,,47,,,,,,,,,1,,,,,,,,,,36, diff --git a/java/documentation/library-coverage/coverage.rst b/java/documentation/library-coverage/coverage.rst index d07f04c3897..80ace3a0b33 100644 --- a/java/documentation/library-coverage/coverage.rst +++ b/java/documentation/library-coverage/coverage.rst @@ -18,10 +18,10 @@ Java framework & library support `Google Guava `_,``com.google.common.*``,,730,41,7,,,,, JBoss Logging,``org.jboss.logging``,,,324,,,,,, `JSON-java `_,``org.json``,,236,,,,,,, - Java Standard Library,``java.*``,3,688,205,80,,9,,,18 + Java Standard Library,``java.*``,3,689,205,80,,9,,,18 Java extensions,"``javax.*``, ``jakarta.*``",63,672,34,2,4,,1,1,2 Kotlin Standard Library,``kotlin*``,,1849,16,14,,,,,2 `Spring `_,``org.springframework.*``,29,483,115,4,,28,14,,35 Others,"``antlr``, ``cn.hutool.core.codec``, ``com.alibaba.druid.sql``, ``com.esotericsoftware.kryo.io``, ``com.esotericsoftware.kryo5.io``, ``com.fasterxml.jackson.core``, ``com.fasterxml.jackson.databind``, ``com.google.gson``, ``com.hubspot.jinjava``, ``com.jcraft.jsch``, ``com.mitchellbosecke.pebble``, ``com.opensymphony.xwork2.ognl``, ``com.rabbitmq.client``, ``com.thoughtworks.xstream``, ``com.unboundid.ldap.sdk``, ``com.zaxxer.hikari``, ``flexjson``, ``freemarker.cache``, ``freemarker.template``, ``groovy.lang``, ``groovy.text``, ``groovy.util``, ``hudson``, ``io.jsonwebtoken``, ``io.netty.bootstrap``, ``io.netty.buffer``, ``io.netty.channel``, ``io.netty.handler.codec``, ``io.netty.handler.ssl``, ``io.netty.handler.stream``, ``io.netty.resolver``, ``io.netty.util``, ``javafx.scene.web``, ``jenkins``, ``jodd.json``, ``net.sf.json``, ``net.sf.saxon.s9api``, ``ognl``, ``okhttp3``, ``org.acegisecurity``, ``org.antlr.runtime``, ``org.apache.commons.codec``, ``org.apache.commons.compress.archivers.tar``, ``org.apache.commons.exec``, ``org.apache.commons.httpclient.util``, ``org.apache.commons.jelly``, ``org.apache.commons.jexl2``, ``org.apache.commons.jexl3``, ``org.apache.commons.lang``, ``org.apache.commons.logging``, ``org.apache.commons.net``, ``org.apache.commons.ognl``, ``org.apache.directory.ldap.client.api``, ``org.apache.hadoop.fs``, ``org.apache.hadoop.hive.metastore``, ``org.apache.hc.client5.http.async.methods``, ``org.apache.hc.client5.http.classic.methods``, ``org.apache.hc.client5.http.fluent``, ``org.apache.hive.hcatalog.templeton``, ``org.apache.ibatis.jdbc``, ``org.apache.log4j``, ``org.apache.shiro.codec``, ``org.apache.shiro.jndi``, ``org.apache.tools.ant``, 
``org.apache.tools.zip``, ``org.apache.velocity.app``, ``org.apache.velocity.runtime``, ``org.codehaus.cargo.container.installer``, ``org.codehaus.groovy.control``, ``org.dom4j``, ``org.eclipse.jetty.client``, ``org.fusesource.leveldbjni``, ``org.geogebra.web.full.main``, ``org.gradle.api.file``, ``org.hibernate``, ``org.influxdb``, ``org.jdbi.v3.core``, ``org.jenkins.ui.icon``, ``org.jenkins.ui.symbol``, ``org.jooq``, ``org.kohsuke.stapler``, ``org.mvel2``, ``org.openjdk.jmh.runner.options``, ``org.scijava.log``, ``org.slf4j``, ``org.thymeleaf``, ``org.xml.sax``, ``org.xmlpull.v1``, ``org.yaml.snakeyaml``, ``play.libs.ws``, ``play.mvc``, ``ratpack.core.form``, ``ratpack.core.handling``, ``ratpack.core.http``, ``ratpack.exec``, ``ratpack.form``, ``ratpack.func``, ``ratpack.handling``, ``ratpack.http``, ``ratpack.util``, ``retrofit2``",126,5237,577,89,6,18,18,,200 - Totals,,283,13606,2067,290,16,122,33,1,391 + Totals,,283,13607,2067,290,16,122,33,1,391 diff --git a/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md b/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md new file mode 100644 index 00000000000..d093c771d51 --- /dev/null +++ b/java/ql/lib/change-notes/2023-07-19-inputstream-dispatch.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- +* Improved the precision of virtual dispatch of `java.io.InputStream` methods. Now, calls to these methods will not dispatch to arbitrary implementations of `InputStream` if there is a high-confidence alternative (like a models-as-data summary). + \ No newline at end of file diff --git a/java/ql/lib/ext/java.io.model.yml b/java/ql/lib/ext/java.io.model.yml index 98c51a7bad5..e4d543aa06d 100644 --- a/java/ql/lib/ext/java.io.model.yml +++ b/java/ql/lib/ext/java.io.model.yml @@ -84,6 +84,7 @@ extensions: - ["java.io", "File", True, "toString", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "File", True, "toURI", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "FilterOutputStream", True, "FilterOutputStream", "(OutputStream)", "", "Argument[0]", "Argument[this]", "taint", "manual"] + - ["java.io", "InputStream", True, "read", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"] - ["java.io", "InputStream", True, "read", "(byte[])", "", "Argument[this]", "Argument[0]", "taint", "manual"] - ["java.io", "InputStream", True, "read", "(byte[],int,int)", "", "Argument[this]", "Argument[0]", "taint", "manual"] - ["java.io", "InputStream", True, "readAllBytes", "", "", "Argument[this]", "ReturnValue", "taint", "manual"] diff --git a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll index 4b880542229..c22f77725a1 100644 --- a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll +++ b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll @@ -102,6 +102,8 @@ private module Dispatch { or t instanceof Interface and not t.fromSource() or + t.hasQualifiedName("java.io", "InputStream") + or t.hasQualifiedName("java.io", "Serializable") or t.hasQualifiedName("java.lang", "Iterable") diff --git a/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md b/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md new file mode 100644 index 00000000000..46ff145715d --- /dev/null +++ b/swift/ql/lib/change-notes/2023-07-19-regex-mode-flags -more.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The regular expression library now understands mode flags specified by `Regex` methods and the 
`NSRegularExpression` initializer. diff --git a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll index e217de4478d..109d1acec9c 100644 --- a/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll +++ b/swift/ql/lib/codeql/swift/frameworks/StandardLibrary/CustomUrlSchemes.qll @@ -56,7 +56,9 @@ private class ApplicationWithLaunchOptionsFunc extends Function { private class LaunchOptionsUrlVarDecl extends VarDecl { LaunchOptionsUrlVarDecl() { - this.getEnclosingDecl().asNominalTypeDecl().getFullName() = "UIApplication.LaunchOptionsKey" and + // ideally this would be the more accurate, but currently less robust: + // this.getEnclosingDecl().asNominalTypeDecl().getFullName() = "UIApplication.LaunchOptionsKey" and + this.getType().(NominalType).getFullName() = "UIApplication.LaunchOptionsKey" and this.getName() = "url" } } diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index e958801a9a4..0ae533d6843 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -36,22 +36,23 @@ abstract class RegexCreation extends DataFlow::Node { * created from. */ abstract DataFlow::Node getStringInput(); + + /** + * Gets a dataflow node for the options input that might contain parse mode + * flags (if any). + */ + DataFlow::Node getOptionsInput() { none() } } /** - * A data-flow node where a `Regex` or `NSRegularExpression` object is created. + * A data-flow node where a `Regex` object is created. */ -private class StandardRegexCreation extends RegexCreation { +private class RegexRegexCreation extends RegexCreation { DataFlow::Node input; - StandardRegexCreation() { + RegexRegexCreation() { exists(CallExpr call | - ( - call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or - call.getStaticTarget() - .(Method) - .hasQualifiedName("NSRegularExpression", "init(pattern:options:)") - ) and + call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) and input.asExpr() = call.getArgument(0).getExpr() and this.asExpr() = call ) @@ -60,6 +61,168 @@ private class StandardRegexCreation extends RegexCreation { override DataFlow::Node getStringInput() { result = input } } +/** + * A data-flow node where an `NSRegularExpression` object is created. + */ +private class NSRegularExpressionRegexCreation extends RegexCreation { + DataFlow::Node input; + + NSRegularExpressionRegexCreation() { + exists(CallExpr call | + call.getStaticTarget() + .(Method) + .hasQualifiedName("NSRegularExpression", "init(pattern:options:)") and + input.asExpr() = call.getArgument(0).getExpr() and + this.asExpr() = call + ) + } + + override DataFlow::Node getStringInput() { result = input } + + override DataFlow::Node getOptionsInput() { + result.asExpr() = this.asExpr().(CallExpr).getArgument(1).getExpr() + } +} + +private newtype TRegexParseMode = + MkIgnoreCase() or // case insensitive + MkVerbose() or // ignores whitespace and `#` comments within patterns + MkDotAll() or // dot matches all characters, including line terminators + MkMultiLine() or // `^` and `$` also match beginning and end of lines + MkUnicode() // Unicode UAX 29 word boundary mode + +/** + * A regular expression parse mode flag. + */ +class RegexParseMode extends TRegexParseMode { + /** + * Gets the name of this parse mode flag. 
+ */ + string getName() { + this = MkIgnoreCase() and result = "IGNORECASE" + or + this = MkVerbose() and result = "VERBOSE" + or + this = MkDotAll() and result = "DOTALL" + or + this = MkMultiLine() and result = "MULTILINE" + or + this = MkUnicode() and result = "UNICODE" + } + + /** + * Gets a textual representation of this `RegexParseMode`. + */ + string toString() { result = this.getName() } +} + +/** + * A unit class for adding additional flow steps for regular expressions. + */ +class RegexAdditionalFlowStep extends Unit { + /** + * Holds if the step from `node1` to `node2` should be considered a flow + * step for regular expressions. + */ + abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); + + /** + * Holds if a regular expression parse mode is either set (`isSet` = true) + * or unset (`isSet` = false) at `node`. Parse modes propagate through + * array construction and regex construction. + */ + abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet); +} + +/** + * An additional flow step for `Regex`. + */ +class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + this.setsParseModeEdge(nodeFrom, nodeTo, _, _) + } + + override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) { + this.setsParseModeEdge(_, node, mode, isSet) + } + + private predicate setsParseModeEdge( + DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet + ) { + // `Regex` methods that modify the parse mode of an existing `Regex` object. + exists(CallExpr ce | + nodeFrom.asExpr() = ce.getQualifier() and + nodeTo.asExpr() = ce and + // decode the parse mode being set + ( + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "ignoresCase(_:)") and + mode = MkIgnoreCase() + or + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and + mode = MkDotAll() + or + ce.getStaticTarget().(Method).hasQualifiedName("Regex", "anchorsMatchLineEndings(_:)") and + mode = MkMultiLine() + ) and + // decode the value being set + if ce.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false + then isSet = false // mode is set to false + else isSet = true // mode is set to true OR mode is set to default (=true) OR mode is set to an unknown value + ) + } +} + +/** + * An additional flow step for `NSRegularExpression`. + */ +class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() } + + override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) { + // `NSRegularExpression.Options` values (these are typically combined, then passed into + // the `NSRegularExpression` initializer). 
+ node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "caseInsensitive") and + mode = MkIgnoreCase() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "allowCommentsAndWhitespace") and + mode = MkVerbose() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "dotMatchesLineSeparators") and + mode = MkDotAll() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "anchorsMatchLines") and + mode = MkMultiLine() and + isSet = true + or + node.asExpr() + .(MemberRefExpr) + .getMember() + .(FieldDecl) + .hasQualifiedName("NSRegularExpression.Options", "useUnicodeWordBoundaries") and + mode = MkUnicode() and + isSet = true + } +} + /** * A call that evaluates a regular expression. For example, the call to `firstMatch` in: * ``` @@ -91,6 +254,19 @@ abstract class RegexEval extends CallExpr { RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput())) ) } + + /** + * Gets a parse mode that is set at this evaluation (in at least one path + * from the creation of the regular expression object). + */ + RegexParseMode getAParseMode() { + exists(DataFlow::Node setNode | + // parse mode flag is set + any(RegexAdditionalFlowStep s).setsParseMode(setNode, result, true) and + // reaches this eval + RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput())) + ) + } } /** diff --git a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll index 913866aecdf..f49f61cb365 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll @@ -6,6 +6,8 @@ */ import swift +private import RegexTracking +private import codeql.swift.regex.Regex /** * A `Expr` containing a regular expression term, that is, either @@ -317,14 +319,24 @@ abstract class RegExp extends Expr { } /** - * Gets a mode (if any) of this regular expression. Can be any of: + * Gets a mode (if any) of this regular expression in any evaluation. Can be + * any of: * IGNORECASE * VERBOSE * DOTALL * MULTILINE * UNICODE */ - string getAMode() { result = this.getModeFromPrefix() } + string getAMode() { + // mode flags from inside the regex string + result = this.getModeFromPrefix() + or + // mode flags applied to the regex object before evaluation + exists(RegexEval e | + e.getARegex() = this and + result = e.getAParseMode().getName() + ) + } /** * Holds if the `i`th character could not be parsed. diff --git a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll index c0906b12871..46360b66636 100644 --- a/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll +++ b/swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll @@ -6,7 +6,7 @@ import swift import codeql.swift.regex.RegexTreeView -private import codeql.swift.dataflow.DataFlow +import codeql.swift.dataflow.DataFlow private import ParseRegex private import codeql.swift.regex.Regex @@ -37,7 +37,6 @@ private module RegexUseConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { // creation of the regex node instanceof RegexCreation - // TODO: track parse mode flags. 
} predicate isSink(DataFlow::Node node) { @@ -46,9 +45,60 @@ private module RegexUseConfig implements DataFlow::ConfigSig { } predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // TODO: flow through regex methods that return a modified regex. - none() + any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) } } module RegexUseFlow = DataFlow::Global; + +/** + * A data flow configuration for tracking regular expression parse mode + * flags from wherever they are created or set through to regular expression + * evaluation. The flow state encodes which parse mode flag was set. + */ +private module RegexParseModeConfig implements DataFlow::StateConfigSig { + class FlowState = RegexParseMode; + + predicate isSource(DataFlow::Node node, FlowState flowstate) { + // parse mode flag is set + any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, true) + } + + predicate isSink(DataFlow::Node node, FlowState flowstate) { + // evaluation of the regex + node.asExpr() = any(RegexEval eval).getRegexInput() and + exists(flowstate) + } + + predicate isBarrier(DataFlow::Node node) { none() } + + predicate isBarrier(DataFlow::Node node, FlowState flowstate) { + // parse mode flag is unset + any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, false) + } + + predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // flow through array construction + exists(ArrayExpr arr | + nodeFrom.asExpr() = arr.getAnElement() and + nodeTo.asExpr() = arr + ) + or + // flow through regex creation + exists(RegexCreation create | + nodeFrom = create.getOptionsInput() and + nodeTo = create + ) + or + // additional flow steps for regular expression objects + any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo) + } + + predicate isAdditionalFlowStep( + DataFlow::Node nodeFrom, FlowState flowstateFrom, DataFlow::Node nodeTo, FlowState flowStateTo + ) { + none() + } +} + +module RegexParseModeFlow = DataFlow::GlobalWithState; diff --git a/swift/ql/test/library-tests/regex/parse.expected b/swift/ql/test/library-tests/regex/parse.expected index 008808acf73..c38c5aa6331 100644 --- a/swift/ql/test/library-tests/regex/parse.expected +++ b/swift/ql/test/library-tests/regex/parse.expected @@ -6327,84 +6327,72 @@ redos_variants.swift: # 579| [RegExpConstant, RegExpNormalChar] c regex.swift: -# 110| [RegExpDot] . +# 111| [RegExpDot] . -# 110| [RegExpStar] .* +# 111| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 132| [RegExpDot] . +# 133| [RegExpDot] . -# 132| [RegExpStar] .* +# 133| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 149| [RegExpDot] . +# 150| [RegExpDot] . -# 149| [RegExpStar] .* +# 150| [RegExpStar] .* #-----| 0 -> [RegExpDot] . -# 149| [RegExpDot] . +# 150| [RegExpDot] . -# 149| [RegExpPlus] .+ +# 150| [RegExpPlus] .+ #-----| 0 -> [RegExpDot] . -# 156| [RegExpGroup] ([\w.]+) +# 157| [RegExpGroup] ([\w.]+) #-----| 0 -> [RegExpPlus] [\w.]+ -# 156| [RegExpStar] ([\w.]+)* +# 157| [RegExpStar] ([\w.]+)* #-----| 0 -> [RegExpGroup] ([\w.]+) -# 156| [RegExpCharacterClass] [\w.] +# 157| [RegExpCharacterClass] [\w.] #-----| 0 -> [RegExpCharacterClassEscape] \w #-----| 1 -> [RegExpConstant, RegExpNormalChar] . -# 156| [RegExpPlus] [\w.]+ +# 157| [RegExpPlus] [\w.]+ #-----| 0 -> [RegExpCharacterClass] [\w.] -# 156| [RegExpCharacterClassEscape] \w +# 157| [RegExpCharacterClassEscape] \w -# 156| [RegExpConstant, RegExpNormalChar] . +# 157| [RegExpConstant, RegExpNormalChar] . 
-# 163| [RegExpConstant, RegExpNormalChar] -# 163| - -# 164| [RegExpConstant, RegExpEscape] \n +# 164| [RegExpConstant, RegExpNormalChar] +# 164| # 165| [RegExpConstant, RegExpEscape] \n -# 175| [RegExpConstant, RegExpNormalChar] aa +# 166| [RegExpConstant, RegExpEscape] \n -# 175| [RegExpAlt] aa|bb +# 176| [RegExpConstant, RegExpNormalChar] aa + +# 176| [RegExpAlt] aa|bb #-----| 0 -> [RegExpConstant, RegExpNormalChar] aa #-----| 1 -> [RegExpConstant, RegExpNormalChar] bb -# 175| [RegExpConstant, RegExpNormalChar] bb +# 176| [RegExpConstant, RegExpNormalChar] bb -# 179| [RegExpConstant, RegExpNormalChar] aa +# 180| [RegExpConstant, RegExpNormalChar] aa -# 179| [RegExpAlt] aa| -# 179| bb +# 180| [RegExpAlt] aa| +# 180| bb #-----| 0 -> [RegExpConstant, RegExpNormalChar] aa #-----| 1 -> [RegExpConstant, RegExpNormalChar] #-----| bb -# 179| [RegExpConstant, RegExpNormalChar] -# 179| bb +# 180| [RegExpConstant, RegExpNormalChar] +# 180| bb -# 187| [RegExpCharacterClass] [a-z] +# 188| [RegExpCharacterClass] [a-z] #-----| 0 -> [RegExpCharacterRange] a-z -# 187| [RegExpConstant, RegExpNormalChar] a - -# 187| [RegExpCharacterRange] a-z -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] z - -# 187| [RegExpConstant, RegExpNormalChar] z - -# 188| [RegExpCharacterClass] [a-zA-Z] -#-----| 0 -> [RegExpCharacterRange] a-z -#-----| 1 -> [RegExpCharacterRange] A-Z - # 188| [RegExpConstant, RegExpNormalChar] a # 188| [RegExpCharacterRange] a-z @@ -6413,189 +6401,208 @@ regex.swift: # 188| [RegExpConstant, RegExpNormalChar] z -# 188| [RegExpConstant, RegExpNormalChar] A +# 189| [RegExpCharacterClass] [a-zA-Z] +#-----| 0 -> [RegExpCharacterRange] a-z +#-----| 1 -> [RegExpCharacterRange] A-Z -# 188| [RegExpCharacterRange] A-Z +# 189| [RegExpConstant, RegExpNormalChar] a + +# 189| [RegExpCharacterRange] a-z +#-----| 0 -> [RegExpConstant, RegExpNormalChar] a +#-----| 1 -> [RegExpConstant, RegExpNormalChar] z + +# 189| [RegExpConstant, RegExpNormalChar] z + +# 189| [RegExpConstant, RegExpNormalChar] A + +# 189| [RegExpCharacterRange] A-Z #-----| 0 -> [RegExpConstant, RegExpNormalChar] A #-----| 1 -> [RegExpConstant, RegExpNormalChar] Z -# 188| [RegExpConstant, RegExpNormalChar] Z +# 189| [RegExpConstant, RegExpNormalChar] Z -# 191| [RegExpCharacterClass] [a-] +# 192| [RegExpCharacterClass] [a-] #-----| 0 -> [RegExpConstant, RegExpNormalChar] a #-----| 1 -> [RegExpConstant, RegExpNormalChar] - -# 191| [RegExpConstant, RegExpNormalChar] a - -# 191| [RegExpConstant, RegExpNormalChar] - - -# 192| [RegExpCharacterClass] [-a] -#-----| 0 -> [RegExpConstant, RegExpNormalChar] - -#-----| 1 -> [RegExpConstant, RegExpNormalChar] a +# 192| [RegExpConstant, RegExpNormalChar] a # 192| [RegExpConstant, RegExpNormalChar] - -# 192| [RegExpConstant, RegExpNormalChar] a - -# 193| [RegExpCharacterClass] [-] +# 193| [RegExpCharacterClass] [-a] #-----| 0 -> [RegExpConstant, RegExpNormalChar] - +#-----| 1 -> [RegExpConstant, RegExpNormalChar] a # 193| [RegExpConstant, RegExpNormalChar] - -# 194| [RegExpCharacterClass] [*] +# 193| [RegExpConstant, RegExpNormalChar] a + +# 194| [RegExpCharacterClass] [-] +#-----| 0 -> [RegExpConstant, RegExpNormalChar] - + +# 194| [RegExpConstant, RegExpNormalChar] - + +# 195| [RegExpCharacterClass] [*] #-----| 0 -> [RegExpConstant, RegExpNormalChar] * -# 194| [RegExpConstant, RegExpNormalChar] * +# 195| [RegExpConstant, RegExpNormalChar] * -# 195| [RegExpCharacterClass] [^a] +# 196| [RegExpCharacterClass] [^a] #-----| 0 -> [RegExpConstant, 
RegExpNormalChar] a -# 195| [RegExpConstant, RegExpNormalChar] a - -# 196| [RegExpCharacterClass] [a^] -#-----| 0 -> [RegExpConstant, RegExpNormalChar] a -#-----| 1 -> [RegExpConstant, RegExpNormalChar] ^ - # 196| [RegExpConstant, RegExpNormalChar] a -# 196| [RegExpConstant, RegExpNormalChar] ^ +# 197| [RegExpCharacterClass] [a^] +#-----| 0 -> [RegExpConstant, RegExpNormalChar] a +#-----| 1 -> [RegExpConstant, RegExpNormalChar] ^ -# 197| [RegExpCharacterClass] [\\] +# 197| [RegExpConstant, RegExpNormalChar] a + +# 197| [RegExpConstant, RegExpNormalChar] ^ + +# 198| [RegExpCharacterClass] [\\] #-----| 0 -> [RegExpConstant, RegExpEscape] \\ -# 197| [RegExpConstant, RegExpEscape] \\ - -# 198| [RegExpCharacterClass] [\\\]] -#-----| 0 -> [RegExpConstant, RegExpEscape] \\ -#-----| 1 -> [RegExpConstant, RegExpEscape] \] - # 198| [RegExpConstant, RegExpEscape] \\ -# 198| [RegExpConstant, RegExpEscape] \] +# 199| [RegExpCharacterClass] [\\\]] +#-----| 0 -> [RegExpConstant, RegExpEscape] \\ +#-----| 1 -> [RegExpConstant, RegExpEscape] \] -# 199| [RegExpCharacterClass] [:] +# 199| [RegExpConstant, RegExpEscape] \\ + +# 199| [RegExpConstant, RegExpEscape] \] + +# 200| [RegExpCharacterClass] [:] #-----| 0 -> [RegExpConstant, RegExpNormalChar] : -# 199| [RegExpConstant, RegExpNormalChar] : +# 200| [RegExpConstant, RegExpNormalChar] : -# 200| [RegExpNamedCharacterProperty] [:digit:] +# 201| [RegExpNamedCharacterProperty] [:digit:] -# 201| [RegExpNamedCharacterProperty] [:alnum:] +# 202| [RegExpNamedCharacterProperty] [:alnum:] -# 204| [RegExpCharacterClass] []a] +# 205| [RegExpCharacterClass] []a] #-----| 0 -> [RegExpConstant, RegExpNormalChar] ] #-----| 1 -> [RegExpConstant, RegExpNormalChar] a -# 204| [RegExpConstant, RegExpNormalChar] ] +# 205| [RegExpConstant, RegExpNormalChar] ] -# 204| [RegExpConstant, RegExpNormalChar] a +# 205| [RegExpConstant, RegExpNormalChar] a -# 205| [RegExpNamedCharacterProperty] [:aaaaa:] +# 206| [RegExpNamedCharacterProperty] [:aaaaa:] -# 209| [RegExpGroup] (?i) +# 211| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 209| [RegExpSequence] (?i)abc +# 211| [RegExpSequence] (?i)abc #-----| 0 -> [RegExpGroup] (?i) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 209| [RegExpConstant, RegExpNormalChar] i - -# 209| [RegExpConstant, RegExpNormalChar] abc - -# 210| [RegExpGroup] (?s) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] s - -# 210| [RegExpSequence] (?s)abc -#-----| 0 -> [RegExpGroup] (?s) -#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc - -# 210| [RegExpConstant, RegExpNormalChar] s - -# 210| [RegExpConstant, RegExpNormalChar] abc - -# 211| [RegExpGroup] (?is) -#-----| 0 -> [RegExpConstant, RegExpNormalChar] is - -# 211| [RegExpSequence] (?is)abc -#-----| 0 -> [RegExpGroup] (?is) -#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc - -# 211| [RegExpConstant, RegExpNormalChar] is +# 211| [RegExpConstant, RegExpNormalChar] i # 211| [RegExpConstant, RegExpNormalChar] abc +# 212| [RegExpGroup] (?s) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] s + +# 212| [RegExpSequence] (?s)abc +#-----| 0 -> [RegExpGroup] (?s) +#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc + +# 212| [RegExpConstant, RegExpNormalChar] s + +# 212| [RegExpConstant, RegExpNormalChar] abc + +# 213| [RegExpGroup] (?is) +#-----| 0 -> [RegExpConstant, RegExpNormalChar] is + +# 213| [RegExpSequence] (?is)abc +#-----| 0 -> [RegExpGroup] (?is) +#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc + +# 213| [RegExpConstant, RegExpNormalChar] is + # 213| [RegExpConstant, 
RegExpNormalChar] abc -# 214| [RegExpConstant, RegExpNormalChar] abc - -# 215| [RegExpConstant, RegExpNormalChar] abc - -# 216| [RegExpConstant, RegExpNormalChar] abc - -# 217| [RegExpConstant, RegExpNormalChar] abc - -# 219| [RegExpDot] . - -# 219| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 220| [RegExpDot] . - -# 220| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 221| [RegExpDot] . - -# 221| [RegExpStar] .* -#-----| 0 -> [RegExpDot] . - -# 227| [RegExpGroup] (?i-s) +# 214| [RegExpGroup] (?i-s) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i-s -# 227| [RegExpSequence] (?i-s)abc +# 214| [RegExpSequence] (?i-s)abc #-----| 0 -> [RegExpGroup] (?i-s) #-----| 1 -> [RegExpConstant, RegExpNormalChar] abc -# 227| [RegExpConstant, RegExpNormalChar] i-s +# 214| [RegExpConstant, RegExpNormalChar] i-s -# 227| [RegExpConstant, RegExpNormalChar] abc +# 214| [RegExpConstant, RegExpNormalChar] abc -# 230| [RegExpConstant, RegExpNormalChar] abc +# 217| [RegExpConstant, RegExpNormalChar] abc -# 230| [RegExpSequence] abc(?i)def +# 217| [RegExpSequence] abc(?i)def #-----| 0 -> [RegExpConstant, RegExpNormalChar] abc #-----| 1 -> [RegExpGroup] (?i) #-----| 2 -> [RegExpConstant, RegExpNormalChar] def -# 230| [RegExpGroup] (?i) +# 217| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 230| [RegExpConstant, RegExpNormalChar] i +# 217| [RegExpConstant, RegExpNormalChar] i -# 230| [RegExpConstant, RegExpNormalChar] def +# 217| [RegExpConstant, RegExpNormalChar] def -# 231| [RegExpConstant, RegExpNormalChar] abc +# 218| [RegExpConstant, RegExpNormalChar] abc -# 231| [RegExpSequence] abc(?i:def)ghi +# 218| [RegExpSequence] abc(?i:def)ghi #-----| 0 -> [RegExpConstant, RegExpNormalChar] abc #-----| 1 -> [RegExpGroup] (?i:def) #-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi -# 231| [RegExpGroup] (?i:def) +# 218| [RegExpGroup] (?i:def) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def -# 231| [RegExpConstant, RegExpNormalChar] i:def +# 218| [RegExpConstant, RegExpNormalChar] i:def -# 231| [RegExpConstant, RegExpNormalChar] ghi +# 218| [RegExpConstant, RegExpNormalChar] ghi -# 232| [RegExpGroup] (?i) +# 219| [RegExpGroup] (?i) #-----| 0 -> [RegExpConstant, RegExpNormalChar] i -# 232| [RegExpConstant, RegExpNormalChar] i +# 219| [RegExpConstant, RegExpNormalChar] i -# 232| [RegExpConstant, RegExpNormalChar] abc +# 219| [RegExpConstant, RegExpNormalChar] abc -# 232| [RegExpConstant, RegExpNormalChar] -i +# 219| [RegExpConstant, RegExpNormalChar] -i -# 232| [RegExpConstant, RegExpNormalChar] def +# 219| [RegExpConstant, RegExpNormalChar] def + +# 222| [RegExpConstant, RegExpNormalChar] abc + +# 223| [RegExpConstant, RegExpNormalChar] abc + +# 224| [RegExpConstant, RegExpNormalChar] abc + +# 225| [RegExpConstant, RegExpNormalChar] abc + +# 226| [RegExpConstant, RegExpNormalChar] abc + +# 227| [RegExpConstant, RegExpNormalChar] abc + +# 230| [RegExpDot] . + +# 230| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 231| [RegExpDot] . + +# 231| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 232| [RegExpDot] . + +# 232| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . + +# 235| [RegExpDot] . + +# 235| [RegExpStar] .* +#-----| 0 -> [RegExpDot] . 
diff --git a/swift/ql/test/library-tests/regex/regex.swift b/swift/ql/test/library-tests/regex/regex.swift
index 8b9c5a18a07..a53e3d389fd 100644
--- a/swift/ql/test/library-tests/regex/regex.swift
+++ b/swift/ql/test/library-tests/regex/regex.swift
@@ -17,8 +17,9 @@ struct Regex<Output> : RegexComponent {
 	init(_ pattern: String) throws where Output == AnyRegexOutput {
 	}
 
-	func ignoresCase(_ ignoresCase: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return self }
-	func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return self }
+	func ignoresCase(_ ignoresCase: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return 0 as! Self }
+	func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return 0 as! Self }
+	func anchorsMatchLineEndings(_ matchLineEndings: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return 0 as! Self }
 
 	func firstMatch(in string: String) throws -> Regex<Output>.Match? { return nil }
 	func prefixMatch(in string: String) throws -> Regex<Output>.Match? { return nil }
@@ -206,28 +207,38 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
 
 	// --- parse modes ---
 
+	// parse modes encoded in the string
 	_ = try Regex("(?i)abc").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc
 	_ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc
 	_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc
-
-	_ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL
-	_ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
-	_ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
-	_ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL
-	_ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc MISSING: modes="DOTALL | IGNORECASE"
-
-	_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE
-	_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL
-	_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE"
-
-	_ = input.replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive]) // $ MISSING: regex=.* input=input modes=IGNORECASE
-
-	_ = NSString(string: "abc").replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive], range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS modes=IGNORECASE
-
 	_ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc MISSING: modes=IGNORECASE SPURIOUS: modes="DOTALL | IGNORECASE"
 
 	// these cases use parse modes on localized areas of the regex, which we don't currently support
 	_ = try Regex("abc(?i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i)def
 	_ = try Regex("abc(?i:def)ghi").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i:def)ghi
 	_ = try Regex("(?i)abc(?-i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc(?-i)def SPURIOUS: hasParseFailure=
+
+	// parse modes set through Regex
+	_ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL
+	_ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
+	_ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
+	_ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL
+	_ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc modes="DOTALL | IGNORECASE"
+	_ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE
+
+	// parse modes set through NSRegularExpression
+	_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=IGNORECASE
+	_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=DOTALL
+	_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
+
+	let myOptions1 : NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators]
+	_ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
+
+	// parse modes set through other methods
+
+	let myOptions2 : NSString.CompareOptions = [.regularExpression, .caseInsensitive]
+	_ = input.replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive]) // $ MISSING: regex=.* input=input modes=IGNORECASE
+	_ = input.replacingOccurrences(of: ".*", with: "", options: myOptions2) // $ MISSING: regex=.* input=input modes=IGNORECASE
+	_ = NSString(string: "abc").replacingOccurrences(of: ".*", with: "", options: [.regularExpression, .caseInsensitive], range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS modes=IGNORECASE
+	_ = NSString(string: "abc").replacingOccurrences(of: ".*", with: "", options: myOptions2, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS modes=IGNORECASE
 }