Python: Simplify guards as suggested

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-09-10 10:31:48 +02:00
parent 43effd2b40
commit b20232db3c
4 changed files with 61 additions and 73 deletions

View File

@@ -36,11 +36,13 @@ module ModificationOfParameterWithDefault {
override predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }
override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
// if we are tracking a empty default, then we should not modify falsey values
nonEmptyDefault = false and guard instanceof BlocksFalsey
// if we are tracking a empty default, then it is ok to modify truthy values,
// so our tracking ends at those.
nonEmptyDefault = false and guard instanceof BlocksTruthy
or
// if we are tracking a non-empty default, then we should not modify truthy values
nonEmptyDefault = true and guard instanceof BlocksTruthy
// if we are tracking a non-empty default, then it is ok to modify falsy values,
// so our tracking ends at those.
nonEmptyDefault = true and guard instanceof BlocksFalsey
}
}
}

View File

@@ -34,20 +34,26 @@ module ModificationOfParameterWithDefault {
/**
* A sanitizer guard that does not let a truthy value flow to the true branch.
*
* Implementation note:
* Since guards with different behaviour cannot exist on the same node,
* we let all guards have the same behaviour, in the sense that they all check
* the true branch. Instead, we partition guards into those that block
* truthy values and those that block falsy values.
*
* If you extend this class, make sure that your barrier checks the true branch.
*/
abstract class BlocksTruthy extends DataFlow::BarrierGuard { }
/**
* A sanitizer guard that does not let a falsy value flow to the true branch.
*
* Implementation note:
* Since guards with different behaviour cannot exist on the same node,
* we let all guards have the same behaviour, in the sense that they all check
* the true branch. Instead, we partition guards into those that block
* truthy values and those that block falsy values.
*
* If you extend this class, make sure that your barrier checks the true branch.
*/
abstract class BlocksFalsey extends DataFlow::BarrierGuard { }
@@ -135,78 +141,40 @@ module ModificationOfParameterWithDefault {
}
/**
* An expression that is checked directly in an `if`, possibly with `not`, such as `if x:` or `if not x:`.
*/
private class IdentityGuarded extends Expr {
boolean inverted;
IdentityGuarded() {
this = any(If i).getTest() and
inverted = false
or
exists(IdentityGuarded ig, UnaryExpr notExp |
notExp.getOp() instanceof Not and
ig = notExp and
notExp.getOperand() = this
|
inverted = ig.isInverted().booleanNot()
)
}
/**
* Whether this guard has been inverted. For `if x:` the result is `false`, and for `if not x:` the result is `true`.
*/
boolean isInverted() { result = inverted }
}
/**
* Holds iff `guard` is checking the `Name` represented by `guarded` for truthyness.
* `result` is true if the check is inverted and false if it is not.
*/
boolean isIdentityGuard(DataFlow::GuardNode guard, ControlFlowNode guarded) {
exists(IdentityGuarded ig |
ig instanceof Name and
// In `not l`, the `ControlFlowNode` for `l` is not an instance of `GuardNode`.
// TODO: This is slightly naive, not handling e.g. `l or cond` correctly.
// We should change it when we have a proper guards library.
guard.getNode().getAChildNode*() = ig and
result = ig.isInverted() and
guarded.getNode() = ig
)
}
/**
* A sanitizer guard that does not let a truthy value flow to the true branch.
* Based on `isIdentityGuard`, so comes with the same caveats.
* A simple sanitizer guard that does not let a truthy value flow to the true branch.
*
* Blocks flow of `x` in the true branch in the example below.
* ```py
* if x:
* x.append(42)
* ```
*/
class BlocksTruthyGuard extends BlocksTruthy {
ControlFlowNode guarded;
BlocksTruthyGuard() {
// The raw guard is true if the value is non-empty.
// We wish to send truthy falues to the false branch,
// se we are looking for inverted guards.
isIdentityGuard(this, guarded) = true
}
BlocksTruthyGuard() { this instanceof NameNode }
override predicate checks(ControlFlowNode node, boolean branch) {
node = guarded and
node = this and
branch = true
}
}
/**
* A sanitizer guard that does not let a falsy value flow to the true branch.
* Based on `isIdentityGuard`, so comes with the same caveats.
* A simple sanitizer guard that does not let a truthy value flow to the true branch.
*
* Blocks flow of `x` in the true branch in the example below.
* ```py
* if not x:
* x.append(42)
* ```
*/
class BlocksFalseyGuard extends BlocksFalsey {
ControlFlowNode guarded;
NameNode guarded;
BlocksFalseyGuard() {
// The raw guard is true if the value is non-empty.
// We wish to send falsy falues to the false branch,
// se we are looking for guards that are not inverted.
isIdentityGuard(this, guarded) = false
this.(UnaryExprNode).getNode().getOp() instanceof Not and
guarded = this.(UnaryExprNode).getOperand()
}
override predicate checks(ControlFlowNode node, boolean branch) {

View File

@@ -24,8 +24,12 @@ edges
| test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d |
| test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d |
| test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d |
| test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l |
| test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l |
| test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l |
| test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l |
| test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l |
| test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l |
| test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l |
| test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l |
nodes
| test.py:2:12:2:12 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:3:5:3:5 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
@@ -71,10 +75,16 @@ nodes
| test.py:115:5:115:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d |
| test.py:119:29:119:29 | ControlFlowNode for d | semmle.label | ControlFlowNode for d |
| test.py:121:5:121:5 | ControlFlowNode for d | semmle.label | ControlFlowNode for d |
| test.py:137:15:137:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:139:9:139:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:143:23:143:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:145:9:145:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:124:15:124:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:128:9:128:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:131:23:131:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:135:9:135:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:138:15:138:15 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:140:9:140:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:142:9:142:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:145:23:145:23 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:147:9:147:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
| test.py:149:9:149:9 | ControlFlowNode for l | semmle.label | ControlFlowNode for l |
#select
| test.py:3:5:3:5 | ControlFlowNode for l | test.py:2:12:2:12 | ControlFlowNode for l | test.py:3:5:3:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:2:12:2:12 | ControlFlowNode for l | Default value |
| test.py:8:5:8:5 | ControlFlowNode for l | test.py:7:11:7:11 | ControlFlowNode for l | test.py:8:5:8:5 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:7:11:7:11 | ControlFlowNode for l | Default value |
@@ -95,5 +105,9 @@ nodes
| test.py:109:9:109:9 | ControlFlowNode for d | test.py:108:14:108:14 | ControlFlowNode for d | test.py:109:9:109:9 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:108:14:108:14 | ControlFlowNode for d | Default value |
| test.py:115:5:115:5 | ControlFlowNode for d | test.py:113:20:113:20 | ControlFlowNode for d | test.py:115:5:115:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:113:20:113:20 | ControlFlowNode for d | Default value |
| test.py:121:5:121:5 | ControlFlowNode for d | test.py:119:29:119:29 | ControlFlowNode for d | test.py:121:5:121:5 | ControlFlowNode for d | $@ flows to here and is mutated. | test.py:119:29:119:29 | ControlFlowNode for d | Default value |
| test.py:139:9:139:9 | ControlFlowNode for l | test.py:137:15:137:15 | ControlFlowNode for l | test.py:139:9:139:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:137:15:137:15 | ControlFlowNode for l | Default value |
| test.py:145:9:145:9 | ControlFlowNode for l | test.py:143:23:143:23 | ControlFlowNode for l | test.py:145:9:145:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:143:23:143:23 | ControlFlowNode for l | Default value |
| test.py:128:9:128:9 | ControlFlowNode for l | test.py:124:15:124:15 | ControlFlowNode for l | test.py:128:9:128:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:124:15:124:15 | ControlFlowNode for l | Default value |
| test.py:135:9:135:9 | ControlFlowNode for l | test.py:131:23:131:23 | ControlFlowNode for l | test.py:135:9:135:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:131:23:131:23 | ControlFlowNode for l | Default value |
| test.py:140:9:140:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:140:9:140:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value |
| test.py:142:9:142:9 | ControlFlowNode for l | test.py:138:15:138:15 | ControlFlowNode for l | test.py:142:9:142:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:138:15:138:15 | ControlFlowNode for l | Default value |
| test.py:147:9:147:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:147:9:147:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value |
| test.py:149:9:149:9 | ControlFlowNode for l | test.py:145:23:145:23 | ControlFlowNode for l | test.py:149:9:149:9 | ControlFlowNode for l | $@ flows to here and is mutated. | test.py:145:23:145:23 | ControlFlowNode for l | Default value |

View File

@@ -121,26 +121,30 @@ def dict_update_op_nochange(d = {}):
d |= x #$ SPURIOUS: modification=d
return d
# OK
def sanitizer(l = []):
if l:
l.append(1)
else:
l.append(1) #$ modification=l
return l
# OK
def sanitizer_negated(l = [1]):
if not l:
l.append(1)
else:
l.append(1) #$ modification=l
return l
# Not OK
def sanitizer(l = []):
if not l:
l.append(1) #$ modification=l
else:
l.append(1) #$ SPURIOUS: modification=l
return l
# Not OK
def sanitizer_negated(l = [1]):
if l:
l.append(1) #$ modification=l
else:
l.append(1) #$ SPURIOUS: modification=l
return l