Merge pull request #329 from github/hvitved/dataflow/synth-return

Data flow: Add a synthetic return node
This commit is contained in:
Tom Hvitved
2021-10-07 13:06:39 +02:00
committed by GitHub
8 changed files with 154 additions and 49 deletions

View File

@@ -436,7 +436,7 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
* Holds if `e` is an `ExprNode` that may be returned by a call to `c`.
*/
predicate exprNodeReturnedFrom(DataFlow::ExprNode e, Callable c) {
exists(ReturnNode r |
exists(ReturningNode r |
r.getEnclosingCallable().asCallable() = c and
(
r.(ExplicitReturnNode).getReturningNode().getReturnedValueNode() = e.asExpr() or

View File

@@ -46,18 +46,19 @@ module LocalFlow {
)
}
/** Gets the SSA definition node corresponding to parameter `p`. */
SsaDefinitionNode getParameterDefNode(NamedParameter p) {
exists(BasicBlock bb, int i |
bb.getNode(i).getNode() = p.getDefiningAccess() and
result.getDefinition().definesAt(_, bb, i)
)
}
/**
* Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving
* SSA definition `def.
* SSA definition `def`.
*/
predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) {
// Flow from parameter into SSA definition
exists(BasicBlock bb, int i |
bb.getNode(i).getNode() =
nodeFrom.(ParameterNode).getParameter().(NamedParameter).getDefiningAccess() and
nodeTo.(SsaDefinitionNode).getDefinition().definesAt(_, bb, i)
)
or
// Flow from assignment into SSA definition
def.(Ssa::WriteDefinition).assigns(nodeFrom.asExpr()) and
nodeTo.(SsaDefinitionNode).getDefinition() = def
@@ -114,6 +115,12 @@ private module Cached {
newtype TNode =
TExprNode(CfgNodes::ExprCfgNode n) or
TReturningNode(CfgNodes::ReturningCfgNode n) or
TSynthReturnNode(CfgScope scope, ReturnKind kind) {
exists(ReturningNode ret |
ret.(NodeImpl).getCfgScope() = scope and
ret.getKind() = kind
)
} or
TSsaDefinitionNode(Ssa::Definition def) or
TNormalParameterNode(Parameter p) { not p instanceof BlockParameter } or
TSelfParameterNode(MethodBase m) or
@@ -139,21 +146,14 @@ private module Cached {
class TParameterNode =
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
/**
* This is the local flow predicate that is used as a building block in global
* data flow. It excludes SSA flow through instance fields, as flow through fields
* is handled by the global data-flow library, but includes various other steps
* that are only relevant for global flow.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
exists(Ssa::Definition def | LocalFlow::localSsaFlowStep(def, nodeFrom, nodeTo))
private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
or
nodeTo.(ParameterNode).getParameter().(OptionalParameter).getDefaultValue() =
nodeFrom.asExpr().getExpr()
or
nodeTo.(ParameterNode).getParameter().(KeywordParameter).getDefaultValue() =
nodeFrom.asExpr().getExpr()
p.(KeywordParameter).getDefaultValue() = e.getExprNode().getExpr()
}
private predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
LocalFlow::localSsaFlowStep(_, nodeFrom, nodeTo)
or
nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
nodeTo.asExpr().getExpr() instanceof Self
@@ -186,12 +186,66 @@ private module Cached {
) and
nodeFrom.asExpr() = for.getValue()
)
}
/**
* This is the local flow predicate that is used as a building block in global
* data flow.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
or
nodeTo.(SynthReturnNode).getAnInput() = nodeFrom
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
}
/** This is the local flow predicate that is exposed. */
cached
predicate isLocalSourceNode(Node n) { not simpleLocalFlowStep+(any(ExprNode e), n) }
predicate localFlowStepImpl(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
}
/** This is the local flow predicate that is used in type tracking. */
cached
predicate localFlowStepTypeTracker(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
exists(NamedParameter p |
defaultValueFlow(p, nodeFrom) and
nodeTo = LocalFlow::getParameterDefNode(p)
)
}
cached
predicate isLocalSourceNode(Node n) {
n instanceof ParameterNode
or
// This case should not be needed once we have proper use-use flow
// for `self`. At that point, the `self`s returned by `trackInstance`
// in `DataFlowDispatch.qll` should refer to the post-update node,
// and we can remove this case.
n instanceof SelfArgumentNode
or
not localFlowStepTypeTracker+(any(Node e |
e instanceof ExprNode
or
e instanceof ParameterNode
), n)
}
cached
newtype TContent = TTodoContent() // stub
@@ -208,6 +262,8 @@ predicate nodeIsHidden(Node n) {
n instanceof SummaryNode
or
n instanceof SummaryParameterNode
or
n instanceof SynthReturnNode
}
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -234,7 +290,7 @@ class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
* `ControlFlow::Node`s.
*/
class ReturningStatementNode extends NodeImpl, TReturningNode {
private CfgNodes::ReturningCfgNode n;
CfgNodes::ReturningCfgNode n;
ReturningStatementNode() { this = TReturningNode(n) }
@@ -436,6 +492,12 @@ private module ArgumentNodes {
import ArgumentNodes
/** A data-flow node that represents a value syntactically returned by a callable. */
abstract class ReturningNode extends Node {
/** Gets the kind of this return node. */
abstract ReturnKind getKind();
}
/** A data-flow node that represents a value returned by a callable. */
abstract class ReturnNode extends Node {
/** Gets the kind of this return node. */
@@ -463,11 +525,9 @@ private module ReturnNodes {
* A data-flow node that represents an expression returned by a callable,
* either using an explict `return` statement or as the expression of a method body.
*/
class ExplicitReturnNode extends ReturnNode, ReturningStatementNode {
private CfgNodes::ReturningCfgNode n;
class ExplicitReturnNode extends ReturningNode, ReturningStatementNode {
ExplicitReturnNode() {
isValid(this.getReturningNode()) and
isValid(n) and
n.getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
n.getScope() instanceof Callable
}
@@ -479,7 +539,7 @@ private module ReturnNodes {
}
}
class ExprReturnNode extends ReturnNode, ExprNode {
class ExprReturnNode extends ReturningNode, ExprNode {
ExprReturnNode() {
this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
this.(NodeImpl).getCfgScope() instanceof Callable
@@ -488,6 +548,34 @@ private module ReturnNodes {
override ReturnKind getKind() { result instanceof NormalReturnKind }
}
/**
* A synthetic data-flow node for joining flow from different syntactic
* returns into a single node.
*
* This node only exists to avoid computing the product of a large fan-in
* with a large fan-out.
*/
class SynthReturnNode extends NodeImpl, ReturnNode, TSynthReturnNode {
private CfgScope scope;
private ReturnKind kind;
SynthReturnNode() { this = TSynthReturnNode(scope, kind) }
/** Gets a syntactic return node that flows into this synthetic node. */
ReturningNode getAnInput() {
result.(NodeImpl).getCfgScope() = scope and
result.getKind() = kind
}
override ReturnKind getKind() { result = kind }
override CfgScope getCfgScope() { result = scope }
override Location getLocationImpl() { result = scope.getLocation() }
override string toStringImpl() { result = "return " + kind + " in " + scope }
}
private class SummaryReturnNode extends SummaryNode, ReturnNode {
private ReturnKind rk;
@@ -631,7 +719,7 @@ private import PostUpdateNodes
/** A node that performs a type cast. */
class CastNode extends Node {
CastNode() { none() }
CastNode() { this instanceof ReturningNode }
}
class DataFlowExpr = CfgNodes::ExprCfgNode;

View File

@@ -120,7 +120,7 @@ predicate hasLocalSource(Node sink, Node source) {
or
exists(Node mid |
hasLocalSource(mid, source) and
simpleLocalFlowStep(mid, sink)
localFlowStepTypeTracker(mid, sink)
)
}
@@ -136,13 +136,7 @@ ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
}
predicate localFlowStep = localFlowStepImpl/2;
/**
* Holds if data flows from `source` to `sink` in zero or more local

View File

@@ -11,7 +11,7 @@ class Node = DataFlowPublic::Node;
class TypeTrackingNode = DataFlowPublic::LocalSourceNode;
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
predicate simpleLocalFlowStep = DataFlowPrivate::localFlowStepTypeTracker/2;
predicate jumpStep = DataFlowPrivate::jumpStep/2;
@@ -30,12 +30,21 @@ string getPossibleContentName() { result = getSetterCallAttributeName(_) }
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
predicate callStep(Node nodeFrom, Node nodeTo) {
exists(ExprNodes::CallCfgNode call, CFG::CfgScope callable, int i |
DataFlowDispatch::getTarget(call) = callable and
nodeFrom.sourceArgumentOf(call, i) and
nodeFrom.(DataFlowPrivate::ArgumentNode).sourceArgumentOf(call, i) and
nodeTo.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable, i)
)
or
// In normal data-flow, this will be a local flow step. But for type tracking
// we model it as a call step, in order to avoid computing a potential
// self-cross product of all calls to a function that returns one of its parameters
// (only to later filter that flow out using `TypeTracker::append`).
nodeTo =
DataFlowPrivate::LocalFlow::getParameterDefNode(nodeFrom
.(DataFlowPublic::ParameterNode)
.getParameter())
}
/**
@@ -45,11 +54,18 @@ predicate callStep(DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPublic::Param
* recursion (or, at best, terrible performance), since identifying calls to library
* methods is done using API graphs (which uses type tracking).
*/
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
predicate returnStep(Node nodeFrom, Node nodeTo) {
exists(ExprNodes::CallCfgNode call |
nodeFrom instanceof DataFlowPrivate::ReturnNode and
nodeFrom.(DataFlowPrivate::NodeImpl).getCfgScope() = DataFlowDispatch::getTarget(call) and
nodeTo.asExpr().getNode() = call.getNode()
)
or
// In normal data-flow, this will be a local flow step. But for type tracking
// we model it as a returning flow step, in order to avoid computing a potential
// self-cross product of all calls to a function that returns one of its parameters
// (only to later filter that flow out using `TypeTracker::append`).
nodeTo.(DataFlowPrivate::SynthReturnNode).getAnInput() = nodeFrom
}
/**

View File

@@ -1,4 +1,4 @@
import ruby
import codeql.ruby.dataflow.internal.DataFlowPrivate
select any(ReturnNode node)
select any(ReturningNode node)

View File

@@ -3,9 +3,7 @@ edges
| UrlRedirect.rb:14:17:14:22 | call to params : | UrlRedirect.rb:14:17:14:43 | call to fetch |
| UrlRedirect.rb:19:17:19:22 | call to params : | UrlRedirect.rb:19:17:19:37 | call to to_unsafe_hash |
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:24:17:24:37 | call to filter_params |
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:56:21:56:32 | input_params : |
| UrlRedirect.rb:34:20:34:25 | call to params : | UrlRedirect.rb:34:17:34:37 | "#{...}/foo" |
| UrlRedirect.rb:56:21:56:32 | input_params : | UrlRedirect.rb:57:5:57:29 | call to permit : |
nodes
| UrlRedirect.rb:4:17:4:22 | call to params | semmle.label | call to params |
| UrlRedirect.rb:9:17:9:22 | call to params : | semmle.label | call to params : |
@@ -18,10 +16,7 @@ nodes
| UrlRedirect.rb:24:31:24:36 | call to params : | semmle.label | call to params : |
| UrlRedirect.rb:34:17:34:37 | "#{...}/foo" | semmle.label | "#{...}/foo" |
| UrlRedirect.rb:34:20:34:25 | call to params : | semmle.label | call to params : |
| UrlRedirect.rb:56:21:56:32 | input_params : | semmle.label | input_params : |
| UrlRedirect.rb:57:5:57:29 | call to permit : | semmle.label | call to permit : |
subpaths
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:56:21:56:32 | input_params : | UrlRedirect.rb:57:5:57:29 | call to permit : | UrlRedirect.rb:24:17:24:37 | call to filter_params : |
#select
| UrlRedirect.rb:4:17:4:22 | call to params | UrlRedirect.rb:4:17:4:22 | call to params | UrlRedirect.rb:4:17:4:22 | call to params | Untrusted URL redirection due to $@. | UrlRedirect.rb:4:17:4:22 | call to params | a user-provided value |
| UrlRedirect.rb:9:17:9:28 | ...[...] | UrlRedirect.rb:9:17:9:22 | call to params : | UrlRedirect.rb:9:17:9:28 | ...[...] | Untrusted URL redirection due to $@. | UrlRedirect.rb:9:17:9:22 | call to params | a user-provided value |

View File

@@ -6,6 +6,8 @@ edges
| HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" : | HardcodedCredentials.rb:23:19:23:20 | pw : |
| HardcodedCredentials.rb:23:19:23:20 | pw : | HardcodedCredentials.rb:1:23:1:30 | password |
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | HardcodedCredentials.rb:31:18:31:23 | passwd |
| HardcodedCredentials.rb:43:29:43:43 | "user@test.com" : | HardcodedCredentials.rb:43:18:43:25 | username |
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | HardcodedCredentials.rb:43:46:43:53 | password |
nodes
| HardcodedCredentials.rb:1:23:1:30 | password | semmle.label | password |
| HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | semmle.label | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." |
@@ -19,6 +21,10 @@ nodes
| HardcodedCredentials.rb:23:19:23:20 | pw : | semmle.label | pw : |
| HardcodedCredentials.rb:31:18:31:23 | passwd | semmle.label | passwd |
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | semmle.label | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : |
| HardcodedCredentials.rb:43:18:43:25 | username | semmle.label | username |
| HardcodedCredentials.rb:43:29:43:43 | "user@test.com" : | semmle.label | "user@test.com" : |
| HardcodedCredentials.rb:43:46:43:53 | password | semmle.label | password |
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | semmle.label | "abcdef123456" : |
subpaths
#select
| HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | Use of $@. | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | hardcoded credentials |
@@ -29,3 +35,5 @@ subpaths
| HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." | HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." : | HardcodedCredentials.rb:1:23:1:30 | password | Use of $@. | HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." | hardcoded credentials |
| HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" | HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" : | HardcodedCredentials.rb:1:23:1:30 | password | Use of $@. | HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" | hardcoded credentials |
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." | HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | HardcodedCredentials.rb:31:18:31:23 | passwd | Use of $@. | HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." | hardcoded credentials |
| HardcodedCredentials.rb:43:29:43:43 | "user@test.com" | HardcodedCredentials.rb:43:29:43:43 | "user@test.com" : | HardcodedCredentials.rb:43:18:43:25 | username | Use of $@. | HardcodedCredentials.rb:43:29:43:43 | "user@test.com" | hardcoded credentials |
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" | HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | HardcodedCredentials.rb:43:46:43:53 | password | Use of $@. | HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" | hardcoded credentials |

View File

@@ -39,3 +39,7 @@ Passwords::KnownPasswords.new.include?("kdW/xVhiv6y1fQQNevDpUaq+2rfPKfh+teE/45zS
# Call to unrelated method with same name (should not be flagged)
"foobar".include?("foo")
def default_cred(username = "user@test.com", password = "abcdef123456")
username
end