Merge pull request #4828 from yoff/yoff-python-add-source-nodes

Python: add source nodes
This commit is contained in:
Taus
2021-01-05 15:07:51 +01:00
committed by GitHub
13 changed files with 110 additions and 58 deletions

View File

@@ -338,7 +338,7 @@ module HTTP {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
result = str.getText()
)
}
@@ -405,7 +405,9 @@ module HTTP {
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getMimetypeOrContentTypeArg()) and
DataFlow::exprNode(str)
.(DataFlow::LocalSourceNode)
.flowsTo(this.getMimetypeOrContentTypeArg()) and
result = str.getText().splitAt(";", 0)
)
or

View File

@@ -584,18 +584,40 @@ class Slice extends Slice_ {
}
}
/**
 * Gets a string-literal prefix present in the database that contains a `u` or `U`
 * character, that is, a prefix that explicitly marks its literal as a Unicode string.
 *
 * Helper predicate for `StrConst::isUnicode`.
 */
pragma[nomagic]
private string unicode_prefix() {
  // The prefix must belong to at least one string literal in the database ...
  exists(Str_ literal | result = literal.getPrefix()) and
  // ... and at least one of its characters must be a Unicode marker.
  result.charAt(_) = ["u", "U"]
}
/**
 * Gets a string-literal prefix present in the database that contains neither `b` nor `B`,
 * that is, a prefix that does _not_ explicitly mark its literal as a bytestring.
 *
 * Helper predicate for `StrConst::isUnicode`.
 */
pragma[nomagic]
private string non_byte_prefix() {
  // The prefix must belong to at least one string literal in the database ...
  exists(Str_ literal | result = literal.getPrefix()) and
  // ... and none of its characters may be a bytestring marker.
  not exists(int i | result.charAt(i) = ["b", "B"])
}
/** A string constant. */
class StrConst extends Str_, ImmutableLiteral {
/* syntax: "hello" */
predicate isUnicode() {
this.getPrefix().charAt(_) = "u"
this.getPrefix() = unicode_prefix()
or
this.getPrefix().charAt(_) = "U"
or
not this.getPrefix().charAt(_) = "b" and major_version() = 3
or
not this.getPrefix().charAt(_) = "b" and
this.getEnclosingModule().hasFromFuture("unicode_literals")
this.getPrefix() = non_byte_prefix() and
(
major_version() = 3
or
this.getEnclosingModule().hasFromFuture("unicode_literals")
)
}
deprecated override string strValue() { result = this.getS() }

View File

@@ -51,7 +51,7 @@ module StepSummary {
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
@@ -82,9 +82,8 @@ module StepSummary {
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo) or
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
simpleLocalFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo)
}
/**
@@ -142,11 +141,11 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
exists(AttrWrite a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getValue() and
simpleLocalFlowStep*(nodeTo, a.getObject())
nodeTo.flowsTo(a.getObject())
)
}
@@ -275,7 +274,7 @@ class TypeTracker extends TTypeTracker {
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
pragma[inline]
TypeTracker step(Node nodeFrom, Node nodeTo) {
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
result = this.append(summary)

View File

@@ -30,8 +30,8 @@ abstract class AttrRef extends Node {
predicate mayHaveAttributeName(string attrName) {
attrName = this.getAttributeName()
or
exists(Node nodeFrom |
localFlow(nodeFrom, this.getAttributeNameExpr()) and
exists(LocalSourceNode nodeFrom |
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
attrName = nodeFrom.asExpr().(StrConst).getText()
)
}

View File

@@ -186,6 +186,7 @@ module EssaFlow {
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
* excludes SSA flow through instance fields.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is ESSA-flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
@@ -219,12 +220,9 @@ private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
* Holds if `result` is either `node`, or the post-update node for `node`.
*/
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
result = node
or
result.(PostUpdateNode).getPreUpdateNode() = node
}
// TODO: Make modules for these headings

View File

@@ -376,6 +376,19 @@ class BarrierGuard extends GuardNode {
}
}
/**
 * A data flow node that is a source of local flow, that is, a node with no incoming
 * `simpleLocalFlowStep`. This includes things like
 * - Expressions
 * - Function parameters
 */
class LocalSourceNode extends Node {
  LocalSourceNode() { not exists(Node pred | simpleLocalFlowStep(pred, this)) }

  /**
   * Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps.
   *
   * The closure is reflexive, so a `LocalSourceNode` always flows to itself.
   */
  cached
  predicate flowsTo(Node nodeTo) { simpleLocalFlowStep*(this, nodeTo) }
}
/**
* Algebraic datatype for tracking data content associated with values.
* Content can be collection elements or object attributes.

View File

@@ -46,7 +46,7 @@ Node importNode(string name) {
or
name = alias.getValue().(ImportExpr).getImportedModuleName()
) and
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
result.asExpr() = alias.getValue()
)
or
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to

View File

@@ -1841,7 +1841,7 @@ private module Django {
DjangoRouteRegex() {
this instanceof StrConst and
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
}
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }

View File

@@ -319,9 +319,9 @@ private module FlaskModel {
}
override Function getARouteHandler() {
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
exists(DataFlow::Node view_func_arg, DataFlow::LocalSourceNode func_src |
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
DataFlow::localFlow(func_src, view_func_arg) and
func_src.flowsTo(view_func_arg) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
)
}

View File

@@ -229,23 +229,32 @@ predicate class_method(
PointsToInternal::pointsTo(instantiation.getArg(0), context, function, _)
}
/**
 * Holds if the literal (or comprehension) corresponding to the control flow node `n`
 * has the builtin class `cls`.
 *
 * Helper predicate for `literal_instantiation`. Prevents a bad join with
 * `PointsToContext::appliesTo` from occurring.
 */
pragma[nomagic]
private predicate literal_node_class(ControlFlowNode n, ClassObjectInternal cls) {
  // List displays and list comprehensions.
  (n instanceof ListNode or n.getNode() instanceof ListComp) and
  cls = ObjectInternal::builtin("list")
  or
  // Dict displays and dict comprehensions.
  (n instanceof DictNode or n.getNode() instanceof DictComp) and
  cls = ObjectInternal::builtin("dict")
  or
  // Set displays and set comprehensions.
  (n instanceof SetNode or n.getNode() instanceof SetComp) and
  cls = ObjectInternal::builtin("set")
  or
  // Imaginary number literals.
  n.getNode() instanceof ImaginaryLiteral and
  cls = ObjectInternal::builtin("complex")
}
predicate literal_instantiation(ControlFlowNode n, ClassObjectInternal cls, PointsToContext context) {
context.appliesTo(n) and
(
n instanceof ListNode and cls = ObjectInternal::builtin("list")
or
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
or
n instanceof SetNode and cls = ObjectInternal::builtin("set")
or
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
or
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
or
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
or
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
)
literal_node_class(n, cls)
}
predicate super_instantiation(

View File

@@ -19,6 +19,19 @@ private predicate re_module_function(string name, int flags) {
name = "subn" and flags = 4
}
/**
 * Gets the value of the attribute called `name` of the `re` module, for every attribute
 * other than `escape`. These attributes are likely to be methods taking regular
 * expressions as arguments.
 *
 * This is a helper predicate that fixes a bad join order, and should not be inlined
 * without checking that this is safe.
 */
pragma[nomagic]
private Value relevant_re_attr(string name) {
  // `re.escape` is deliberately left out of the relevant attributes.
  not name = "escape" and
  result = Module::named("re").attr(name)
}
/**
* Holds if `s` is used as a regex with the `re` module, with the regex-mode `mode` (if known).
* If regex mode is not known, `mode` will be `"None"`.
@@ -28,8 +41,7 @@ predicate used_as_regex(Expr s, string mode) {
/* Call to re.xxx(regex, ... [mode]) */
exists(CallNode call, string name |
call.getArg(0).pointsTo(_, _, s.getAFlowNode()) and
call.getFunction().pointsTo(Module::named("re").attr(name)) and
not name = "escape"
call.getFunction().pointsTo(relevant_re_attr(name))
|
mode = "None"
or

View File

@@ -104,7 +104,8 @@ class Builtin extends @py_cobject {
) and
exists(string quoted_string |
quoted_string = this.getName() and
result = quoted_string.regexpCapture("[bu]'([\\s\\S]*)'", 1)
// Remove prefix ("b" or "u") and leading and trailing quotes (both "'").
result = quoted_string.substring(2, quoted_string.length() - 1)
)
}
}