From df9efbe778517a610c131012ba16daec4886bf3d Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Mon, 31 Jan 2022 23:11:18 +0100 Subject: [PATCH] get mimimal def nodes to work in python --- python/ql/lib/semmle/python/ApiGraphs.qll | 221 +++++++++++++++++- .../test/library-tests/ApiGraphs/def.expected | 0 python/ql/test/library-tests/ApiGraphs/def.ql | 36 +++ .../test/library-tests/ApiGraphs/deftest1.py | 6 + 4 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 python/ql/test/library-tests/ApiGraphs/def.expected create mode 100644 python/ql/test/library-tests/ApiGraphs/def.ql create mode 100644 python/ql/test/library-tests/ApiGraphs/deftest1.py diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index de45b3be02a..f4ead33ae7a 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -39,6 +39,30 @@ module API { ) } + /** + * Gets a data-flow node corresponding to the right-hand side of a definition of the API + * component represented by this node. + * + * For example, in the property write `foo.bar = x`, variable `x` is the right-hand side + * of a write to the `bar` property of `foo`. + * + * Note that for parameters, it is the arguments flowing into that parameter that count as + * right-hand sides of the definition, not the declaration of the parameter itself. + * Consequently, in: + * ```python + * from mypkg import foo; + * foo.bar(x) + * ``` + * `x` is the right-hand side of a definition of the first parameter of `bar` from the `mypkg.foo` module. + */ + DataFlow::Node getARhs() { Impl::rhs(this, result) } + + /** + * Gets a data-flow node that may interprocedurally flow to the right-hand side of a definition + * of the API component represented by this node. + */ + DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.getARhs()) } + /** * Gets an immediate use of the API component represented by this node.
* @@ -55,7 +79,7 @@ module API { /** * Gets a call to the function represented by this API component. */ - DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() } + DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() } // TODO: Make an API::CallNode. /** * Gets a node representing member `m` of this API component. @@ -92,6 +116,27 @@ module API { */ Node getReturn() { result = this.getASuccessor(Label::return()) } + /** + * Gets a node representing the `i`th parameter of the function represented by this node. + * + * This predicate may have multiple results when there are multiple invocations of this API component. + * Consider using `getACall()` if there is a need to distinguish between individual calls. + */ + Node getParameter(int i) { result = this.getASuccessor(Label::parameter(i)) } + + /** + * Gets the number of parameters of the function represented by this node. + */ + int getNumParameter() { result = max(int s | exists(this.getParameter(s))) + 1 } + + /** + * Gets a node representing the last parameter of the function represented by this node. + * + * This predicate may have multiple results when there are multiple invocations of this API component. + * Consider using `getACall()` if there is a need to distinguish between individual calls. + */ + Node getLastParameter() { result = this.getParameter(this.getNumParameter() - 1) } + /** * Gets a node representing a subclass of the class represented by this node. */ @@ -137,7 +182,7 @@ module API { /** * Gets the data-flow node that gives rise to this node, if any. */ - DataFlow::Node getInducingNode() { this = Impl::MkUse(result) } + DataFlow::Node getInducingNode() { this = Impl::MkUse(result) or this = Impl::MkDef(result) } /** * Holds if this element is at the specified location. @@ -210,6 +255,17 @@ module API { } } + /** A node corresponding to the rhs of an API component.
*/ + class Def extends Node, Impl::TDef { + override string toString() { + exists(string type | this = Impl::MkDef(_) and type = "Def " | + result = type + this.getPath() + or + not exists(this.getPath()) and result = type + "with no path" + ) + } + } + /** Gets the root node. */ Root root() { any() } @@ -325,10 +381,13 @@ module API { name = "builtins" } or /** A use of an API member at the node `nd`. */ - MkUse(DataFlow::Node nd) { use(_, _, nd) } + MkUse(DataFlow::Node nd) { use(_, _, nd) } or + MkDef(DataFlow::Node nd) { rhs(_, _, nd) } class TUse = MkModuleImport or MkUse; + class TDef = MkDef; + /** * Holds if the dotted module name `sub` refers to the `member` member of `base`. * @@ -381,6 +440,77 @@ module API { ) } + /** + * Holds if `rhs` is the right-hand side of a definition of a node that should have an + * incoming edge from `base` labeled `lbl` in the API graph. + */ + cached + predicate rhs(TApiNode base, Label::ApiLabel lbl, DataFlow::Node rhs) { + /* + * exists(string m, string prop | // TODO: Figure out module exports in Python + * base = MkModuleExport(m) and + * lbl = Label::member(prop) and + * exports(m, prop, rhs) + * ) + * or + */ + + exists(DataFlow::Node def, DataFlow::LocalSourceNode pred | + rhs(base, def) and pred = trackDefNode(def) + | + // from `x` to a definition of `x.prop` + exists(DataFlow::AttrWrite pw | pw = pred.getAnAttributeWrite() | + lbl = Label::memberFromRef(pw) and + rhs = pw.getValue() + ) + // or + // special case: from `require('m')` to an export of `prop` in `m` + // TODO: Figure out if this is needed. 
+ /* + * exists(Import imp, Module m, string prop | + * pred = imp.getImportedModuleNode() and + * m = imp.getImportedModule() and + * lbl = Label::member(prop) and + * rhs = m.getAnExportedValue(prop) + * ) + * or + * // TODO: + * exists(DataFlow::FunctionNode fn | fn = pred | + * not fn.getFunction().isAsync() and + * lbl = Label::return() and + * rhs = fn.getAReturn() + * ) + * or + * lbl = Label::promised() and + * PromiseFlow::storeStep(rhs, pred, Promises::valueProp()) + */ + + ) + or + /* + * or // TODO: + * exists(DataFlow::FunctionNode f | + * base = MkAsyncFuncResult(f) and + * lbl = Label::promised() and + * rhs = f.getAReturn() + * ) + */ + + exists(int i | + lbl = Label::parameter(i) and + argumentPassing(base, i, rhs) + ) + /* + * or // TODO: + * exists(DataFlow::SourceNode src, DataFlow::PropWrite pw | + * use(base, src) and pw = trackUseNode(src).getAPropertyWrite() and rhs = pw.getRhs() + * | + * lbl = Label::memberFromRef(pw) + * ) + */ + + } + /** * Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled * `lbl` in the API graph. @@ -419,6 +549,21 @@ module API { ) ) or + exists(DataFlow::Node def, CallableExpr fn | + rhs(base, def) and fn = trackDefNode(def).asExpr() + | + exists(int i | + lbl = Label::parameter(i) and + ref.asExpr() = fn.getInnerScope().getArg(i) + ) + /* + * or // TODO: Figure out self. + * lbl = Label::receiver() and + * ref = fn.getReceiver() + */ + + ) + or // Built-ins, treated as members of the module `builtins` base = MkModuleImport("builtins") and lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name))) @@ -466,6 +611,53 @@ module API { exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t)) } + /** + * Holds if `arg` is passed as the `i`th argument to a use of `base`, either by means of a + * full invocation, or in a partial function application. + * + * The receiver is meant to be argument -1, but receiver arguments are not handled yet (see the TODO in the body).
+ */ + private predicate argumentPassing(TApiNode base, int i, DataFlow::Node arg) { + exists(DataFlow::Node use, DataFlow::LocalSourceNode pred | + use(base, use) and pred = trackUseNode(use, _) + | + arg = pred.getACall().getArg(i) + /* + * or // TODO: Figure out self in argument. + * arg = pred.getACall().getReceiver() and + * i = -1 + */ + + ) + } + + /** + * Gets a node that inter-procedurally flows into `nd`, which is a definition of some node. + */ + cached + DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd) { + result = trackDefNode(nd, DataFlow::TypeBackTracker::end()) + } + + private DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) { + t.start() and + rhs(_, nd) and + result = nd.getALocalSource() + or + // TODO: Figure out module exports in Python, and if this thing is needed. + // additional backwards step from `require('m')` to `exports` or `module.exports` in m + /* + * exists(Import imp | imp.getImportedModuleNode() = trackDefNode(nd, t.continue()) | + * result = DataFlow::exportsVarNode(imp.getImportedModule()) + * or + * result = DataFlow::moduleVarNode(imp.getImportedModule()).getAPropertyRead("exports") + * ) + * or + */ + + exists(DataFlow::TypeBackTracker t2 | result = trackDefNode(nd, t2).backtrack(t2, t)) + } + /** * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. * @@ -477,6 +669,16 @@ module API { not result instanceof DataFlow::ModuleVariableNode } + /** + * Holds if `rhs` is the right-hand side of a definition of node `nd`. + */ + cached + predicate rhs(TApiNode nd, DataFlow::Node rhs) { + // exists(string m | nd = MkModuleExport(m) | exports(m, rhs)) // TODO: Figure out module exported in Py. + // or + nd = MkDef(rhs) + } + /** * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. 
*/ @@ -503,6 +705,11 @@ module API { use(pred, lbl, ref) and succ = MkUse(ref) ) + or + exists(DataFlow::Node rhs | + rhs(pred, lbl, rhs) and + succ = MkDef(rhs) + ) } /** @@ -539,7 +746,9 @@ module API { } or MkLabelUnknownMember() or MkLabelParameter(int i) { - none() // TODO: Fill in when adding def nodes + exists(any(DataFlow::CallCfgNode c).getArg(i)) + or + i = [-1 .. 10] // TODO: Def nodes, figure out how to make this prettier. } or MkLabelReturn() or MkLabelSubclass() or @@ -582,6 +791,7 @@ module API { LabelParameter() { this = MkLabelParameter(i) } + // TODO: Named parameters, spread arguments. override string toString() { result = "getParameter(" + i + ")" } /** Gets the index of the parameter for this label. */ @@ -627,6 +837,9 @@ module API { result = unknownMember() } + /** Gets the `parameter` edge label for parameter `i`. */ + LabelParameter parameter(int i) { result.getIndex() = i } + /** Gets the `return` edge label. */ LabelReturn return() { any() } diff --git a/python/ql/test/library-tests/ApiGraphs/def.expected b/python/ql/test/library-tests/ApiGraphs/def.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/ApiGraphs/def.ql b/python/ql/test/library-tests/ApiGraphs/def.ql new file mode 100644 index 00000000000..345d20417f7 --- /dev/null +++ b/python/ql/test/library-tests/ApiGraphs/def.ql @@ -0,0 +1,36 @@ +import python +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +import semmle.python.ApiGraphs + +class ApiDefTest extends InlineExpectationsTest { + ApiDefTest() { this = "ApiDefTest" } + + override string getARelevantTag() { result = "def" } + + private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) { + n = a.getARhs() and + l = n.getLocation() and + // Module variable nodes have no suitable location, so it's best to simply exclude them entirely + // from the inline tests. 
+ not n instanceof DataFlow::ModuleVariableNode and + exists(l.getFile().getRelativePath()) and + n.getLocation().getFile().getBaseName().matches("def%.py") + } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(API::Node a, DataFlow::Node n | relevant_node(a, n, location) | + tag = "def" and + // Only report the longest path on this line: + value = + max(API::Node a2, Location l2 | + relevant_node(a2, _, l2) and + l2.getFile() = location.getFile() and + l2.getStartLine() = location.getStartLine() + | + a2.getPath() + ) and + element = n.toString() + ) + } +} diff --git a/python/ql/test/library-tests/ApiGraphs/deftest1.py b/python/ql/test/library-tests/ApiGraphs/deftest1.py new file mode 100644 index 00000000000..1425a258205 --- /dev/null +++ b/python/ql/test/library-tests/ApiGraphs/deftest1.py @@ -0,0 +1,6 @@ +from mypkg import foo #$ use=moduleImport("mypkg").getMember("foo") + +def callback(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0).getParameter(0) + x.baz() #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0).getParameter(0).getMember("baz").getReturn() + +foo.bar(callback) #$ def=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("bar").getReturn()