From 60b5af215feb82774e278fa3fa8edcb2358c8b10 Mon Sep 17 00:00:00 2001 From: Erik Krogh Kristensen Date: Tue, 7 Dec 2021 11:16:25 +0100 Subject: [PATCH] cached stages iteration 2 --- python/ql/lib/semmle/python/ApiGraphs.qll | 2 + python/ql/lib/semmle/python/AstExtended.qll | 7 ++- python/ql/lib/semmle/python/Exprs.qll | 7 ++- python/ql/lib/semmle/python/Flow.qll | 10 ++-- python/ql/lib/semmle/python/Module.qll | 3 ++ .../dataflow/new/internal/TypeTracker.qll | 2 + .../semmle/python/internal/CachedStages.qll | 49 +++++++++++++++++++ .../semmle/python/objects/ObjectInternal.qll | 7 ++- 8 files changed, 81 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 0c66d3224f2..a6e81fb5732 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -9,6 +9,7 @@ // Importing python under the `py` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`. private import python as py import semmle.python.dataflow.new.DataFlow +private import semmle.python.internal.CachedStages /** * Provides classes and predicates for working with APIs used in a database. 
@@ -683,6 +684,7 @@ module API { */ cached DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) { + Stages::TypeTracking::ref() and result = trackUseNode(src, DataFlow::TypeTracker::end()) and not result instanceof DataFlow::ModuleVariableNode } diff --git a/python/ql/lib/semmle/python/AstExtended.qll b/python/ql/lib/semmle/python/AstExtended.qll index 25a8784c615..2f8184f2c21 100644 --- a/python/ql/lib/semmle/python/AstExtended.qll +++ b/python/ql/lib/semmle/python/AstExtended.qll @@ -1,4 +1,5 @@ import python +private import semmle.python.internal.CachedStages /** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */ abstract class AstNode extends AstNode_ { @@ -20,6 +21,7 @@ abstract class AstNode extends AstNode_ { ControlFlowNode getAFlowNode() { py_flow_bb_node(result, this, _, _) } /** Gets the location for this AST node */ + cached Location getLocation() { none() } /** @@ -106,7 +108,10 @@ class Comprehension extends Comprehension_, AstNode { override string toString() { result = "Comprehension" } - override Location getLocation() { result = Comprehension_.super.getLocation() } + override Location getLocation() { + Stages::SSA::ref() and + result = Comprehension_.super.getLocation() + } override AstNode getAChildNode() { result = this.getASubExpression() } diff --git a/python/ql/lib/semmle/python/Exprs.qll b/python/ql/lib/semmle/python/Exprs.qll index cef83af0f5a..f52bc93383d 100644 --- a/python/ql/lib/semmle/python/Exprs.qll +++ b/python/ql/lib/semmle/python/Exprs.qll @@ -1,6 +1,7 @@ import python private import semmle.python.pointsto.PointsTo private import semmle.python.objects.ObjectInternal +private import semmle.python.internal.CachedStages /** An expression */ class Expr extends Expr_, AstNode { @@ -8,7 +9,11 @@ class Expr extends Expr_, AstNode { override Scope getScope() { py_scopes(this, result) } /** Gets a textual representation of this element. 
*/ - override string toString() { result = "Expression" } + cached + override string toString() { + Stages::SSA::ref() and + result = "Expression" + } /** Gets the module in which this expression occurs */ Module getEnclosingModule() { result = this.getScope().getEnclosingModule() } diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll index bbc4fe7f79e..23366410cb4 100755 --- a/python/ql/lib/semmle/python/Flow.qll +++ b/python/ql/lib/semmle/python/Flow.qll @@ -1001,12 +1001,16 @@ class BasicBlock extends @py_flow_node { string toString() { result = "BasicBlock" } /** Whether this basic block strictly dominates the other */ - pragma[nomagic] - predicate strictlyDominates(BasicBlock other) { other.getImmediateDominator+() = this } + cached + predicate strictlyDominates(BasicBlock other) { + Stages::SSA::ref() and + other.getImmediateDominator+() = this + } /** Whether this basic block dominates the other */ - pragma[nomagic] + cached predicate dominates(BasicBlock other) { + Stages::SSA::ref() and this = other or this.strictlyDominates(other) diff --git a/python/ql/lib/semmle/python/Module.qll b/python/ql/lib/semmle/python/Module.qll index 775e3df2069..0cc96711374 100644 --- a/python/ql/lib/semmle/python/Module.qll +++ b/python/ql/lib/semmle/python/Module.qll @@ -1,6 +1,7 @@ import python private import semmle.python.objects.ObjectAPI private import semmle.python.objects.Modules +private import semmle.python.internal.CachedStages /** * A module. This is the top level element in an AST, corresponding to a source file. 
@@ -221,7 +222,9 @@ private predicate transitively_imported_from_entry_point(File file) { ) } +cached string moduleNameFromFile(Container file) { + Stages::SSA::ref() and exists(string basename | basename = moduleNameFromBase(file) and legalShortName(basename) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll index c463d213920..e3d2f60eab0 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll @@ -1,6 +1,7 @@ /** Step Summaries and Type Tracking */ private import TypeTrackerSpecific +private import semmle.python.internal.CachedStages /** * A string that may appear as the name of a piece of content. This will usually include things like: @@ -40,6 +41,7 @@ private module Cached { /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ cached TypeTracker append(TypeTracker tt, StepSummary step) { + Stages::TypeTracking::ref() and exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) | step = LevelStep() and result = tt or diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll index 13291171954..d5ec7aa5fdc 100644 --- a/python/ql/lib/semmle/python/internal/CachedStages.qll +++ b/python/ql/lib/semmle/python/internal/CachedStages.qll @@ -37,6 +37,7 @@ module Stages { */ cached module SSA { + // TODO: This is more a "basic AST", not a "SSA" stage. /** * Always holds. * Ensures that a predicate is evaluated as part of the Ast stage. 
@@ -47,6 +48,10 @@ module Stages { private import semmle.python.essa.SsaDefinitions as SsaDefinitions private import semmle.python.essa.SsaCompute as SsaCompute private import semmle.python.essa.Essa as Essa + private import semmle.python.Module as PyModule + private import semmle.python.Exprs as Exprs + private import semmle.python.AstExtended as AstExtended + private import semmle.python.Flow as PyFlow /** * DONT USE! @@ -61,6 +66,47 @@ module Stages { SsaCompute::SsaDefinitions::reachesEndOfBlock(_, _, _, _) or exists(any(Essa::PhiFunction p).getInput(_)) + or + exists(PyModule::moduleNameFromFile(_)) + or + exists(any(Exprs::Expr e).toString()) + or + exists(any(AstExtended::AstNode n).getLocation()) + or + exists(any(PyFlow::BasicBlock b).getImmediateDominator()) + or + any(PyFlow::BasicBlock b).strictlyDominates(_) + or + any(PyFlow::BasicBlock b).dominates(_) + } + } + + /** + * The `TypeTracking` stage. + */ + cached + module TypeTracking { + /** + * Always holds. + * Ensures that a predicate is evaluated as part of the TypeTracking stage. + */ + cached + predicate ref() { 1 = 1 } + + private import semmle.python.dataflow.new.DataFlow::DataFlow as NewDataFlow + private import semmle.python.ApiGraphs::API as API + + /** + * DONT USE! + * Contains references to each predicate that use the above `ref` predicate. + */ + cached + predicate backref() { + 1 = 1 + or + exists(any(NewDataFlow::TypeTracker t).append(_)) + or + exists(any(API::Node n).getAMember().getAUse()) } } @@ -83,6 +129,7 @@ module Stages { private import semmle.python.types.Object as TypeObject private import semmle.python.objects.TObject as TObject private import semmle.python.Flow as Flow + private import semmle.python.objects.ObjectInternal as ObjectInternal /** * DONT USE! 
@@ -107,6 +154,8 @@ module Stages { exists(TObject::TObject f) or exists(any(Flow::ControlFlowNode c).toString()) + or + exists(any(ObjectInternal::ObjectInternal o).toString()) } } diff --git a/python/ql/lib/semmle/python/objects/ObjectInternal.qll b/python/ql/lib/semmle/python/objects/ObjectInternal.qll index 16add4c370d..e60b506c449 100644 --- a/python/ql/lib/semmle/python/objects/ObjectInternal.qll +++ b/python/ql/lib/semmle/python/objects/ObjectInternal.qll @@ -15,9 +15,11 @@ import semmle.python.objects.Callables import semmle.python.objects.Constants import semmle.python.objects.Sequences import semmle.python.objects.Descriptors +private import semmle.python.internal.CachedStages class ObjectInternal extends TObject { /** Gets a textual representation of this element. */ + cached abstract string toString(); /** @@ -213,7 +215,10 @@ class ObjectInternal extends TObject { class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject { override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) } - override string toString() { result = this.getBuiltin().getClass().getName() + " object" } + override string toString() { + Stages::DataFlow::ref() and + result = this.getBuiltin().getClass().getName() + " object" + } override boolean booleanValue() { // TO DO ... Depends on class. `result = this.getClass().instancesBooleanValue()`