From 45bb4a0ee5344ec101618301bec112bbecfa242c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 8 Feb 2024 12:48:15 +0100 Subject: [PATCH 1/3] python: remove `TaintStepFromSummary` as it should be covered by `SummarizedCallableFromModel` Also move things around, to look more like the Ruby code. --- .../python/dataflow/new/FlowSummary.qll | 31 +------------- .../python/frameworks/data/ModelsAsData.qll | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 45 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 800c9592dcc..9c3033e6126 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -2,7 +2,6 @@ private import python private import semmle.python.dataflow.new.DataFlow -private import semmle.python.frameworks.data.ModelsAsData private import semmle.python.ApiGraphs private import internal.FlowSummaryImpl as Impl private import internal.DataFlowUtil @@ -11,6 +10,7 @@ private import internal.DataFlowPrivate // import all instances below private module Summaries { private import semmle.python.Frameworks + private import semmle.python.frameworks.data.ModelsAsData } deprecated class SummaryComponent = Impl::Private::SummaryComponent; @@ -36,32 +36,3 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari } deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; - -private class SummarizedCallableFromModel extends SummarizedCallable { - string type; - string path; - - SummarizedCallableFromModel() { - ModelOutput::relevantSummaryModel(type, path, _, _, _) and - this = type + ";" + path - } - - override CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } - - override ArgumentNode getACallback() { - exists(API::Node base | - ModelOutput::resolvedSummaryRefBase(type, path, base) and - result = base.getAValueReachableFromSource() - ) - } - - override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | - kind = "value" and - preservesValue = true - or - kind = "taint" and - preservesValue = false - ) - } -} diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll index f8d7ae75ad0..34e48439271 100644 --- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -17,7 +17,7 @@ import Shared::ModelOutput as ModelOutput private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.DataFlow private import semmle.python.ApiGraphs -private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.dataflow.new.FlowSummary /** * A remote flow source originating from a CSV source row. @@ -28,20 +28,31 @@ private class RemoteFlowSourceFromCsv extends RemoteFlowSource { override string getSourceType() { result = "Remote flow (from model)" } } -/** - * Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes. - */ -private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) { - exists(API::Node predNode, API::Node succNode | - Specific::summaryStep(predNode, succNode, kind) and - pred = predNode.asSink() and - succ = succNode.asSource() - ) -} +private class SummarizedCallableFromModel extends SummarizedCallable { + string type; + string path; -/** Taint steps induced by summary models of kind `taint`. */ -private class TaintStepFromSummary extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node pred, DataFlow::Node succ) { - summaryStepNodes(pred, succ, "taint") + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _) and + this = type + ";" + path + } + + override DataFlow::CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } + + override DataFlow::ArgumentNode getACallback() { + exists(API::Node base | + ModelOutput::resolvedSummaryRefBase(type, path, base) and + result = base.getAValueReachableFromSource() + ) + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) } } From 580e68d5de4740a2d7ce7d1af38dfe814a853a7a Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 9 Feb 2024 13:51:16 +0100 Subject: [PATCH 2/3] python: add support for lower bound position --- .../dataflow/new/internal/DataFlowDispatch.qll | 16 ++++++++++++++++ .../dataflow/new/internal/FlowSummaryImpl.qll | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 87a278e0f6b..9bf0ec96084 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -57,6 +57,9 @@ newtype TParameterPosition = // parameter positions available. FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index) } or + TPositionalParameterLowerBoundPosition(int pos) { + FlowSummaryImpl::ParsePositions::isParsedArgumentLowerBoundPosition(_, pos) + } or TKeywordParameterPosition(string name) { name = any(Parameter p).getName() or @@ -91,6 +94,9 @@ class ParameterPosition extends TParameterPosition { /** Holds if this position represents a positional parameter at (0-based) `index`. */ predicate isPositional(int index) { this = TPositionalParameterPosition(index) } + /** Holds if this position represents any positional parameter starting from position `pos`. */ + predicate isPositionalLowerBound(int pos) { this = TPositionalParameterLowerBoundPosition(pos) } + /** Holds if this position represents a keyword parameter named `name`. */ predicate isKeyword(string name) { this = TKeywordParameterPosition(name) } @@ -123,6 +129,8 @@ class ParameterPosition extends TParameterPosition { or exists(int index | this.isPositional(index) and result = "position " + index) or + exists(int pos | this.isPositionalLowerBound(pos) and result = "position " + pos + "..") + or exists(string name | this.isKeyword(name) and result = "keyword " + name) or exists(int index | this.isStarArgs(index) and result = "*args at " + index) @@ -211,6 +219,10 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { or exists(int index | ppos.isPositional(index) and apos.isPositional(index)) or + exists(int index1, int index2 | + ppos.isPositionalLowerBound(index1) and apos.isPositional(index2) and index2 >= index1 + ) + or exists(string name | ppos.isKeyword(name) and apos.isKeyword(name)) or exists(int index | ppos.isStarArgs(index) and apos.isStarArgs(index)) @@ -360,6 +372,10 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction { result.getParameter() = func.getArg(index + this.positionalOffset()) ) or + exists(int index1, int index2 | ppos.isPositionalLowerBound(index1) and index2 >= index1 | + result.getParameter() = func.getArg(index2 + this.positionalOffset()) + ) + or exists(string name | ppos.isKeyword(name) | result.getParameter() = func.getArgByName(name)) or // `*args` diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 4a55d38edb6..a673a188133 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -195,6 +195,11 @@ module ParsePositions { i = AccessPath::parseInt(c) } + predicate isParsedArgumentLowerBoundPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseLowerBound(c) + } + predicate isParsedKeywordArgumentPosition(string c, string argName) { isArgBody(c) and c = argName + ":" From 3601773856c09fe559c45b5227329ce992a38869 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 8 Mar 2024 14:59:28 +0100 Subject: [PATCH 3/3] python: support encoding lower bound --- .../semmle/python/dataflow/new/internal/FlowSummaryImpl.qll | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index a673a188133..6a7463ccb9b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -27,6 +27,11 @@ module Input implements InputSig { result = i.toString() ) or + exists(int i | + pos.isPositionalLowerBound(i) and + result = i + ".." + ) + or exists(string name | pos.isKeyword(name) and result = name + ":"