Merge branch 'main' into python-UBV

This commit is contained in:
Rasmus Wriedt Larsen
2023-05-22 11:53:56 +02:00
1474 changed files with 73298 additions and 61895 deletions

View File

@@ -1,3 +1,9 @@
## 0.9.1
### Minor Analysis Improvements
* Added support for querying the contents of YAML files.
## 0.9.0
### Deprecated APIs

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Type tracking is now aware of reads of captured variables (variables defined in an outer scope). This leads to a richer API graph, and may lead to more results in some queries.

View File

@@ -1,4 +1,5 @@
---
category: minorAnalysis
---
## 0.9.1
### Minor Analysis Improvements
* Added support for querying the contents of YAML files.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.9.0
lastReleaseVersion: 0.9.1

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.9.1-dev
version: 0.9.2-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
@@ -12,3 +12,4 @@ dependencies:
codeql/yaml: ${workspace}
dataExtensions:
- semmle/python/frameworks/**/model.yml
warnOnImplicitThis: true

View File

@@ -987,7 +987,7 @@ module API {
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
Stages::TypeTracking::ref() and
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
result instanceof DataFlow::ExprNode
result instanceof DataFlow::LocalSourceNodeNotModuleVariableNode
}
/**

View File

@@ -421,6 +421,24 @@ module RegexExecution {
}
}
/**
* A node where a string is interpreted as a regular expression,
* for instance an argument to `re.compile`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegExpInterpretation::Range` instead.
*/
class RegExpInterpretation extends DataFlow::Node instanceof RegExpInterpretation::Range { }
/** Provides a class for modeling regular expression interpretations. */
module RegExpInterpretation {
/**
* A node where a string is interpreted as a regular expression,
* for instance an argument to `re.compile`.
*/
abstract class Range extends DataFlow::Node { }
}
/** Provides classes for modeling XML-related APIs. */
module XML {
/**

View File

@@ -7,7 +7,7 @@
*/
import python
import semmle.python.RegexTreeView
import semmle.python.regexp.RegexTreeView
import semmle.python.Yaml
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()

File diff suppressed because it is too large Load Diff

View File

@@ -51,7 +51,7 @@ private CryptographicAlgorithm getBestAlgorithmForName(string name) {
*/
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/** Gets a textual representation of this element. */
string toString() { result = getName() }
string toString() { result = this.getName() }
/**
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).

View File

@@ -28,6 +28,27 @@ module SummaryComponent {
/** Gets a summary component that represents a list element. */
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
/** Gets a summary component that represents a set element. */
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
/** Gets a summary component that represents a tuple element. */
SummaryComponent tupleElement(int index) {
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
}
/** Gets a summary component that represents a dictionary element. */
SummaryComponent dictionaryElement(string key) {
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
}
/** Gets a summary component that represents a dictionary element at any key. */
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
/** Gets a summary component that represents an attribute element. */
SummaryComponent attribute(string attr) {
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
}
/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
}

View File

@@ -2,9 +2,10 @@
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regexp.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.regexp.internal.RegExpTracking
/**
* Provides utility predicates related to regular expressions.
@@ -25,18 +26,18 @@ deprecated module RegExpPatterns {
* as a part of a regular expression.
*/
class RegExpPatternSource extends DataFlow::CfgNode {
private Regex astNode;
private RegExpSink sink;
RegExpPatternSource() { astNode = this.asExpr() }
RegExpPatternSource() { this = regExpSource(sink) }
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
DataFlow::Node getAParse() { result = this }
RegExpSink getAParse() { result = sink }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
RegExpTerm getRegExpTerm() { result.getRegex() = astNode }
RegExpTerm getRegExpTerm() { result.getRegex() = this.asExpr() }
}

View File

@@ -58,6 +58,9 @@ module Consistency {
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
none()
}
/** Holds if `n` should be excluded from the consistency test `identityLocalStep`. */
predicate identityLocalStepExclude(Node n) { none() }
}
private class RelevantNode extends Node {
@@ -287,4 +290,10 @@ module Consistency {
not exists(unique(ContentApprox approx | approx = getContentApprox(c))) and
msg = "Non-unique content approximation."
}
query predicate identityLocalStep(Node n, string msg) {
simpleLocalFlowStep(n, n) and
not any(ConsistencyConfiguration c).identityLocalStepExclude(n) and
msg = "Node steps to itself"
}
}

View File

@@ -1,398 +0,0 @@
/**
* DEPRECATED: Use `Global` and `GlobalWithState` instead.
*
* Provides a `Configuration` class backwards-compatible interface to the data
* flow library.
*/
private import DataFlowImplCommon
private import DataFlowImplSpecific::Private
import DataFlowImplSpecific::Public
private import DataFlowImpl
import DataFlowImplCommonPublic
import FlowStateString
private import codeql.util.Unit
/**
* A configuration of interprocedural data flow analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the global data flow library must define its own unique extension
* of this abstract class. To create a configuration, extend this class with
* a subclass whose characteristic predicate is a unique singleton string.
* For example, write
*
* ```ql
* class MyAnalysisConfiguration extends DataFlow::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isBarrier`.
* // Optionally override `isAdditionalFlowStep`.
* }
* ```
* Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
* the edges are those data-flow steps that preserve the value of the node
* along with any additional edges defined by `isAdditionalFlowStep`.
* Specifying nodes in `isBarrier` will remove those nodes from the graph, and
* specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
* and/or out-going edges from those nodes, respectively.
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but two classes extending
* `DataFlow::Configuration` should never depend on each other. One of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
*/
abstract class Configuration extends string {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Node source) { none() }
/**
* Holds if `source` is a relevant data flow source with the given initial
* `state`.
*/
predicate isSource(Node source, FlowState state) { none() }
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Node sink) { none() }
/**
* Holds if `sink` is a relevant data flow sink accepting `state`.
*/
predicate isSink(Node sink, FlowState state) { none() }
/**
* Holds if data flow through `node` is prohibited. This completely removes
* `node` from the data flow graph.
*/
predicate isBarrier(Node node) { none() }
/**
* Holds if data flow through `node` is prohibited when the flow state is
* `state`.
*/
predicate isBarrier(Node node, FlowState state) { none() }
/** Holds if data flow into `node` is prohibited. */
predicate isBarrierIn(Node node) { none() }
/** Holds if data flow out of `node` is prohibited. */
predicate isBarrierOut(Node node) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited.
*/
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
*
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`
*/
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
}
/**
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
* This can be overridden to a smaller value to improve performance (a
* value of 0 disables field flow), or a larger value to get more results.
*/
int fieldFlowBranchLimit() { result = 2 }
/**
* Gets a data flow configuration feature to add restrictions to the set of
* valid flow paths.
*
* - `FeatureHasSourceCallContext`:
* Assume that sources have some existing call context to disallow
* conflicting return-flow directly following the source.
* - `FeatureHasSinkCallContext`:
* Assume that sinks have some existing call context to disallow
* conflicting argument-to-parameter flow directly preceding the sink.
* - `FeatureEqualSourceSinkCallContext`:
* Implies both of the above and additionally ensures that the entire flow
* path preserves the call context.
*
* These features are generally not relevant for typical end-to-end data flow
* queries, but should only be used for constructing paths that need to
* somehow be pluggable in another path context.
*/
FlowFeature getAFeature() { none() }
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
predicate sourceGrouping(Node source, string sourceGroup) { none() }
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
predicate sinkGrouping(Node sink, string sinkGroup) { none() }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*/
predicate hasFlow(Node source, Node sink) { hasFlow(source, sink, this) }
/**
* Holds if data may flow from `source` to `sink` for this configuration.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate hasFlowPath(PathNode source, PathNode sink) { hasFlowPath(source, sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowTo(Node sink) { hasFlowTo(sink, this) }
/**
* Holds if data may flow from some source to `sink` for this configuration.
*/
predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) }
/**
* DEPRECATED: Use `FlowExploration<explorationLimit>` instead.
*
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
* measured in approximate number of interprocedural steps.
*/
deprecated int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (for example in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
}
/**
* This class exists to prevent mutual recursion between the user-overridden
* member predicates of `Configuration` and the rest of the data-flow library.
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
*/
abstract private class ConfigurationRecursionPrevention extends Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
override predicate hasFlow(Node source, Node sink) {
strictcount(Node n | this.isSource(n)) < 0
or
strictcount(Node n | this.isSource(n, _)) < 0
or
strictcount(Node n | this.isSink(n)) < 0
or
strictcount(Node n | this.isSink(n, _)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
or
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0
or
super.hasFlow(source, sink)
}
}
/** A bridge class to access the deprecated `isBarrierGuard`. */
private class BarrierGuardGuardedNodeBridge extends Unit {
abstract predicate guardedNode(Node n, Configuration config);
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
}
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
deprecated override predicate guardedNode(Node n, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g) and
n = g.getAGuardedNode()
)
}
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
}
}
private FlowState relevantState(Configuration config) {
config.isSource(_, result) or
config.isSink(_, result) or
config.isBarrier(_, result) or
config.isAdditionalFlowStep(_, result, _, _) or
config.isAdditionalFlowStep(_, _, _, result)
}
private newtype TConfigState =
TMkConfigState(Configuration config, FlowState state) {
state = relevantState(config) or state instanceof FlowStateEmpty
}
private Configuration getConfig(TConfigState state) { state = TMkConfigState(result, _) }
private FlowState getState(TConfigState state) { state = TMkConfigState(_, result) }
private predicate singleConfiguration() { 1 = strictcount(Configuration c) }
private module Config implements FullStateConfigSig {
class FlowState = TConfigState;
predicate isSource(Node source, FlowState state) {
getConfig(state).isSource(source, getState(state))
or
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
}
predicate isSink(Node sink, FlowState state) {
getConfig(state).isSink(sink, getState(state))
or
getConfig(state).isSink(sink) and getState(state) instanceof FlowStateEmpty
}
predicate isBarrier(Node node) { none() }
predicate isBarrier(Node node, FlowState state) {
getConfig(state).isBarrier(node, getState(state)) or
getConfig(state).isBarrier(node) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
}
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
predicate isAdditionalFlowStep(Node node1, Node node2) {
singleConfiguration() and
any(Configuration config).isAdditionalFlowStep(node1, node2)
}
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
getConfig(state1).isAdditionalFlowStep(node1, getState(state1), node2, getState(state2)) and
getConfig(state2) = getConfig(state1)
or
not singleConfiguration() and
getConfig(state1).isAdditionalFlowStep(node1, node2) and
state2 = state1
}
predicate allowImplicitRead(Node node, ContentSet c) {
any(Configuration config).allowImplicitRead(node, c)
}
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
predicate sourceGrouping(Node source, string sourceGroup) {
any(Configuration config).sourceGrouping(source, sourceGroup)
}
predicate sinkGrouping(Node sink, string sinkGroup) {
any(Configuration config).sinkGrouping(sink, sinkGroup)
}
predicate includeHiddenNodes() { any(Configuration config).includeHiddenNodes() }
}
private import Impl<Config> as I
/**
* A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
*/
class PathNode instanceof I::PathNode {
/** Gets a textual representation of this element. */
final string toString() { result = super.toString() }
/**
* Gets a textual representation of this element, including a textual
* representation of the call context.
*/
final string toStringWithContext() { result = super.toStringWithContext() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
final predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the underlying `Node`. */
final Node getNode() { result = super.getNode() }
/** Gets the `FlowState` of this node. */
final FlowState getState() { result = getState(super.getState()) }
/** Gets the associated configuration. */
final Configuration getConfiguration() { result = getConfig(super.getState()) }
/** Gets a successor of this node, if any. */
final PathNode getASuccessor() { result = super.getASuccessor() }
/** Holds if this node is a source. */
final predicate isSource() { super.isSource() }
/** Holds if this node is a grouping of source nodes. */
final predicate isSourceGroup(string group) { super.isSourceGroup(group) }
/** Holds if this node is a grouping of sink nodes. */
final predicate isSinkGroup(string group) { super.isSinkGroup(group) }
}
module PathGraph = I::PathGraph;
private predicate hasFlow(Node source, Node sink, Configuration config) {
exists(PathNode source0, PathNode sink0 |
hasFlowPath(source0, sink0, config) and
source0.getNode() = source and
sink0.getNode() = sink
)
}
private predicate hasFlowPath(PathNode source, PathNode sink, Configuration config) {
I::flowPath(source, sink) and source.getConfiguration() = config
}
private predicate hasFlowTo(Node sink, Configuration config) { hasFlow(_, sink, config) }
predicate flowsTo = hasFlow/3;

View File

@@ -974,6 +974,8 @@ predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) {
n instanceof ModuleVariableNode
or
n instanceof SummaryNode
or
n instanceof SummaryParameterNode

View File

@@ -407,7 +407,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }
override string toString() {
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
}
/** Gets the module in which this variable appears. */

View File

@@ -335,7 +335,7 @@ module Public {
class NeutralCallable extends SummarizedCallableBase {
private Provenance provenance;
NeutralCallable() { neutralElement(this, provenance) }
NeutralCallable() { neutralSummaryElement(this, provenance) }
/**
* Holds if the neutral is auto generated.

View File

@@ -91,11 +91,11 @@ predicate summaryElement(
}
/**
* Holds if a neutral model exists for `c` with provenance `provenance`,
* Holds if a neutral summary model exists for `c` with provenance `provenance`,
* which means that there is no flow through `c`.
* Note. Neutral models have not been implemented for Python.
*/
predicate neutralElement(FlowSummary::SummarizedCallable c, string provenance) { none() }
predicate neutralSummaryElement(FlowSummary::SummarizedCallable c, string provenance) { none() }
/**
* Gets the summary component for specification component `c`, if any.
@@ -105,6 +105,27 @@ predicate neutralElement(FlowSummary::SummarizedCallable c, string provenance) {
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
c = "ListElement" and
result = FlowSummary::SummaryComponent::listElement()
or
c = "SetElement" and
result = FlowSummary::SummaryComponent::setElement()
or
exists(int index |
c.getAnArgument("TupleElement") = index.toString() and
result = FlowSummary::SummaryComponent::tupleElement(index)
)
or
exists(string key |
c.getAnArgument("DictionaryElement") = key and
result = FlowSummary::SummaryComponent::dictionaryElement(key)
)
or
c = "DictionaryElementAny" and
result = FlowSummary::SummaryComponent::dictionaryElementAny()
or
exists(string attr |
c.getAnArgument("Attribute") = attr and
result = FlowSummary::SummaryComponent::attribute(attr)
)
}
/** Gets the textual representation of a summary component in the format used for flow summaries. */

View File

@@ -51,6 +51,10 @@ class LocalSourceNode extends Node {
// We explicitly include any read of a global variable, as some of these may have local flow going
// into them.
this = any(ModuleVariableNode mvn).getARead()
or
// We include all scope entry definitions, as these act as the local source within the scope they
// enter.
this.asVar() instanceof ScopeEntryDefinition
}
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -133,6 +137,21 @@ class LocalSourceNode extends Node {
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
}
/**
* A LocalSourceNode that is not a ModuleVariableNode
* This class provides a positive formulation of that in its charpred.
*
* Aka FutureLocalSourceNode (see FutureWork below), but until the future is here...
*/
class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
cached
LocalSourceNodeNotModuleVariableNode() {
this instanceof ExprNode
or
this.asVar() instanceof ScopeEntryDefinition
}
}
/**
* A node that can be used for type tracking or type back-tracking.
*

View File

@@ -1,6 +1,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs
@@ -55,6 +56,8 @@ private module Cached {
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
}
}
@@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
* `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
*/
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
//
// TODO: once we have proper flow-summary modeling, we might not need this step any
@@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// don't provide that right now.
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and

View File

@@ -613,8 +613,17 @@ class TypeBackTracker extends TTypeBackTracker {
* also flow to `sink`.
*/
TypeTracker getACompatibleTypeTracker() {
exists(boolean hasCall | result = MkTypeTracker(hasCall, content) |
hasCall = false or this.hasReturn() = false
exists(boolean hasCall, OptionalTypeTrackerContent c |
result = MkTypeTracker(hasCall, c) and
(
compatibleContents(c, content)
or
content = noContent() and c = content
)
|
hasCall = false
or
this.hasReturn() = false
)
}
}

View File

@@ -43,7 +43,19 @@ predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
predicate jumpStep(Node nodeFrom, Node nodeTo) {
DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
or
capturedJumpStep(nodeFrom, nodeTo)
}
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
nodeFrom.asCfgNode() = def.getValue() and
var.getScope().getScope*() = nodeFrom.getScope()
)
}
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }

View File

@@ -2512,9 +2512,10 @@ module PrivateDjango {
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, RegExp regex |
regex.getAUse() = regexUse and
routeHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
@@ -2533,14 +2534,12 @@ module PrivateDjango {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class DjangoRouteRegex extends RegexString instanceof StrConst {
private class DjangoRouteRegex extends RegExpInterpretation::Range {
DjangoRegexRouteSetup rePathCall;
DjangoRouteRegex() {
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
}
DjangoRouteRegex() { this = rePathCall.getUrlPatternArg() }
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
}

View File

@@ -3015,6 +3015,17 @@ private module StdlibPrivate {
override string getKind() { result = Escaping::getRegexKind() }
}
/**
* A node interpreted as a regular expression.
* Speficically nodes where string values are interpreted as regular expressions.
*/
private class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
StdLibRegExpInterpretation() {
this =
API::moduleImport("re").getMember("compile").getACall().getParameter(0, "pattern").asSink()
}
}
// ---------------------------------------------------------------------------
// urllib
// ---------------------------------------------------------------------------
@@ -3779,6 +3790,138 @@ private module StdlibPrivate {
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// Flow summaries for functions contructing containers
// ---------------------------------------------------------------------------
/** A flow summary for `dict`. */
class DictSummary extends SummarizedCallable {
DictSummary() { this = "builtins.dict" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("dict").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[" + key + ":]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for `list`. */
class ListSummary extends SummarizedCallable {
ListSummary() { this = "builtins.list" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("list").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
preservesValue = true
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for tuple */
class TupleSummary extends SummarizedCallable {
TupleSummary() { this = "builtins.tuple" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("tuple").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
preservesValue = true
)
or
// TODO: We need to also translate iterable content such as list element
// but we currently lack TupleElementAny
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for set */
class SetSummary extends SummarizedCallable {
SetSummary() { this = "builtins.set" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("set").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.SetElement" and
preservesValue = true
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for frozenset */
class FrozensetSummary extends SummarizedCallable {
FrozensetSummary() { this = "builtins.frozenset" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("frozenset").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
any(SetSummary s).propagatesFlowExt(input, output, preservesValue)
}
}
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
ReversedSummary() { this = "builtins.reversed" }

View File

@@ -384,12 +384,12 @@ module Tornado {
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
* Needs this subclass to be considered a RegExpInterpretation.
*/
private class TornadoRouteRegex extends RegexString instanceof StrConst {
private class TornadoRouteRegex extends RegExpInterpretation::Range {
TornadoRouteSetup setup;
TornadoRouteRegex() { setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this) }
TornadoRouteRegex() { this = setup.getUrlPatternArg() }
TornadoRouteSetup getRouteSetup() { result = setup }
}
@@ -423,9 +423,10 @@ module Tornado {
not result = requestHandler.getArg(0)
)
or
exists(Function requestHandler, TornadoRouteRegex regex |
exists(Function requestHandler, TornadoRouteRegex regexUse, RegExp regex |
regex.getAUse() = regexUse and
requestHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
regexUse.getRouteSetup() = this
|
// first group will have group number 1
result = requestHandler.getArg(regex.getGroupNumber(_, _))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,76 @@
/**
* Provides predicates that track strings to where they are used as regular expressions.
* This is implemented using TypeTracking in two phases:
*
* 1: An exploratory backwards analysis that imprecisely tracks all nodes that may be used as regular expressions.
* The exploratory phase ends with a forwards analysis from string constants that were reached by the backwards analysis.
* This is similar to the exploratory phase of the JavaScript global DataFlow library.
*
* 2: A precise type tracking analysis that tracks constant strings to where they are used as regular expressions.
* This phase keeps track of which strings and regular expressions end up in which places.
*/
import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts as Concepts
/** Gets a constant string value that may be used as a regular expression. */
DataFlow::LocalSourceNode strStart() { result.asExpr() instanceof StrConst }
private import semmle.python.regex as Regex
/** A node where regular expressions that flow to the node are used. */
class RegExpSink extends DataFlow::Node {
RegExpSink() {
this = any(Concepts::RegexExecution exec).getRegex()
or
this instanceof Concepts::RegExpInterpretation
}
}
/**
* Gets a dataflow node that may end up being in any regular expression execution.
* This is the backwards exploratory phase of the analysis.
*/
private DataFlow::TypeTrackingNode backwards(DataFlow::TypeBackTracker t) {
t.start() and
result = any(RegExpSink sink).getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
}
/**
* Gets a reference to a string that reaches any regular expression execution.
* This is the forwards exploratory phase of the analysis.
*/
private DataFlow::TypeTrackingNode forwards(DataFlow::TypeTracker t) {
t.start() and
result = backwards(DataFlow::TypeBackTracker::end()) and
result = strStart()
or
exists(DataFlow::TypeTracker t2 | result = forwards(t2).track(t2, t)) and
result = backwards(_)
}
/**
* Gets a node that has been tracked from the string constant `start` to some node.
* This is used to figure out where `start` is evaluated as a regular expression.
*
* The result of the exploratory phase is used to limit the size of the search space in this precise analysis.
*/
private DataFlow::TypeTrackingNode regexTracking(DataFlow::Node start, DataFlow::TypeTracker t) {
result = forwards(t) and
(
t.start() and
start = strStart() and
result = start
or
exists(DataFlow::TypeTracker t2 | result = regexTracking(start, t2).track(t2, t))
)
}
/** Gets a node holding a value for the regular expression that is evaluated at `re`. */
cached
DataFlow::Node regExpSource(RegExpSink re) {
regexTracking(result, DataFlow::TypeTracker::end()).flowsTo(re)
}

View File

@@ -55,16 +55,16 @@ deprecated class CustomPathNode extends TCustomPathNode {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
this.asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets a textual representation of this element. */
string toString() {
result = asNode1().toString()
result = this.asNode1().toString()
or
result = asNode2().toString()
result = this.asNode2().toString()
}
}

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
private import semmle.python.regex

View File

@@ -5,14 +5,25 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeImpl
private import semmle.python.dataflow.new.Regexp as Regexp
private import codeql.regex.HostnameRegexp as Shared
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
class DataFlowNode = DataFlow::Node;
class RegExpPatternSource = Regexp::RegExpPatternSource;
class RegExpPatternSource extends DataFlow::Node instanceof Regexp::RegExpPatternSource {
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
DataFlow::Node getAParse() { result = super.getAParse() }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
TreeImpl::RegExpTerm getRegExpTerm() { result = super.getRegExpTerm() }
}
}
import Shared::Make<TreeImpl, Impl>

View File

@@ -1,3 +1,7 @@
## 0.7.1
No user-facing changes.
## 0.7.0
### Bug Fixes

View File

@@ -13,7 +13,7 @@
import python
import semmle.python.regex
from Regex r, int offset
from RegExp r, int offset
where
r.escapingChar(offset) and
r.getChar(offset + 1) = "b" and

View File

@@ -13,7 +13,7 @@
import python
import semmle.python.regex
predicate duplicate_char_in_class(Regex r, string char) {
predicate duplicate_char_in_class(RegExp r, string char) {
exists(int i, int j, int x, int y, int start, int end |
i != x and
j != y and
@@ -36,7 +36,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
)
}
from Regex r, string char
from RegExp r, string char
where duplicate_char_in_class(r, char)
select r,
"This regular expression includes duplicate character '" + char + "' in a set of characters."

View File

@@ -13,6 +13,6 @@
import python
import semmle.python.regex
from Regex r, string missing, string part
from RegExp r, string missing, string part
where r.getText().regexpMatch(".*\\(P<\\w+>.*") and missing = "?" and part = "named group"
select r, "Regular expression is missing '" + missing + "' in " + part + "."

View File

@@ -13,14 +13,14 @@
import python
import semmle.python.regex
predicate unmatchable_caret(Regex r, int start) {
predicate unmatchable_caret(RegExp r, int start) {
not r.getAMode() = "MULTILINE" and
not r.getAMode() = "VERBOSE" and
r.specialCharacter(start, start + 1, "^") and
not r.firstItem(start, start + 1)
}
from Regex r, int offset
from RegExp r, int offset
where unmatchable_caret(r, offset)
select r,
"This regular expression includes an unmatchable caret at offset " + offset.toString() + "."

View File

@@ -13,14 +13,14 @@
import python
import semmle.python.regex
predicate unmatchable_dollar(Regex r, int start) {
predicate unmatchable_dollar(RegExp r, int start) {
not r.getAMode() = "MULTILINE" and
not r.getAMode() = "VERBOSE" and
r.specialCharacter(start, start + 1, "$") and
not r.lastItem(start, start + 1)
}
from Regex r, int offset
from RegExp r, int offset
where unmatchable_dollar(r, offset)
select r,
"This regular expression includes an unmatchable dollar at offset " + offset.toString() + "."

View File

@@ -224,7 +224,7 @@ class ExternalApiUsedWithUntrustedData extends MkExternalApi {
/** Gets the number of untrusted sources used with this external API. */
int getNumberOfUntrustedSources() {
result = count(getUntrustedDataNode().getAnUntrustedSource())
result = count(this.getUntrustedDataNode().getAnUntrustedSource())
}
/** Gets a textual representation of this element. */

View File

@@ -12,7 +12,7 @@
* external/cwe/cwe-020
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from TreeView::RegExpCharacterRange range, string reason

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-186
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg

View File

@@ -14,7 +14,7 @@
* external/cwe/cwe-400
*/
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.regexp.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg

View File

@@ -73,11 +73,11 @@ class UninitializedConfig extends TaintTracking::Configuration {
override predicate isBarrier(DataFlow::Node node, TaintKind kind) {
kind instanceof Uninitialized and
(
definition(node.asVariable())
this.definition(node.asVariable())
or
use(node.asVariable())
this.use(node.asVariable())
or
sanitizingNode(node.asCfgNode())
this.sanitizingNode(node.asCfgNode())
)
}

View File

@@ -0,0 +1,3 @@
## 0.7.1
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.7.0
lastReleaseVersion: 0.7.1

View File

@@ -169,7 +169,7 @@ module LdapBind {
abstract predicate useSsl();
/** DEPRECATED: Alias for useSsl */
deprecated predicate useSSL() { useSsl() }
deprecated predicate useSSL() { this.useSsl() }
}
}
@@ -199,7 +199,7 @@ class LdapBind extends DataFlow::Node instanceof LdapBind::Range {
predicate useSsl() { super.useSsl() }
/** DEPRECATED: Alias for useSsl */
deprecated predicate useSSL() { useSsl() }
deprecated predicate useSSL() { this.useSsl() }
}
/** DEPRECATED: Alias for LdapBind */

View File

@@ -65,8 +65,8 @@ class DefectResult extends int {
/** Gets the URL corresponding to the location of this query result. */
string getURL() {
result =
"file://" + getFile().getAbsolutePath() + ":" + getStartLine() + ":" + getStartColumn() + ":" +
getEndLine() + ":" + getEndColumn()
"file://" + this.getFile().getAbsolutePath() + ":" + this.getStartLine() + ":" +
this.getStartColumn() + ":" + this.getEndLine() + ":" + this.getEndColumn()
}
}

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.7.1-dev
version: 0.7.2-dev
groups:
- python
- queries
@@ -9,3 +9,4 @@ dependencies:
suites: codeql-suites
extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls
warnOnImplicitThis: true

View File

@@ -39,4 +39,8 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
override predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
not exists(call.getLocation().getFile().getRelativePath())
}
override predicate identityLocalStepExclude(Node n) {
not exists(n.getLocation().getFile().getRelativePath())
}
}

View File

@@ -1,5 +1,8 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source, DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink))
where
exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.isSink(sink))
where
exists(CallGraphConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -1,4 +1,2 @@
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source
where exists(CallGraphConfig cfg | cfg.isSource(source))
where
exists(CallGraphConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -3,5 +3,7 @@ import allFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,5 +1,8 @@
import allFlowsConfig
from DataFlow::PathNode fromNode, DataFlow::PathNode toNode
where toNode = fromNode.getASuccessor()
where
toNode = fromNode.getASuccessor() and
exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and
exists(toNode.getNode().getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,11 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlow(fromNode, toNode)
where
DataFlow::localFlow(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |

View File

@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlowStep(fromNode, toNode)
where
DataFlow::localFlowStep(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,4 +1,3 @@
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |

View File

@@ -3,5 +3,7 @@ import maximalFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,7 @@
import allFlowsConfig
from DataFlow::Node sink
where exists(AllFlowsConfig cfg | cfg.isSink(sink))
where
exists(AllFlowsConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,7 @@
import allFlowsConfig
from DataFlow::Node source
where exists(AllFlowsConfig cfg | cfg.isSource(source))
where
exists(AllFlowsConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -27,3 +27,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -25,3 +25,17 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
| datamodel.py:84:15:84:15 | ControlFlowNode for x | Node steps to itself |
| datamodel.py:166:11:166:11 | ControlFlowNode for x | Node steps to itself |
| test.py:103:10:103:15 | ControlFlowNode for SOURCE | Node steps to itself |
| test.py:130:10:130:15 | ControlFlowNode for SOURCE | Node steps to itself |
| test.py:162:13:162:18 | ControlFlowNode for SOURCE | Node steps to itself |
| test.py:167:13:167:18 | ControlFlowNode for SOURCE | Node steps to itself |
| test.py:216:10:216:15 | ControlFlowNode for SOURCE | Node steps to itself |
| test.py:242:9:242:12 | ControlFlowNode for SINK | Node steps to itself |
| test.py:669:9:669:12 | ControlFlowNode for SINK | Node steps to itself |
| test.py:670:9:670:14 | ControlFlowNode for SINK_F | Node steps to itself |
| test.py:678:9:678:12 | ControlFlowNode for SINK | Node steps to itself |
| test.py:686:9:686:12 | ControlFlowNode for SINK | Node steps to itself |
| test.py:692:5:692:8 | ControlFlowNode for SINK | Node steps to itself |

View File

@@ -726,15 +726,15 @@ def test_deep_callgraph():
return f5(arg)
x = f6(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f5(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f4(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f3(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f2(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"

View File

@@ -0,0 +1,357 @@
# This tests some of the common built-in functions and methods.
# We need a decent model of data flow through these in order to
# analyse most programs.
#
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# Actual tests
## Container constructors
### List
@expects(2)
def test_list_from_list():
l1 = [SOURCE, NONSOURCE]
l2 = list(l1)
SINK(l2[0]) #$ flow="SOURCE, l:-2 -> l2[0]"
SINK_F(l2[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l2[1]"
# -- skip list_from_string
@expects(2)
def test_list_from_tuple():
t = (SOURCE, NONSOURCE)
l = list(t)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
def test_list_from_set():
s = {SOURCE}
l = list(s)
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
@expects(2)
def test_list_from_dict():
d = {SOURCE: 'v', NONSOURCE: 'v2'}
l = list(d)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) # expecting FP due to imprecise flow
### Tuple
@expects(2)
def test_tuple_from_list():
l = [SOURCE, NONSOURCE]
t = tuple(l)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
@expects(2)
def test_tuple_from_tuple():
t0 = (SOURCE, NONSOURCE)
t = tuple(t0)
SINK(t[0]) #$ flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
def test_tuple_from_set():
s = {SOURCE}
t = tuple(s)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
@expects(2)
def test_tuple_from_dict():
d = {SOURCE: "v1", NONSOURCE: "v2"}
t = tuple(d)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
### Set
def test_set_from_list():
l = [SOURCE]
s = set(l)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_tuple():
t = (SOURCE,)
s = set(t)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_set():
s0 = {SOURCE}
s = set(s0)
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_dict():
d = {SOURCE: "val"}
s = set(d)
v = s.pop()
SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v"
### Dict
@expects(2)
def test_dict_from_keyword():
d = dict(k = SOURCE, k1 = NONSOURCE)
SINK(d["k"]) #$ flow="SOURCE, l:-1 -> d['k']"
SINK_F(d["k1"])
@expects(2)
def test_dict_from_list():
d = dict([("k", SOURCE), ("k1", NONSOURCE)])
SINK(d["k"]) #$ MISSING: flow="SOURCE, l:-1 -> d[k]"
SINK_F(d["k1"])
@expects(2)
def test_dict_from_dict():
d1 = {'k': SOURCE, 'k1': NONSOURCE}
d2 = dict(d1)
SINK(d2["k"]) #$ flow="SOURCE, l:-2 -> d2['k']"
SINK_F(d2["k1"])
## Container methods
### List
def test_list_pop():
l = [SOURCE]
v = l.pop()
SINK(v) #$ flow="SOURCE, l:-2 -> v"
def test_list_pop_index():
l = [SOURCE]
v = l.pop(0)
SINK(v) #$ MISSING: flow="SOURCE, l:-2 -> v"
def test_list_pop_index_imprecise():
l = [SOURCE, NONSOURCE]
v = l.pop(1)
SINK_F(v)
@expects(2)
def test_list_copy():
l0 = [SOURCE, NONSOURCE]
l = l0.copy()
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1])
def test_list_append():
l = [NONSOURCE]
l.append(SOURCE)
SINK(l[1]) #$ MISSING: flow="SOURCE, l:-1 -> l[1]"
### Set
def test_set_pop():
s = {SOURCE}
v = s.pop()
SINK(v) #$ flow="SOURCE, l:-2 -> v"
def test_set_copy():
s0 = {SOURCE}
s = s0.copy()
SINK(s.pop()) #$ MISSING: flow="SOURCE, l:-2 -> s.pop()"
def test_set_add():
s = set([])
s.add(SOURCE)
SINK(s.pop()) #$ MISSING: flow="SOURCE, l:-2 -> s.pop()"
### Dict
def test_dict_keys():
d = {SOURCE: "value"}
keys = d.keys()
key_list = list(keys)
SINK(key_list[0]) #$ MISSING: flow="SOURCE, l:-3 -> key_list[0]"
def test_dict_values():
d = {'k': SOURCE}
vals = d.values()
val_list = list(vals)
SINK(val_list[0]) #$ MISSING: flow="SOURCE, l:-3 -> val_list[0]"
@expects(4)
def test_dict_items():
d = {'k': SOURCE, SOURCE: "value"}
items = d.items()
item_list = list(items)
SINK_F(item_list[0][0]) # expecting FP due to imprecise flow
SINK(item_list[0][1]) #$ MISSING: flow="SOURCE, l:-4 -> item_list[0][1]"
SINK(item_list[1][0]) #$ MISSING: flow="SOURCE, l:-5 -> item_list[1][0]"
SINK_F(item_list[1][1]) # expecting FP due to imprecise flow
@expects(3)
def test_dict_pop():
d = {'k': SOURCE}
v = d.pop("k")
SINK(v) #$ flow="SOURCE, l:-2 -> v"
v1 = d.pop("k", NONSOURCE)
SINK_F(v1) #$ SPURIOUS: flow="SOURCE, l:-4 -> v1"
v2 = d.pop("non-existing", SOURCE)
SINK(v2) #$ MISSING: flow="SOURCE, l:-1 -> v2"
@expects(2)
def test_dict_get():
d = {'k': SOURCE}
v = d.get("k")
SINK(v) #$ flow="SOURCE, l:-2 -> v"
v1 = d.get("non-existing", SOURCE)
SINK(v1) #$ MISSING: flow="SOURCE, l:-1 -> v1"
@expects(2)
def test_dict_popitem():
d = {'k': SOURCE}
t = d.popitem() # could be any pair (before 3.7), but we only have one
SINK_F(t[0])
SINK(t[1]) #$ MISSING: flow="SOURCE, l:-3 -> t[1]"
@expects(2)
def test_dict_copy():
d = {'k': SOURCE, 'k1': NONSOURCE}
d1 = d.copy()
SINK(d1["k"]) #$ MISSING: flow="SOURCE, l:-2 -> d[k]"
SINK_F(d1["k1"])
## Functions on containers
### sorted
def test_sorted_list():
l0 = [SOURCE]
l = sorted(l0)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
def test_sorted_tuple():
t = (SOURCE,)
l = sorted(t)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
def test_sorted_set():
s = {SOURCE}
l = sorted(s)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
def test_sorted_dict():
d = {SOURCE: "val"}
l = sorted(d)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
### reversed
@expects(2)
def test_reversed_list():
l0 = [SOURCE, NONSOURCE]
r = reversed(l0)
l = list(r)
SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]"
SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]"
@expects(2)
def test_reversed_tuple():
t = (SOURCE, NONSOURCE)
r = reversed(t)
l = list(r)
SINK_F(l[0])
SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]"
@expects(2)
def test_reversed_dict():
d = {SOURCE: "v1", NONSOURCE: "v2"}
r = reversed(d)
l = list(r)
SINK_F(l[0])
SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]"
### iter
def test_iter_list():
l0 = [SOURCE]
i = iter(l0)
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
def test_iter_tuple():
t = (SOURCE,)
i = iter(t)
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
def test_iter_set():
t = {SOURCE}
i = iter(t)
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
def test_iter_dict():
d = {SOURCE: "val"}
i = iter(d)
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
def test_iter_iter():
# applying iter() to the result of iter() is basically a no-op
l0 = [SOURCE]
i = iter(iter(l0))
l = list(i)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-3 -> l[0]"
### next
def test_next_list():
l = [SOURCE]
i = iter(l)
n = next(i)
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
def test_next_tuple():
t = (SOURCE,)
i = iter(t)
n = next(i)
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
def test_next_set():
s = {SOURCE}
i = iter(s)
n = next(i)
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
def test_next_dict():
d = {SOURCE: "val"}
i = iter(d)
n = next(i)
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -14,21 +14,21 @@ def is_source(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable is_source"
def SINK(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable SINK"
if is_source(x): #$ runtimeFlow="ModuleVariableNode for multiphase.is_source, l:-17 -> is_source"
print("OK") #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-18 -> print"
if is_source(x): #$ runtimeFlow="ModuleVariableNode in Module multiphase for is_source, l:-17 -> is_source"
print("OK") #$ runtimeFlow="ModuleVariableNode in Module multiphase for print, l:-18 -> print"
else:
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-20 -> print"
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode in Module multiphase for print, l:-20 -> print"
def SINK_F(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable SINK_F"
if is_source(x): #$ runtimeFlow="ModuleVariableNode for multiphase.is_source, l:-24 -> is_source"
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-25 -> print"
if is_source(x): #$ runtimeFlow="ModuleVariableNode in Module multiphase for is_source, l:-24 -> is_source"
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode in Module multiphase for print, l:-25 -> print"
else:
print("OK") #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-27 -> print"
print("OK") #$ runtimeFlow="ModuleVariableNode in Module multiphase for print, l:-27 -> print"
def set_foo(): #$ importTimeFlow="FunctionExpr -> GSSA Variable set_foo"
global foo
foo = SOURCE #$ runtimeFlow="ModuleVariableNode for multiphase.SOURCE, l:-31 -> SOURCE" # missing final definition of foo
foo = SOURCE #$ runtimeFlow="ModuleVariableNode in Module multiphase for SOURCE, l:-31 -> SOURCE" # missing final definition of foo
foo = NONSOURCE #$ importTimeFlow="NONSOURCE -> GSSA Variable foo"
set_foo()
@@ -36,7 +36,7 @@ set_foo()
@expects(2)
def test_phases(): #$ importTimeFlow="expects(..)(..), l:-1 -> GSSA Variable test_phases"
global foo
SINK(foo) #$ runtimeFlow="ModuleVariableNode for multiphase.SINK, l:-39 -> SINK" runtimeFlow="ModuleVariableNode for multiphase.foo, l:-39 -> foo"
foo = NONSOURCE #$ runtimeFlow="ModuleVariableNode for multiphase.NONSOURCE, l:-40 -> NONSOURCE"
set_foo() #$ runtimeFlow="ModuleVariableNode for multiphase.set_foo, l:-41 -> set_foo"
SINK(foo) #$ runtimeFlow="ModuleVariableNode for multiphase.SINK, l:-42 -> SINK" runtimeFlow="ModuleVariableNode for multiphase.foo, l:-42 -> foo"
SINK(foo) #$ runtimeFlow="ModuleVariableNode in Module multiphase for SINK, l:-39 -> SINK" runtimeFlow="ModuleVariableNode in Module multiphase for foo, l:-39 -> foo"
foo = NONSOURCE #$ runtimeFlow="ModuleVariableNode in Module multiphase for NONSOURCE, l:-40 -> NONSOURCE"
set_foo() #$ runtimeFlow="ModuleVariableNode in Module multiphase for set_foo, l:-41 -> set_foo"
SINK(foo) #$ runtimeFlow="ModuleVariableNode in Module multiphase for SINK, l:-42 -> SINK" runtimeFlow="ModuleVariableNode in Module multiphase for foo, l:-42 -> foo"

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -2,5 +2,5 @@ os_import
| test.py:2:8:2:9 | GSSA Variable os |
flowstep
jumpStep
| test.py:2:8:2:9 | GSSA Variable os | test.py:0:0:0:0 | ModuleVariableNode for test.os |
| test.py:2:8:2:9 | GSSA Variable os | test.py:0:0:0:0 | ModuleVariableNode in Module test for os |
essaFlowStep

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:3:1:3:7 | GSSA Variable tainted | test.py:4:6:4:12 | ControlFlowNode for tainted |
| test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:3:1:3:7 | GSSA Variable tainted |
| test.py:6:1:6:11 | ControlFlowNode for FunctionExpr | test.py:6:5:6:8 | GSSA Variable func |

View File

@@ -3,5 +3,8 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where TaintTracking::localTaintStep(nodeFrom, nodeTo)
where
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
exists(nodeFrom.getLocation().getFile().getRelativePath()) and
exists(nodeTo.getLocation().getFile().getRelativePath())
select nodeFrom, nodeTo

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,6 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
| test_collections.py:20:9:20:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_unpacking.py:31:9:31:22 | ControlFlowNode for ensure_tainted | Node steps to itself |

View File

@@ -23,3 +23,19 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep
| test_async.py:48:9:48:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:64:10:64:21 | ControlFlowNode for tainted_list | Node steps to itself |
| test_collections.py:71:9:71:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:73:9:73:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:88:10:88:21 | ControlFlowNode for tainted_list | Node steps to itself |
| test_collections.py:89:10:89:23 | ControlFlowNode for TAINTED_STRING | Node steps to itself |
| test_collections.py:97:9:97:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:99:9:99:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:112:9:112:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:114:9:114:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:147:9:147:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:149:9:149:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
| test_collections.py:246:9:246:15 | ControlFlowNode for my_dict | Node steps to itself |
| test_collections.py:246:22:246:33 | ControlFlowNode for tainted_dict | Node steps to itself |
| test_for.py:24:9:24:22 | ControlFlowNode for ensure_tainted | Node steps to itself |

View File

@@ -37,6 +37,14 @@ def test_construction():
tuple(tainted_list), # $ tainted
set(tainted_list), # $ tainted
frozenset(tainted_list), # $ tainted
dict(tainted_dict), # $ tainted
dict(k = tainted_string)["k"], # $ tainted
dict(dict(k = tainted_string))["k"], # $ tainted
dict(["k", tainted_string]), # $ tainted
)
ensure_not_tainted(
dict(k = tainted_string)["k1"]
)
@@ -64,6 +72,31 @@ def test_access(x, y, z):
for i in reversed(tainted_list):
ensure_tainted(i) # $ tainted
def test_access_explicit(x, y, z):
tainted_list = [TAINTED_STRING]
ensure_tainted(
tainted_list[0], # $ tainted
tainted_list[x], # $ tainted
tainted_list[y:z], # $ tainted
sorted(tainted_list)[0], # $ tainted
reversed(tainted_list)[0], # $ tainted
iter(tainted_list), # $ tainted
next(iter(tainted_list)), # $ tainted
[i for i in tainted_list], # $ tainted
[tainted_list for i in [1,2,3]], # $ MISSING: tainted
[TAINTED_STRING for i in [1,2,3]], # $ tainted
[tainted_list], # $ tainted
)
a, b, c = tainted_list[0:3]
ensure_tainted(a, b, c) # $ tainted
for h in tainted_list:
ensure_tainted(h) # $ tainted
for i in reversed(tainted_list):
ensure_tainted(i) # $ tainted
def test_dict_access(x):
tainted_dict = TAINTED_DICT

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -46,6 +46,4 @@ class TestConfiguration extends DataFlow::Configuration {
}
override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
override int explorationLimit() { result = 5 }
}

View File

@@ -46,6 +46,4 @@ class TestConfiguration extends TaintTracking::Configuration {
}
override predicate isSanitizerIn(DataFlow::Node node) { this.isSource(node) }
override int explorationLimit() { result = 5 }
}

View File

@@ -23,3 +23,4 @@ viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
uniqueContentApprox
identityLocalStep

View File

@@ -1,10 +1,11 @@
module_tracker
| import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr |
module_attr_tracker
| import_as_attr.py:0:0:0:0 | ModuleVariableNode for import_as_attr.attr_ref |
| import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for attr_ref |
| import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember |
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
| import_as_attr.py:3:1:3:1 | GSSA Variable x |
| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref |
| import_as_attr.py:5:1:5:10 | GSSA Variable attr_ref |
| import_as_attr.py:6:5:6:5 | SSA variable y |
| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref |

View File

@@ -60,10 +60,10 @@ def test_import():
def to_inner_scope():
x = tracked # $tracked
def foo():
y = x # $ MISSING: tracked
return y # $ MISSING: tracked
also_x = foo() # $ MISSING: tracked
print(also_x) # $ MISSING: tracked
y = x # $ tracked
return y # $ tracked
also_x = foo() # $ tracked
print(also_x) # $ tracked
# ------------------------------------------------------------------------------
# Function decorator

View File

@@ -24,6 +24,11 @@ class TrackedTest extends InlineExpectationsTest {
tracked(t).flowsTo(e) and
// Module variables have no sensible location, and hence can't be annotated.
not e instanceof DataFlow::ModuleVariableNode and
// Global variables on line 0 also cannot be annotated
not e.getLocation().getStartLine() = 0 and
// We do not wish to annotate scope entry definitions,
// as they do not appear in the source code.
not e.asVar() instanceof ScopeEntryDefinition and
tag = "tracked" and
location = e.getLocation() and
value = t.getAttr() and

Some files were not shown because too many files have changed in this diff Show More