Files
codeql/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll

226 lines
8.8 KiB
Plaintext

/**
* Provides Python specific classes and predicates for defining flow summaries.
*
* Flow summaries are defined for callables that are not extracted.
* Such callables go by different names in different parts of our codebase:
*
* - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s.
* These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`.
*
* - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase).
* These are identified by strings and has predicates for finding calls to them.
*
* Having both extracted and non-extracted callables means that we now have three types of calls:
* - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow.
* - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by looking up the relevant summary when the
* global data flow graph is connected up via `getViableCallable`.
* - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework.
*
* The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where
* `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for
* call resolution in global data flow. That mechanism is `getViableCallable`.
* Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking.
* To avoid that type tracking becomes mutually recursive with data flow, type tracking must use a call graph not including summaries.
* Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`.
*
* We do not support summaries of special methods via the special methods framework,
* the summary would have to identify the call.
*
* We might, while we still extract the standard library, want to support flow summaries of
* extracted callables, so that we can model part of the standard library with flow summaries.
* For this to work, we have be careful with the enclosing callable predicate.
*/
private import python
private import DataFlowPrivate
private import DataFlowPublic
private import DataFlowImplCommon
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
class SummarizedCallableBase = string;
/** View a `SummarizedCallable` as a `DataFlowCallable`. */
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
/** Gets the parameter position of the instance parameter. */
ArgumentPosition instanceParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks
/** Gets the synthesized summary data-flow node for the given values. */
Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
/** Gets the synthesized data-flow call for `receiver`. */
SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
/** Gets the type of content `c`. */
DataFlowType getContentType(Content c) { any() }
/** Gets the return type of kind `rk` for callable `c`. */
bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the `i`th parameter in a synthesized call that targets a
* callback of type `t`.
*/
bindingset[t, i]
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
* callback of type `t`.
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
/** Gets the type of synthetic global `sg`. */
DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() }
/**
* Holds if an external flow summary exists for `c` with input specification
* `input`, output specification `output`, kind `kind`, and provenance `provenance`.
*/
predicate summaryElement(
FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance
) {
exists(boolean preservesValue |
c.propagatesFlowExt(input, output, preservesValue) and
(if preservesValue = true then kind = "value" else kind = "taint") and
provenance = "manual"
)
}
/**
* Holds if a neutral model exists for `c` with provenance `provenance`,
* which means that there is no flow through `c`.
* Note. Neutral models have not been implemented for Python.
*/
predicate neutralElement(FlowSummary::SummarizedCallable c, string provenance) { none() }
/**
* Gets the summary component for specification component `c`, if any.
*
* This covers all the Python-specific components of a flow summary.
*/
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
c = "ListElement" and
result = FlowSummary::SummaryComponent::listElement()
}
/** Gets the textual representation of a summary component in the format used for flow summaries. */
string getComponentSpecificCsv(SummaryComponent sc) {
sc = TContentSummaryComponent(any(ListElementContent c)) and
result = "ListElement"
}
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
string getParameterPositionCsv(ParameterPosition pos) { result = pos.toString() }
/** Gets the textual representation of an argument position in the format used for flow summaries. */
string getArgumentPositionCsv(ArgumentPosition pos) { result = pos.toString() }
/** Holds if input specification component `c` needs a reference. */
predicate inputNeedsReferenceSpecific(string c) { none() }
/** Holds if output specification component `c` needs a reference. */
predicate outputNeedsReferenceSpecific(string c) { none() }
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
ReturnKind getReturnValueKind() { any() }
/**
* All definitions in this module are required by the shared implementation
* (for source/sink interpretation), but they are unused for Python, where
* we rely on API graphs instead.
*/
private module UnusedSourceSinkInterpretation {
/**
* Holds if an external source specification exists for `n` with output specification
* `output`, kind `kind`, and provenance `provenance`.
*/
predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() }
/**
* Holds if an external sink specification exists for `n` with input specification
* `input`, kind `kind` and provenance `provenance`.
*/
predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() }
class SourceOrSinkElement = AstNode;
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends AstNode_ {
// InterpretNode is going away, this is just a dummy implementation.
// However, we have some old location tests picking them up, so we
// explicitly define them to not exist.
InterpretNode() { none() }
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { none() }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { none() }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { none() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { none() }
/** Gets the target of this call, if any. */
SourceOrSinkElement getCallTarget() { none() }
}
/** Provides additional sink specification logic. */
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
/** Provides additional source specification logic. */
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
}
import UnusedSourceSinkInterpretation
module ParsePositions {
private import FlowSummaryImpl
private predicate isParamBody(string body) {
exists(AccessPathToken tok |
tok.getName() = "Parameter" and
body = tok.getAnArgument()
)
}
private predicate isArgBody(string body) {
exists(AccessPathToken tok |
tok.getName() = "Argument" and
body = tok.getAnArgument()
)
}
predicate isParsedParameterPosition(string c, int i) {
isParamBody(c) and
i = AccessPath::parseInt(c)
}
predicate isParsedArgumentPosition(string c, int i) {
isArgBody(c) and
i = AccessPath::parseInt(c)
}
}
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
ArgumentPosition parseParamBody(string s) {
exists(int i |
ParsePositions::isParsedParameterPosition(s, i) and
result.isPositional(i)
)
}
/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
ParameterPosition parseArgBody(string s) {
exists(int i |
ParsePositions::isParsedArgumentPosition(s, i) and
result.isPositional(i)
)
}