Python: Add call-graph to cached dataflow stage

I didn't do any performance investigation on this, since it just seems
so much like the right approach.
This commit is contained in:
Rasmus Wriedt Larsen
2022-10-20 21:19:12 +02:00
parent fc0545561e
commit 36e8b8bfb9
2 changed files with 88 additions and 76 deletions

View File

@@ -37,6 +37,7 @@ private import DataFlowPublic
private import DataFlowPrivate private import DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
private import semmle.python.internal.CachedStages
newtype TParameterPosition = newtype TParameterPosition =
/** Used for `self` in methods, and `cls` in classmethods. */ /** Used for `self` in methods, and `cls` in classmethods. */
@@ -1041,20 +1042,23 @@ predicate resolveClassInstanceCall(CallNode call, Function target, Node self) {
*/ */
cached cached
predicate resolveCall(ControlFlowNode call, Function target, CallType type) { predicate resolveCall(ControlFlowNode call, Function target, CallType type) {
// NOTE(review): side-by-side diff -- on each line the old text comes first, the new text second.
// The new side adds `Stages::DataFlow::ref()` as a leading conjunct and wraps the existing
// disjunction in parentheses; the four disjuncts (plain function, method call, class call,
// class-instance call) are otherwise unchanged.
// Presumably `ref()` is the trivially-true `1 = 1` predicate shown in the CachedStages hunk, so it
// should not restrict results -- TODO confirm which `Stages` submodule that hunk belongs to.
type instanceof CallTypePlainFunction and Stages::DataFlow::ref() and
call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and (
not exists(Class cls | cls.getAMethod() = target) type instanceof CallTypePlainFunction and
or call.(CallNode).getFunction() = functionTracker(target).asCfgNode() and
resolveMethodCall(call, target, type, _) not exists(Class cls | cls.getAMethod() = target)
or or
type instanceof CallTypeClass and resolveMethodCall(call, target, type, _)
exists(Class cls | or
resolveClassCall(call, cls) and type instanceof CallTypeClass and
target = invokedFunctionFromClassConstruction(cls, _) exists(Class cls |
resolveClassCall(call, cls) and
target = invokedFunctionFromClassConstruction(cls, _)
)
or
type instanceof CallTypeClassInstanceCall and
resolveClassInstanceCall(call, target, _)
) )
or
type instanceof CallTypeClassInstanceCall and
resolveClassInstanceCall(call, target, _)
} }
// ============================================================================= // =============================================================================
@@ -1119,77 +1123,80 @@ cached
predicate getCallArg( predicate getCallArg(
ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos ControlFlowNode call, Function target, CallType type, Node arg, ArgumentPosition apos
) { ) {
// NOTE(review): side-by-side diff -- on each line the old text comes first, the new text second.
// The new side adds `Stages::DataFlow::ref()` as a leading conjunct and wraps the previous body
// (`resolveCall`, `call instanceof CallNode`, and the per-call-type disjunction) in one extra pair
// of parentheses; the individual cases appear unchanged apart from being shifted down.
// Presumably `ref()` is the trivially-true `1 = 1` predicate shown in the CachedStages hunk
// -- TODO confirm which `Stages` submodule that hunk belongs to.
// normal calls with a real call node Stages::DataFlow::ref() and
resolveCall(call, target, type) and
call instanceof CallNode and
( (
type instanceof CallTypePlainFunction and // normal calls with a real call node
normalCallArg(call, arg, apos) resolveCall(call, target, type) and
or call instanceof CallNode and
// self argument for normal method calls
type instanceof CallTypeNormalMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
// dataflow lib has requirement that arguments and calls are in same enclosing callable.
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
or
// cls argument for classmethod calls
type instanceof CallTypeClassMethod and
apos.isSelf() and
resolveMethodCall(call, target, type, arg) and
(arg = classTracker(_) or arg = clsTracker(_)) and
// dataflow lib has requirement that arguments and calls are in same enclosing callable.
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
or
// normal arguments for method calls
( (
type instanceof CallTypeNormalMethod or type instanceof CallTypePlainFunction and
type instanceof CallTypeStaticMethod or normalCallArg(call, arg, apos)
type instanceof CallTypeClassMethod
) and
normalCallArg(call, arg, apos)
or
// method as plain function call.
//
// argument index 0 of call has position self (and MUST be given as positional
// argument in call). This also means that call-arguments are shifted by 1, such
// that argument index 1 of call has argument position 0
type instanceof CallTypeMethodAsPlainFunction and
(
apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
or or
not apos.isPositional(_) and normalCallArg(call, arg, apos) // self argument for normal method calls
or type instanceof CallTypeNormalMethod and
exists(ArgumentPosition normalPos, int index | apos.isSelf() and
apos.isPositional(index - 1) and resolveMethodCall(call, target, type, arg) and
normalPos.isPositional(index) and // dataflow lib has requirement that arguments and calls are in same enclosing callable.
normalCallArg(call, arg, normalPos) exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
) )
)
or
// class call
type instanceof CallTypeClass and
(
// only pass synthetic node for created object to __init__, and not __new__ since
// __new__ is a classmethod.
target = invokedFunctionFromClassConstruction(_, "__init__") and
apos.isSelf() and
arg = TSyntheticPreUpdateNode(call)
or or
normalCallArg(call, arg, apos) // cls argument for classmethod calls
) type instanceof CallTypeClassMethod and
or
// call on class instance, which goes to `__call__` method
type instanceof CallTypeClassInstanceCall and
(
apos.isSelf() and apos.isSelf() and
resolveClassInstanceCall(call, target, arg) resolveMethodCall(call, target, type, arg) and
(arg = classTracker(_) or arg = clsTracker(_)) and
// dataflow lib has requirement that arguments and calls are in same enclosing callable.
exists(CfgNode cfgNode | cfgNode.getNode() = call |
cfgNode.getEnclosingCallable() = arg.getEnclosingCallable()
)
or or
// normal arguments for method calls
(
type instanceof CallTypeNormalMethod or
type instanceof CallTypeStaticMethod or
type instanceof CallTypeClassMethod
) and
normalCallArg(call, arg, apos) normalCallArg(call, arg, apos)
or
// method as plain function call.
//
// argument index 0 of call has position self (and MUST be given as positional
// argument in call). This also means that call-arguments are shifted by 1, such
// that argument index 1 of call has argument position 0
type instanceof CallTypeMethodAsPlainFunction and
(
apos.isSelf() and arg.asCfgNode() = call.(CallNode).getArg(0)
or
not apos.isPositional(_) and normalCallArg(call, arg, apos)
or
exists(ArgumentPosition normalPos, int index |
apos.isPositional(index - 1) and
normalPos.isPositional(index) and
normalCallArg(call, arg, normalPos)
)
)
or
// class call
type instanceof CallTypeClass and
(
// only pass synthetic node for created object to __init__, and not __new__ since
// __new__ is a classmethod.
target = invokedFunctionFromClassConstruction(_, "__init__") and
apos.isSelf() and
arg = TSyntheticPreUpdateNode(call)
or
normalCallArg(call, arg, apos)
)
or
// call on class instance, which goes to `__call__` method
type instanceof CallTypeClassInstanceCall and
(
apos.isSelf() and
resolveClassInstanceCall(call, target, arg)
or
normalCallArg(call, arg, apos)
)
) )
) )
} }

View File

@@ -180,6 +180,7 @@ module Stages {
predicate ref() { 1 = 1 } predicate ref() { 1 = 1 }
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
private import semmle.python.internal.Awaited as Awaited private import semmle.python.internal.Awaited as Awaited
@@ -195,6 +196,10 @@ module Stages {
or or
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _) any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
or or
DataFlowDispatch::resolveCall(_, _, _)
or
DataFlowDispatch::getCallArg(_, _, _, _, _)
or
any(LocalSources::LocalSourceNode n).flowsTo(_) any(LocalSources::LocalSourceNode n).flowsTo(_)
or or
exists(Awaited::awaited(_)) exists(Awaited::awaited(_))