Initial commit of Python queries and QL libraries.

This commit is contained in:
Mark Shannon
2018-11-19 13:13:39 +00:00
committed by Mark Shannon
parent 90c75cd362
commit 5f58824d1b
725 changed files with 63520 additions and 0 deletions

View File

@@ -0,0 +1,126 @@
/**
* @name Alert suppression
* @description Generates information about alert suppressions.
* @kind alert-suppression
* @id py/alert-suppression
*/
import python
/**
 * A comment that suppresses alerts.
 *
 * Subclasses supply the annotation text, the scope and the source range
 * that the suppression covers.
 */
abstract class SuppressionComment extends Comment {
  /** Gets the suppression annotation in this comment. */
  abstract string getAnnotation();

  /** Gets the scope of this suppression. */
  abstract SuppressionScope getScope();

  /**
   * Holds if this comment applies to the range from column `startcolumn` of line `startline`
   * to column `endcolumn` of line `endline` in file `filepath`.
   */
  abstract predicate covers(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  );
}
/**
 * A suppression comment that applies to a single line.
 *
 * Only comments that start on a line where some `AstNode` also starts are
 * members of this class.
 */
abstract class LineSuppressionComment extends SuppressionComment {
  LineSuppressionComment() {
    exists(AstNode sameLine, string path, int line |
      sameLine.getLocation().hasLocationInfo(path, line, _, _, _) and
      this.getLocation().hasLocationInfo(path, line, _, _, _)
    )
  }

  /** Gets the scope of this suppression, which is this comment itself. */
  override SuppressionScope getScope() { this = result }

  /**
   * Holds if this comment covers the range that starts at column 1 of the
   * comment's first line and ends where the comment itself ends.
   */
  override predicate covers(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startcolumn = 1 and
    this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn)
  }
}
/**
 * An `lgtm` suppression comment.
 *
 * The annotation is either an `lgtm[...]` fragment found anywhere in the
 * comment, or a bare `lgtm` at the start of the comment or after a semicolon.
 */
class LgtmSuppressionComment extends LineSuppressionComment {
  string annotation;

  LgtmSuppressionComment() {
    exists(string text | text = this.getContents() |
      // match `lgtm` at the start of the comment and after semicolon
      annotation = text.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim()
      or
      // match `lgtm[...]` anywhere in the comment
      annotation = text.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _)
    )
  }

  /** Gets the suppression annotation in this comment. */
  override string getAnnotation() { annotation = result }
}
/**
 * A `noqa` suppression comment. Both pylint and pyflakes respect this, so lgtm ought to too.
 *
 * The whole comment, ignoring case and surrounding whitespace, must be `noqa`.
 */
class NoqaSuppressionComment extends LineSuppressionComment {
  NoqaSuppressionComment() {
    exists(string lowered | lowered = this.getContents().toLowerCase() |
      lowered.regexpMatch("\\s*noqa\\s*")
    )
  }

  /** Gets the suppression annotation, which is always the plain string "lgtm". */
  override string getAnnotation() { result = "lgtm" }
}
/**
 * The scope of an alert suppression comment.
 *
 * Every `SuppressionComment` is a `SuppressionScope`; its location is the
 * range reported by `SuppressionComment.covers`.
 */
class SuppressionScope extends @py_comment {
  SuppressionScope() { this instanceof SuppressionComment }

  /** Gets a textual representation of this element. */
  string toString() { result = "suppression range" }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [LGTM locations](https://lgtm.com/help/ql/locations).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(SuppressionComment comment | comment = this |
      comment.covers(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
// Result columns, in order:
//   1. the suppression comment
//   2. the text of the suppression comment (excluding delimiters)
//   3. the text of the suppression annotation
//   4. the scope of the suppression
from SuppressionComment c
select c,
  c.getContents(),
  c.getAnnotation(),
  c.getScope()

View File

@@ -0,0 +1,25 @@
/** Compute the total call-graph facts, the total size of the call-graph relation and
* the ratio of the two in relation to the depth of context.
*/
import python
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
from int total_facts, int total_size, int depth, float efficiency
where
  // Distinct (call, function) pairs seen in at least one context of this depth.
  total_facts =
    strictcount(ControlFlowNode site, FunctionObject callee |
      exists(PointsToContext ctx |
        depth = ctx.getDepth() and
        site = PointsTo::get_a_call(callee, ctx)
      )
    ) and
  // Every (call, function, context) tuple at this depth.
  total_size =
    strictcount(ControlFlowNode site, FunctionObject callee, PointsToContext ctx |
      depth = ctx.getDepth() and
      site = PointsTo::get_a_call(callee, ctx)
    ) and
  // Percentage of tuples that are distinct facts.
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency

View File

@@ -0,0 +1,29 @@
/** Compute the marginal increase in call-graph facts, the total size of the call-graph relation and
* the ratio of the two in relation to the depth of context.
*/
import python
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
from int total_facts, int total_size, int depth, float efficiency
where
  // (call, function) pairs seen at this depth and at no shallower depth.
  total_facts =
    strictcount(ControlFlowNode site, FunctionObject callee |
      exists(PointsToContext ctx |
        depth = ctx.getDepth() and
        site = PointsTo::get_a_call(callee, ctx) and
        not exists(PointsToContext shallower |
          shallower.getDepth() < depth and
          site = PointsTo::get_a_call(callee, shallower)
        )
      )
    ) and
  // Every (call, function, context) tuple at this depth.
  total_size =
    strictcount(ControlFlowNode site, FunctionObject callee, PointsToContext ctx |
      depth = ctx.getDepth() and
      site = PointsTo::get_a_call(callee, ctx)
    ) and
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency

View File

@@ -0,0 +1,25 @@
/** Compute the total points-to facts, the total size of the points-to relation and
* the ratio of the two in relation to the depth of context.
*/
import python
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
from int total_facts, int total_size, int depth, float efficiency
where
  // Distinct (node, value, class) triples seen in at least one context of this depth.
  total_facts =
    strictcount(ControlFlowNode node, Object value, ClassObject cls |
      exists(PointsToContext ctx |
        depth = ctx.getDepth() and
        PointsTo::points_to(node, ctx, value, cls, _)
      )
    ) and
  // Every tuple of the points-to relation at this depth.
  total_size =
    strictcount(ControlFlowNode node, Object value, ClassObject cls, PointsToContext ctx,
      ControlFlowNode orig |
      depth = ctx.getDepth() and
      PointsTo::points_to(node, ctx, value, cls, orig)
    ) and
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency

View File

@@ -0,0 +1,32 @@
/** Compute the marginal increase in points-to facts, the total size of the points-to relation and
* the ratio of the two in relation to the depth of context.
*/
import python
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
/** Gets the depth of a context in which `f` points to `value` with class `cls`. */
int depth(ControlFlowNode f, Object value, ClassObject cls) {
  exists(PointsToContext ctx |
    result = ctx.getDepth() and
    PointsTo::points_to(f, ctx, value, cls, _)
  )
}
/** Gets the smallest context depth at which `f` points to `value` with class `cls`. */
int shallowest(ControlFlowNode f, Object value, ClassObject cls) {
  result = min(int d | d = depth(f, value, cls) | d)
}
from int total_facts, int total_size, int depth, float efficiency
where
  // (node, value, class) triples whose shallowest context has this depth.
  total_facts =
    strictcount(ControlFlowNode node, Object value, ClassObject cls |
      shallowest(node, value, cls) = depth
    ) and
  // Every tuple of the points-to relation at this depth.
  total_size =
    strictcount(ControlFlowNode node, Object value, ClassObject cls, PointsToContext ctx,
      ControlFlowNode orig |
      depth = ctx.getDepth() and
      PointsTo::points_to(node, ctx, value, cls, orig)
    ) and
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency

View File

@@ -0,0 +1,115 @@
/**
* Symbols for cross-project jump-to-definition resolution.
*/
import python
import semmle.dataflow.SSA
import semmle.python.pointsto.PointsTo
/**
 * The underlying representation of a `Symbol`: either a module, or a named
 * attribute of the class or module object that an outer symbol resolves to.
 */
private newtype TSymbol =
  TModule(Module m) or
  TMember(Symbol outer, string part) {
    exists(Object o | o = outer.resolvesTo() |
      o.(ClassObject).hasAttribute(part)
      or
      o.(ModuleObject).hasAttribute(part)
    )
  }
/**
 * A "symbol" referencing an object in another module.
 * Symbols are represented by the module name and the dotted name by which the
 * object would be referred to in that module.
 * For example for the code:
 * ```
 * class C:
 *     def m(self): pass
 * ```
 * If the code were in a module `mod`,
 * then symbol for the method `m` would be "mod/C.m"
 */
class Symbol extends TSymbol {
  /**
   * Gets the textual form of this symbol: the module name, followed by `/`
   * and the first member name, followed by `.` and each further member name.
   */
  string toString() {
    exists(Module m | result = m.getName() and this = TModule(m))
    or
    // A direct member of a module is separated from it by "/".
    exists(TModule outer, string part |
      outer = TModule(_) and
      this = TMember(outer, part) and
      result = outer.(Symbol).toString() + "/" + part
    )
    or
    // A member of a member is separated from its parent by ".".
    exists(TMember outer, string part |
      outer = TMember(_, _) and
      this = TMember(outer, part) and
      result = outer.(Symbol).toString() + "." + part
    )
  }

  /** Finds the `AstNode` that this `Symbol` refers to. */
  AstNode find() {
    this = TModule(result)
    or
    exists(Symbol s, string name | this = TMember(s, name) |
      exists(ModuleObject m |
        m = s.resolvesTo() and
        m.attributeRefersTo(name, _, result.getAFlowNode())
      )
      or
      exists(ClassObject cls |
        cls = s.resolvesTo() and
        cls.attributeRefersTo(name, _, result.getAFlowNode())
      )
    )
  }

  /**
   * Find the class or module `Object` that this `Symbol` refers to, if
   * this `Symbol` refers to a class or module.
   */
  Object resolvesTo() {
    this = TModule(result.(ModuleObject).getModule())
    or
    exists(Symbol s, string name, Object o |
      o = s.resolvesTo() and
      this = TMember(s, name) and
      result = attribute_in_scope(o, name)
    )
  }

  /**
   * Gets the `Module` for the module part of this `Symbol`.
   * For example, this would return the `os` module for the `Symbol` "os/environ".
   */
  Module getModule() {
    this = TModule(result)
    or
    exists(Symbol outer |
      result = outer.getModule() and
      this = TMember(outer, _)
    )
  }

  /** Gets the `Symbol` that is the named member of this `Symbol`. */
  Symbol getMember(string name) { TMember(this, name) = result }
}
/**
 * Helper for `Symbol.resolvesTo()`.
 *
 * Gets the attribute `name` of the class or module `obj`, provided that the
 * attribute is a control-flow node whose scope is that class or module.
 * For modules, entry nodes are excluded.
 */
private Object attribute_in_scope(Object obj, string name) {
  exists(ModuleObject mod | mod = obj |
    result = mod.getAttribute(name) and
    result.(ControlFlowNode).getScope() = mod.getModule() and
    not result.(ControlFlowNode).isEntryNode()
  )
  or
  exists(ClassObject cls | cls = obj |
    result = cls.lookupAttribute(name) and
    result.(ControlFlowNode).getScope() = cls.getPyClass()
  )
}

View File

@@ -0,0 +1,483 @@
/**
* Definition tracking for jump-to-defn query.
*/
import python
import semmle.dataflow.SSA
import semmle.python.pointsto.PointsTo
/** The underlying representation of a `Definition`: an expression, statement or module. */
private newtype TDefinition =
  TLocalDefinition(AstNode a) {
    a instanceof Module or
    a instanceof Stmt or
    a instanceof Expr
  }
/**
 * A definition for the purposes of jump-to-definition.
 * Each definition wraps a single `AstNode`.
 */
class Definition extends TLocalDefinition {
  /** Gets the AST node that this definition wraps. */
  AstNode getAstNode() { this = TLocalDefinition(result) }

  /** Gets the location of the wrapped AST node. */
  Location getLocation() { result = this.getAstNode().getLocation() }

  /** Gets the module enclosing the scope of the wrapped AST node. */
  Module getModule() { result = this.getAstNode().getScope().getEnclosingModule() }

  /** Gets a textual representation: "Definition " followed by the location. */
  string toString() { result = "Definition " + this.getAstNode().getLocation().toString() }
}
/**
 * Holds if `defn` is a definition for the use `use`.
 * Each disjunct handles one kind of use: variables, attribute loads,
 * imports of modules and packages, imported members, and function or
 * class objects.
 */
private predicate jump_to_defn(ControlFlowNode use, Definition defn) {
  /* Use of an ESSA variable */
  exists(EssaVariable var |
    ssa_variable_defn(var, defn) and
    use = var.getASourceUse()
  )
  or
  /* Load of an attribute */
  exists(string name |
    use.isLoad() and
    jump_to_defn_attribute(use.(AttrNode).getObject(name), name, defn)
  )
  or
  /* Import of a Python module */
  exists(PythonModuleObject mod |
    use.(ImportExprNode).refersTo(mod) and
    defn.getAstNode() = mod.getModule()
  )
  or
  /* Import of a member of a Python module */
  exists(PythonModuleObject mod, string name |
    use.(ImportMemberNode).getModule(name).refersTo(mod) and
    scope_jump_to_defn_attribute(mod.getModule(), name, defn)
  )
  or
  /* Import of a package: jump to its init module */
  exists(PackageObject package |
    use.(ImportExprNode).refersTo(package) and
    defn.getAstNode() = package.getInitModule().getModule()
  )
  or
  /* Import of a member of a package */
  exists(PackageObject package, string name |
    use.(ImportMemberNode).getModule(name).refersTo(package) and
    scope_jump_to_defn_attribute(package.getInitModule().getModule(), name, defn)
  )
  or
  /* Function and class objects are defined by their own node */
  (use instanceof PyFunctionObject or use instanceof ClassObject) and
  defn.getAstNode() = use.getNode()
}
/**
 * Holds if `def` is a definition of `use` and `use` is neither a class
 * expression nor a function expression.
 * Prefer class and functions to class-expressions and function-expressions.
 */
private predicate preferred_jump_to_defn(Expr use, Definition def) {
  jump_to_defn(use.getAFlowNode(), def) and
  not use instanceof FunctionExpr and
  not use instanceof ClassExpr
}
/** Holds if `def` is the only preferred definition of `use`. */
private predicate unique_jump_to_defn(Expr use, Definition def) {
  preferred_jump_to_defn(use, def) and
  not exists(Definition other |
    preferred_jump_to_defn(use, other) and
    other != def
  )
}
/** Holds if `defn` is a definition for the ESSA definition of the variable `var`. */
private predicate ssa_variable_defn(EssaVariable var, Definition defn) {
  exists(EssaDefinition def | def = var.getDefinition() | ssa_defn_defn(def, defn))
}
/** Holds if `defn` is a definition for some input of the phi-function `phi`. */
private predicate ssa_phi_defn(PhiFunction phi, Definition defn) {
  exists(EssaVariable input | input = phi.getAnInput() | ssa_variable_defn(input, defn))
}
/**
 * Holds if `defn` is a definition for the ESSA definition `def`.
 * Dispatches on the kind of ESSA definition: phi-function, node definition,
 * edge refinement or node refinement.
 */
private predicate ssa_defn_defn(EssaDefinition def, Definition defn) {
  ssa_node_defn(def, defn)
  or
  ssa_node_refinement_defn(def, defn)
  or
  ssa_filter_defn(def, defn)
  or
  ssa_phi_defn(def, defn)
}
/**
 * Holds if ESSA edge refinement, `def`, is defined by `defn`.
 * An edge refinement has the same definitions as its input variable.
 */
predicate ssa_filter_defn(PyEdgeRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/**
 * Holds if ESSA defn, `uniphi`, is defined by `defn`.
 * It has the same definitions as its input variable.
 */
predicate uni_edged_phi_defn(SingleSuccessorGuard uniphi, Definition defn) {
  exists(EssaVariable input | input = uniphi.getInput() | ssa_variable_defn(input, defn))
}
/**
 * Holds if `defn` is a definition for the ESSA node definition `def`.
 * Covers assignments, parameters, deletions, scope entries and implicit
 * submodule definitions.
 */
pragma[noinline]
private predicate ssa_node_defn(EssaNodeDefinition def, Definition defn) {
  parameter_defn(def, defn)
  or
  assignment_jump_to_defn(def, defn)
  or
  scope_entry_defn(def, defn)
  or
  implicit_submodule_defn(def, defn)
  or
  delete_defn(def, defn)
}
/**
 * Definition for normal assignments `def = ...`.
 * The definition is the AST node of the assigned value.
 */
private predicate assignment_jump_to_defn(AssignmentDefinition def, Definition defn) {
  TLocalDefinition(def.getValue().getNode()) = defn
}
/**
 * Holds if `defn` is a definition for the ESSA node refinement `def`.
 * Dispatches to the predicate for each kind of refinement.
 */
pragma[noinline]
private predicate ssa_node_refinement_defn(EssaNodeRefinement def, Definition defn) {
  callsite_defn(def, defn)
  or
  method_callsite_defn(def, defn)
  or
  argument_defn(def, defn)
  or
  import_star_defn(def, defn)
  or
  attribute_assignment_defn(def, defn)
  or
  attribute_delete_defn(def, defn)
  or
  uni_edged_phi_defn(def, defn)
}
/**
 * Definition for parameter. `def foo(param): ...`
 * The definition is the AST node of the parameter's defining node.
 */
private predicate parameter_defn(ParameterDefinition def, Definition defn) {
  exists(AstNode param | param = def.getDefiningNode().getNode() | defn.getAstNode() = param)
}
/**
 * Definition for deletion: `del name`.
 * Never holds: a deletion contributes no definition.
 */
private predicate delete_defn(DeletionDefinition def, Definition defn) { none() }
/**
 * Implicit "defn" of the names of submodules at the start of an `__init__.py` file.
 * The definition is the module of the submodule named by the source variable of `def`.
 */
private predicate implicit_submodule_defn(ImplicitSubModuleDefinition def, Definition defn) {
  exists(PackageObject package, ModuleObject mod |
    mod = package.submodule(def.getSourceVariable().getName()) and
    def.getDefiningNode().getScope() = package.getInitModule().getModule() and
    defn.getAstNode() = mod.getModule()
  )
}
/**
 * Helper for scope_entry_value_transfer(...).
 * Transfer of values from the callsite to the callee, for enclosing variables,
 * but not arguments/parameters.
 *
 * Holds if `pred_var` is used at a call to a function whose entry node
 * defines `succ_def`, and both have the same source variable.
 */
private predicate scope_entry_value_transfer_at_callsite(
  EssaVariable pred_var, ScopeEntryDefinition succ_def
) {
  exists(CallNode callsite, FunctionObject f |
    callsite = f.getACall() and
    callsite = pred_var.getAUse() and
    f.getFunction().getEntryNode() = succ_def.getDefiningNode() and
    succ_def.getSourceVariable() = pred_var.getSourceVariable()
  )
}
/**
 * Model the transfer of values at scope-entry points, from `pred_var` to `succ_def`.
 * Combines transfer from an earlier scope, transfer at a callsite and
 * transfer at class entry.
 */
private predicate scope_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) {
  class_entry_value_transfer(pred_var, succ_def)
  or
  scope_entry_value_transfer_at_callsite(pred_var, succ_def)
  or
  BaseFlow::scope_entry_value_transfer_from_earlier(pred_var, _, succ_def, _)
}
/**
 * Helper for scope_entry_value_transfer.
 *
 * Holds if `pred_var` is used at a node with an entry edge, in some
 * import-time scope, to the defining node of `succ_def`, and both have the
 * same source variable.
 */
private predicate class_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) {
  exists(ImportTimeScope scope, ControlFlowNode class_def |
    succ_def.getSourceVariable() = pred_var.getSourceVariable() and
    class_def = pred_var.getAUse() and
    scope.entryEdge(class_def, succ_def.getDefiningNode())
  )
}
/**
 * Definition for implicit variable declarations at scope-entry.
 * The definitions are those of any variable whose value is transferred
 * into `def` from another scope.
 */
pragma[noinline]
private predicate scope_entry_defn(ScopeEntryDefinition def, Definition defn) {
  /* Transfer from another scope */
  exists(EssaVariable var |
    ssa_variable_defn(var, defn) and
    scope_entry_value_transfer(var, def)
  )
}
/**
 * Definition for a variable (possibly) redefined by a call:
 * Just assume that call does not define variable, so the definitions are
 * those of the refinement's input.
 */
pragma[noinline]
private predicate callsite_defn(CallsiteRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/** Pass through for `self` for the implicit re-defn of `self` in `self.foo()`. */
private predicate method_callsite_defn(MethodCallsiteRefinement def, Definition defn) {
  /* The value of self remains the same, only the attributes may change */
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/**
 * Helper for import_star_defn.
 *
 * Holds if `def` is defined by a `from ... import *` whose module refers to
 * `mod`, `name` is the name of the source variable of `def`, and `mod`
 * exports `name`.
 */
pragma[noinline]
private predicate module_and_name_for_import_star(
  ModuleObject mod, string name, ImportStarRefinement def
) {
  exists(ImportStarNode im_star | im_star = def.getDefiningNode() |
    im_star.getModule().refersTo(mod) and
    name = def.getSourceVariable().getName() and
    mod.exports(name)
  )
}
/**
 * Holds if `def` is technically a defn of `var`, but the `from ... import *`
 * does not in fact define `var`.
 * That is, `var` is the input of `def` and some module that the import
 * refers to does not export the name of `var`.
 */
pragma[noinline]
private predicate variable_not_redefined_by_import_star(EssaVariable var, ImportStarRefinement def) {
  exists(ModuleObject mod |
    def.getDefiningNode().(ImportStarNode).getModule().refersTo(mod) and
    not mod.exports(var.getSourceVariable().getName())
  ) and
  var = def.getInput()
}
/**
 * Definition for `from ... import *`.
 * Either the name is exported by the imported module, in which case the
 * definition comes from that module's scope, or the variable keeps the
 * definitions it had before the import.
 */
private predicate import_star_defn(ImportStarRefinement def, Definition defn) {
  exists(EssaVariable var |
    /* Retain value held before import */
    ssa_variable_defn(var, defn) and
    variable_not_redefined_by_import_star(var, def)
  )
  or
  exists(ModuleObject mod, string name |
    module_and_name_for_import_star(mod, name, def) and
    /* Attribute from imported module */
    scope_jump_to_defn_attribute(mod.getModule(), name, defn)
  )
}
/**
 * Attribute assignments have no effect as far as defn tracking is concerned:
 * the definitions are those of the refinement's input.
 */
private predicate attribute_assignment_defn(AttributeAssignment def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/**
 * Ignore the effects of calls on their arguments.
 * This is an approximation, but attempting to improve accuracy would be
 * very expensive for very little gain.
 */
private predicate argument_defn(ArgumentRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/**
 * Attribute deletions have no effect as far as defn tracking is concerned:
 * the definitions are those of the refinement's input.
 */
pragma[noinline]
private predicate attribute_delete_defn(EssaAttributeDeletion def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
/* Definition flow for attributes. These mirror the "normal" defn predicates.
* For each defn predicate `xxx_defn(XXX def, Definition defn)`
* There is an equivalent predicate that tracks the values in attributes:
* `xxx_jump_to_defn_attribute(XXX def, string name, Definition defn)`
* */
/**
 * INTERNAL -- Public for testing only.
 * Holds if `defn` is a definition of the attribute `name` of the ssa variable `var`.
 */
predicate ssa_variable_jump_to_defn_attribute(EssaVariable var, string name, Definition defn) {
  exists(EssaDefinition def | def = var.getDefinition() |
    ssa_defn_jump_to_defn_attribute(def, name, defn)
  )
}
/**
 * Helper for ssa_variable_jump_to_defn_attribute.
 * Dispatches on the kind of ESSA definition: phi-function, node definition,
 * node refinement or edge refinement.
 */
private predicate ssa_defn_jump_to_defn_attribute(
  EssaDefinition def, string name, Definition defn
) {
  ssa_node_jump_to_defn_attribute(def, name, defn)
  or
  ssa_node_refinement_jump_to_defn_attribute(def, name, defn)
  or
  ssa_filter_jump_to_defn_attribute(def, name, defn)
  or
  ssa_phi_jump_to_defn_attribute(def, name, defn)
}
/**
 * Holds if the attribute `name` of the ESSA edge refinement `def` is defined by `defn`.
 * An edge refinement has the same attribute definitions as its input variable.
 */
predicate ssa_filter_jump_to_defn_attribute(PyEdgeRefinement def, string name, Definition defn) {
  exists(EssaVariable input | input = def.getInput() |
    ssa_variable_jump_to_defn_attribute(input, name, defn)
  )
}
/**
 * Holds if `defn` is a definition of the attribute `name` of some input of
 * the ssa phi-function `phi`.
 */
private predicate ssa_phi_jump_to_defn_attribute(PhiFunction phi, string name, Definition defn) {
  exists(EssaVariable input | input = phi.getAnInput() |
    ssa_variable_jump_to_defn_attribute(input, name, defn)
  )
}
/**
 * Helper for ssa_defn_jump_to_defn_attribute.
 * Handles node definitions: assignments, `self` parameters and scope entries.
 */
pragma[noinline]
private predicate ssa_node_jump_to_defn_attribute(
  EssaNodeDefinition def, string name, Definition defn
) {
  scope_entry_jump_to_defn_attribute(def, name, defn)
  or
  self_parameter_jump_to_defn_attribute(def, name, defn)
  or
  assignment_jump_to_defn_attribute(def, name, defn)
}
/**
 * Helper for ssa_defn_jump_to_defn_attribute.
 * Handles node refinements: attribute assignments and call arguments.
 */
pragma[noinline]
private predicate ssa_node_refinement_jump_to_defn_attribute(
  EssaNodeRefinement def, string name, Definition defn
) {
  argument_jump_to_defn_attribute(def, name, defn)
  or
  attribute_assignment_jump_to_defn_attribute(def, name, defn)
}
/**
 * Holds if `defn` is a definition of the attribute `name` of a variable
 * whose value is transferred into the scope-entry definition `def`.
 */
pragma[noinline]
private predicate scope_entry_jump_to_defn_attribute(
  ScopeEntryDefinition def, string name, Definition defn
) {
  exists(EssaVariable var |
    ssa_variable_jump_to_defn_attribute(var, name, defn) and
    scope_entry_value_transfer(var, def)
  )
}
/**
 * Holds if `defn` is a definition of a variable called `name` in the
 * import-time scope `s` that reaches the exit of that scope.
 */
private predicate scope_jump_to_defn_attribute(ImportTimeScope s, string name, Definition defn) {
  exists(EssaVariable var |
    var.getName() = name and
    var.getScope() = s and
    BaseFlow::reaches_exit(var) and
    ssa_variable_defn(var, defn)
  )
}
/**
 * Holds if `defn` is a definition of the attribute `name` of the object
 * used at `use`.
 * The disjuncts cover local attributes, instance attributes, attributes
 * reached through `super`, and class or module attributes.
 */
private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Definition defn) {
  /* Local attribute */
  exists(EssaVariable var |
    ssa_variable_jump_to_defn_attribute(var, name, defn) and
    use = var.getASourceUse()
  )
  or
  /* Instance attributes */
  exists(ClassObject cls | use.refersTo(_, cls, _) |
    scope_jump_to_defn_attribute(cls.getPyClass(), name, defn)
  )
  or
  /* Super attributes */
  exists(AttrNode f, SuperBoundMethod sbm, Object function |
    f.refersTo(sbm) and
    use = f.getObject(name) and
    function = sbm.getFunction(_) and
    defn.getAstNode() = function.getOrigin()
  )
  or
  /* Class or module attribute */
  exists(Object obj, Scope scope |
    use.refersTo(obj) and
    scope_jump_to_defn_attribute(scope, name, defn)
  |
    scope = obj.(ClassObject).getPyClass()
    or
    scope = obj.(PythonModuleObject).getModule()
    or
    scope = obj.(PackageObject).getInitModule().getModule()
  )
}
/**
 * Holds if `defn` is a definition of the attribute `name` of the value
 * assigned by `def`.
 */
pragma[noinline]
private predicate assignment_jump_to_defn_attribute(
  AssignmentDefinition def, string name, Definition defn
) {
  jump_to_defn_attribute(def.getValue(), name, defn)
}
/**
 * Holds if `defn` is a definition of the attribute `name` after the
 * attribute assignment `def`.
 * The assigned attribute is defined by the assignment's own node; any other
 * attribute keeps the definitions it had on the input variable.
 */
pragma[noinline]
private predicate attribute_assignment_jump_to_defn_attribute(
  AttributeAssignment def, string name, Definition defn
) {
  name = def.getName() and
  defn.getAstNode() = def.getDefiningNode().getNode()
  or
  not name = def.getName() and
  ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn)
}
/**
 * Holds if `def` defines the attribute `name`.
 * `def` takes the form `setattr(use, "name", value)` where `use` is the
 * input to the defn and the second argument is a string constant.
 */
private predicate sets_attribute(ArgumentRefinement def, string name) {
  exists(CallNode call | call = def.getDefiningNode() |
    call.getFunction().refersTo(builtin_object("setattr")) and
    call.getArg(0) = def.getInput().getAUse() and
    name = call.getArg(1).getNode().(StrConst).getText()
  )
}
/**
 * Holds if `defn` is a definition of the attribute `name` after the
 * argument refinement `def`.
 * If `def` sets the attribute via `setattr`, the definition is that of the
 * third argument of the call; otherwise it is inherited from the input.
 */
pragma[noinline]
private predicate argument_jump_to_defn_attribute(
  ArgumentRefinement def, string name, Definition defn
) {
  if sets_attribute(def, name)
  then jump_to_defn(def.getDefiningNode().(CallNode).getArg(2), defn)
  else ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn)
}
/**
 * Gets the (temporally) preceding variable for "self", e.g. `def` is in
 * method foo() and `result` is in `__init__()`.
 * The result is a `self` variable that reaches the exit of a method which
 * precedes the method containing `def`.
 */
private EssaVariable preceding_self_variable(ParameterDefinition def) {
  def.isSelf() and
  exists(Function preceding, Function method |
    method = def.getScope() and
    // Only methods
    preceding.isMethod() and
    preceding.precedes(method) and
    result.getScope() = preceding and
    result.getSourceVariable().(Variable).isSelf() and
    BaseFlow::reaches_exit(result)
  )
}
/**
 * Holds if `defn` is a definition of the attribute `name` of the `self`
 * variable that precedes the `self` parameter `def`.
 */
pragma[noinline]
private predicate self_parameter_jump_to_defn_attribute(
  ParameterDefinition def, string name, Definition defn
) {
  exists(EssaVariable earlier | earlier = preceding_self_variable(def) |
    ssa_variable_jump_to_defn_attribute(earlier, name, defn)
  )
}
/**
 * Gets a definition for 'use'.
 * This exists primarily for testing; use `getUniqueDefinition()` instead.
 * Calls and artificial expressions have no definition, and an expression is
 * never its own definition.
 */
Definition getADefinition(Expr use) {
  not use.isArtificial() and
  not use instanceof Call and
  jump_to_defn(use.getAFlowNode(), result) and
  // Not the use itself
  not result = TLocalDefinition(use)
}
/**
 * Gets the unique definition for 'use', if one can be found.
 * Helper for the jump-to-definition query.
 * Calls and artificial expressions have no definition, and an expression is
 * never its own definition.
 */
Definition getUniqueDefinition(Expr use) {
  not use.isArtificial() and
  not use instanceof Call and
  unique_jump_to_defn(use, result) and
  // Not the use itself
  not result = TLocalDefinition(use)
}
/**
 * Helper class to get suitable locations for attributes.
 * Locations are only provided for names, attributes, import expressions and
 * import members.
 */
class NiceLocationExpr extends @py_expr {
  /** Gets a textual representation of this expression. */
  string toString() { result = this.(Expr).toString() }

  /**
   * Holds if this expression should be shown at the location that spans
   * column `bc` of line `bl` to column `ec` of line `el` in file `f`.
   */
  predicate hasLocationInfo(string f, int bl, int bc, int el, int ec) {
    this.(Name).getLocation().hasLocationInfo(f, bl, bc, el, ec)
    or
    /*
     * Show xxx for `xxx` in `from xxx import y` or
     * for `import xxx` or for `import xxx as yyy`.
     */

    this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec)
    or
    /* Attribute location for x.y is that of 'y' so that url does not overlap with that of 'x' */
    this.(Attribute).getLocation().hasLocationInfo(f, _, _, el, ec) and
    bl = el and
    bc = ec - this.(Attribute).getName().length() + 1
    or
    /* Show y for `y` in `from xxx import y` */
    exists(string name |
      name = this.(ImportMember).getName() and
      this.(ImportMember).getLocation().hasLocationInfo(f, _, _, el, ec) and
      bl = el and
      bc = ec - name.length() + 1
    )
  }
}

View File

@@ -0,0 +1,17 @@
/**
* @name Definitions
* @description Jump to definition helper query.
* @kind definitions
* @id py/jump-to-definition
*/
import python
import DefinitionTracking
from NiceLocationExpr use, Definition defn, string kind, string f, int l
where
  kind = "Definition" and
  defn = getUniqueDefinition(use) and
  use.hasLocationInfo(f, l, _, _, _) and
  // Ignore if the definition is on the same line as the use
  not defn.getLocation().hasLocationInfo(f, l, _, _, _)
select use, defn, kind

View File

@@ -0,0 +1,33 @@
/**
* Compute the efficiency of the points-to relation. That is the ratio of
* "interesting" facts to total facts.
*/
import python
import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
/**
 * Holds if `f` is not counted as an "interesting" node: a parameter,
 * a name constant, or an immutable literal.
 */
predicate trivial(ControlFlowNode f) {
  f instanceof NameConstantNode
  or
  f.getNode() instanceof ImmutableLiteral
  or
  f.getNode() instanceof Parameter
}
from int interesting_facts, int interesting_facts_in_source, int total_size, float efficiency
where
  // Distinct (node, value, class) facts for non-trivial nodes.
  interesting_facts =
    strictcount(ControlFlowNode node, Object value, ClassObject cls |
      not trivial(node) and
      node.refersTo(value, cls, _)
    ) and
  // The same, restricted to nodes in files that have a relative path.
  interesting_facts_in_source =
    strictcount(ControlFlowNode node, Object value, ClassObject cls |
      not trivial(node) and
      node.refersTo(value, cls, _) and
      exists(node.getScope().getEnclosingModule().getFile().getRelativePath())
    ) and
  // Every tuple of the points-to relation.
  total_size =
    strictcount(ControlFlowNode node, PointsToContext ctx, Object value, ClassObject cls,
      ControlFlowNode orig |
      PointsTo::points_to(node, ctx, value, cls, orig)
    ) and
  efficiency = 100.0 * interesting_facts_in_source / total_size
select interesting_facts, interesting_facts_in_source, total_size, efficiency

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.pointsto.PointsTo
// Lists each class for which inference failed, together with the reason.
from ClassObject cls, string reason
where PointsTo::Types::failed_inference(cls, reason)
select cls, reason

View File

@@ -0,0 +1,28 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Tracing which module is imported by an import statement is very important in ensuring that the whole program is available
for analysis. Failure to determine which module is imported by an import reduces the extent and accuracy of Semmle's analysis.
</p>
<p>
Missing imports will degrade the effectiveness of code analysis and may result in errors going undetected.
</p>
</overview>
<recommendation>
<p>
Ensure that all required modules and packages can be found when running the extractor.
</p>
</recommendation>
<references>
<li>Semmle Tutorial: <a href="https://semmle.com/wiki/pages/viewpage.action?pageId=9493108">Basic project creation (Python)</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,71 @@
/**
* @name Unresolved import
* @description An unresolved import may result in reduced coverage and accuracy of analysis.
* @kind problem
* @problem.severity info
* @id py/import-failure
*/
import python
/**
 * Gets an import that is an alternative to `ie`.
 *
 * Both `ie` and the result must be the value of an alias, or the module of an
 * `ImportMember` that is the value of an alias. They must also sit in
 * opposite branches of the same `if` statement, or one in the body of a
 * `try` statement and the other in one of its handlers.
 */
ImportExpr alternative_import(ImportExpr ie) {
  exists(Alias thisalias, Alias otheralias |
    // Postfix casts are used for consistency with the rest of the library.
    (thisalias.getValue() = ie or thisalias.getValue().(ImportMember).getModule() = ie) and
    (otheralias.getValue() = result or otheralias.getValue().(ImportMember).getModule() = result) and
    (
      exists(If i | i.getBody().contains(ie) and i.getOrelse().contains(result))
      or
      exists(If i | i.getBody().contains(result) and i.getOrelse().contains(ie))
      or
      exists(Try t | t.getBody().contains(ie) and t.getAHandler().contains(result))
      or
      exists(Try t | t.getBody().contains(result) and t.getAHandler().contains(ie))
    )
  )
}
/**
 * Gets the platform name associated with the module imported by `ie`, if
 * the module name is recognised as specific to a platform.
 * The result is one of "java", "darwin", "win32", "linux2" or "unsupported".
 */
string os_specific_import(ImportExpr ie) {
  exists(string name | name = ie.getImportedModuleName() |
    result = "java" and name.matches("org.python.%")
    or
    result = "java" and name.matches("java.%")
    or
    result = "darwin" and name.matches("Carbon.%")
    or
    result = "win32" and
    (
      name = "_winapi" or
      name = "_win32api" or
      name = "_winreg" or
      name = "nt" or
      name.matches("win32%") or
      name = "ntpath"
    )
    or
    result = "linux2" and
    (name = "posix" or name = "posixpath")
    or
    result = "unsupported" and
    (name = "__pypy__" or name = "ce" or name.matches("riscos%"))
  )
}
/** Gets the value of the `sys.platform` flag recorded for the current major version. */
string get_os() {
  exists(string version | version = major_version().toString() |
    py_flags_versioned("sys.platform", result, version)
  )
}
/**
 * Holds if failure to resolve `ie` is acceptable: either its platform
 * differs from the recorded one, or an alternative import refers to something.
 */
predicate ok_to_fail(ImportExpr ie) {
  os_specific_import(ie) != get_os()
  or
  alternative_import(ie).refersTo(_)
}
from ImportExpr ie
where
  // The import does not refer to anything ...
  not ie.refersTo(_) and
  // ... although some context applies to it ...
  exists(Context c | c.appliesTo(ie.getAFlowNode())) and
  // ... the failure is not acceptable ...
  not ok_to_fail(ie) and
  // ... and no version guard controls its block with the branch opposite to the guard's value.
  not exists(VersionGuard guard |
    if guard.isTrue()
    then guard.controls(ie.getAFlowNode().getBasicBlock(), false)
    else guard.controls(ie.getAFlowNode().getBasicBlock(), true)
  )
select ie, "Unable to resolve import of '" + ie.getImportedModuleName() + "'."

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Points-to analysis underpins type inference and thus most of Semmle's Python analysis.
Failures in points-to undermine type inference and reduce the coverage and accuracy of many queries.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name Key "points-to" fails for expression.
* @description Expression does not "point-to" an object which prevents further points-to analysis.
* @kind problem
* @problem.severity info
* @id py/key-points-to-failure
*/
import python
/** Holds if some control-flow node of `e` does not refer to any object. */
predicate points_to_failure(Expr e) {
  exists(ControlFlowNode f |
    f = e.getAFlowNode() and
    not f.refersTo(_)
  )
}
/**
 * Holds if points-to fails for `e` itself rather than for something it depends on.
 * That is, none of its sub-expressions fail, no ultimate definition of an
 * SSA variable used at `e` fails, and `e` is not the target of an assignment.
 */
predicate key_points_to_failure(Expr e) {
  points_to_failure(e) and
  not points_to_failure(e.getASubExpression()) and
  not exists(Assign a | e = a.getATarget()) and
  not exists(SsaVariable ssa | ssa.getAUse() = e.getAFlowNode() |
    points_to_failure(ssa.getAnUltimateDefinition().getDefinition().getNode())
  )
}
// Report attribute expressions that are key failures and are not the function of a call.
from Attribute e
where
  not exists(Call c | e = c.getFunc()) and
  key_points_to_failure(e)
select e, "Expression does not 'point-to' any object, but all its sources do."

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Points-to analysis underpins type inference and thus most of Semmle's Python analysis.
Failures in points-to undermine type inference and reduce the coverage and accuracy of many queries.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,18 @@
/**
* @name "points-to" fails for expression.
* @description Expression does not "point-to" an object which prevents type inference.
* @kind problem
* @id py/points-to-failure
* @problem.severity info
* @tags reliability
*/
import python
// Report every expression with a control-flow node that does not refer to any object.
from Expr e
where
  exists(ControlFlowNode f |
    f = e.getAFlowNode() and
    not f.refersTo(_)
  )
select e, "Expression does not 'point-to' any object."

View File

@@ -0,0 +1,13 @@
import python
import semmle.python.pointsto.PointsTo
// Counts the control-flow nodes whose basic block is not reachable in any context.
from int size
where
  size =
    count(ControlFlowNode node | not PointsTo::Test::reachableBlock(node.getBasicBlock(), _))
select size

View File

@@ -0,0 +1,27 @@
/**
* @name Ratio of jump-to-definitions computed
*/
import python
import DefinitionTracking
/**
 * Holds if `e` is an expression for which a jump-to-definition result is wanted:
 * a `Name` or `Attribute` in a load context, an `ImportMember` or an `ImportExpr`,
 * provided it does not refer to an object for which `isC()` holds.
 */
predicate want_to_have_definition(Expr e) {
  (
    e instanceof ImportExpr
    or
    e instanceof ImportMember
    or
    e.(Attribute).getCtx() instanceof Load
    or
    e.(Name).getCtx() instanceof Load
  ) and
  /* not builtin object like len, tuple, etc. */
  not exists(Object target | target.isC() and e.refersTo(target))
}
// `yes` counts the wanted expressions that have a unique definition, `no` those
// that do not; the last column is the integer percentage of `yes` over the total.
from int yes, int no
where
  no = count(Expr e | want_to_have_definition(e) and not exists(getUniqueDefinition(e)) | e) and
  yes = count(Expr e | want_to_have_definition(e) and exists(getUniqueDefinition(e)) | e)
select yes, no, yes*100/(yes+no) + "%"

View File

@@ -0,0 +1,228 @@
/**
* @name Sanity check
* @description General sanity check to be run on any and all code. Should never produce any results.
* @id py/sanity-check
*/
import python
import DefinitionTracking
/**
 * Holds if having `number` results for the member named `what` breaks the
 * expectation of exactly one result; `problem` describes the violation.
 * Only counts of 0 and of 2 to 10 are reported, and only for the member
 * names listed in the body.
 *
 * NOTE(review): callers in this file also pass "getValue" and "name" as `what`.
 * Neither is accepted here, so those calls can never hold -- confirm whether
 * that is intended before extending the list.
 */
predicate uniqueness_error(int number, string what, string problem) {
  (
    number in [2 .. 10] and problem = number.toString() + " results for " + what + "()"
    or
    number = 0 and problem = "no results for " + what + "()"
  ) and
  (
    what = "getAnInferredType" or
    what = "getOrigin" or
    what = "getEntryNode" or
    what = "getDefinition" or
    what = "getNode" or
    what = "getLocation" or
    what = "toString"
  )
}
/**
 * Holds if some `AstNode` with QL class `clsname` has no location, or a
 * non-unique `toString()` or `getLocation()`. `problem` names the violation and
 * `what` identifies the offending node.
 */
predicate ast_sanity(string clsname, string problem, string what) {
  exists(AstNode node | clsname = node.getAQlClass() |
    not exists(node.getLocation()) and
    what = node.toString() and
    problem = "no location"
    or
    what = node.getLocation().toString() and
    uniqueness_error(strictcount(node.getLocation()), "getLocation", problem)
    or
    what = "at " + node.getLocation().toString() and
    uniqueness_error(count(node.toString()), "toString", problem)
  )
}
/**
 * Holds if some `Location` with QL class `clsname` is malformed: it has a missing
 * or non-unique `toString()`, or its end precedes its start. `problem` names the
 * violation and `what` identifies the offending location.
 */
predicate location_sanity(string clsname, string problem, string what) {
  exists(Location loc | clsname = loc.getAQlClass() |
    /* The end of the location lies before its start. */
    what = "at " + loc.toString() and
    (
      loc.getEndLine() < loc.getStartLine() and
      problem = "end line before start line"
      or
      loc.getEndLine() = loc.getStartLine() and
      loc.getEndColumn() < loc.getStartColumn() and
      problem = "end column before start column"
    )
    or
    /* Zero, or too many, string representations. */
    uniqueness_error(count(loc.toString()), "toString", problem) and
    what = "at " + loc.toString()
    or
    /* Without a `toString()`, describe the location through an AST node that owns it, if any. */
    problem = "no toString" and
    not exists(loc.toString()) and
    (
      not loc = any(AstNode owner).getLocation() and
      what = "a location"
      or
      exists(AstNode owner |
        owner.getLocation() = loc and
        what = "a location of a " + owner.getAQlClass()
      )
    )
  )
}
/**
 * Holds if some `ControlFlowNode` with QL class `clsname` has no location or a
 * non-unique `getNode()`. `problem` names the violation and `what` identifies the
 * offending node.
 *
 * NOTE(review): the attribute-object check passes "getValue" to `uniqueness_error`,
 * which does not list that name, so that check appears to never hold -- confirm.
 */
predicate cfg_sanity(string clsname, string problem, string what) {
  exists(ControlFlowNode node | clsname = node.getAQlClass() |
    not exists(node.getLocation()) and
    problem = "no location" and
    what = node.toString()
    or
    what = "at " + node.getLocation().toString() and
    (
      uniqueness_error(count(node.getNode()), "getNode", problem)
      or
      uniqueness_error(count(node.(AttrNode).getObject()), "getValue", problem)
    )
  )
}
/**
 * Holds if some `Scope` with QL class `clsname` has no location, or a non-unique
 * `getEntryNode()`, `toString()` or `getLocation()`. `problem` names the
 * violation and `what` identifies the offending scope.
 */
predicate scope_sanity(string clsname, string problem, string what) {
  exists(Scope scope | clsname = scope.getAQlClass() |
    not exists(scope.getLocation()) and
    problem = "no location" and
    what = scope.toString()
    or
    what = "at " + scope.getLocation().toString() and
    (
      uniqueness_error(count(scope.getEntryNode()), "getEntryNode", problem)
      or
      uniqueness_error(count(scope.toString()), "toString", problem)
      or
      uniqueness_error(strictcount(scope.getLocation()), "getLocation", problem)
    )
  )
}
/**
 * Gets the best available description of the builtin object `o`: its
 * `toString()` if it has one, otherwise its name from `py_cobjectnames`,
 * otherwise a description based on an inferred type, otherwise "builtin object".
 */
string best_description_builtin_object(Object o) {
  o.isBuiltin() and
  (
    result = o.toString()
    or
    not exists(o.toString()) and
    (
      py_cobjectnames(o, result)
      or
      not py_cobjectnames(o, _) and
      (
        result = "builtin object of type " + o.getAnInferredType().toString()
        or
        not exists(o.getAnInferredType().toString()) and
        result = "builtin object"
      )
    )
  )
}
/**
 * Holds if `o` has source `0` in `py_cobject_sources`.
 *
 * Only check objects from the extractor; missing data for objects generated from
 * C source code analysis is OK, as it will be ignored if it doesn't match up with
 * the introspected form.
 */
private predicate introspected_builtin_object(Object o) {
  // presumably source 0 marks objects introspected by the extractor -- TODO confirm
  exists(int source | source = 0 and py_cobject_sources(o, source))
}
/**
 * Holds if some introspected builtin object with QL class `clsname` lacks an
 * inferred type, or lacks a `toString()` without being a `_semmle` special object
 * or the unknown value. `problem` names the violation and `what` describes the
 * object.
 *
 * NOTE(review): the name-uniqueness check passes "name" to `uniqueness_error`,
 * which does not list that name, so that check appears to never hold -- confirm.
 */
predicate builtin_object_sanity(string clsname, string problem, string what) {
  exists(Object obj |
    introspected_builtin_object(obj) and
    what = best_description_builtin_object(obj) and
    clsname = obj.getAQlClass()
  |
    not exists(obj.getAnInferredType()) and
    (
      problem = "no results for getAnInferredType"
      or
      not py_cobjectnames(obj, _) and problem = "neither name nor type"
    )
    or
    uniqueness_error(count(string name | py_cobjectnames(obj, name)), "name", problem)
    or
    problem = "no toString" and
    not exists(obj.toString()) and
    not obj = unknownValue() and
    not exists(string special | special.prefix(7) = "_semmle" | py_special_objects(obj, special))
  )
}
/**
 * Holds if some non-builtin object with QL class `clsname` has a non-unique
 * origin, an origin without a location, no `toString()`, or several
 * `toString()`s. `problem` names the violation and `what` identifies the object.
 */
predicate source_object_sanity(string clsname, string problem, string what) {
  exists(Object obj | not obj.isBuiltin() and clsname = obj.getAQlClass() |
    what = "at " + obj.getOrigin().getLocation().toString() and
    (
      uniqueness_error(count(obj.getOrigin()), "getOrigin", problem)
      or
      not exists(obj.toString()) and problem = "no toString"
    )
    or
    what = "??" and
    problem = "no location" and
    not exists(obj.getOrigin().getLocation())
    or
    what = obj.toString() and
    problem = "multiple toStrings()" and
    strictcount(obj.toString()) > 1
  )
}
/**
 * Holds if some `SsaVariable` with QL class `clsname` violates an SSA invariant:
 * it has more than one definition, its definition does not dominate one of its
 * uses, or it is a single-input phi node whose input's definition already
 * dominates it. `problem` names the violation and `what` identifies the variable.
 */
predicate ssa_sanity(string clsname, string problem, string what) {
  /* Zero or one definitions of each SSA variable */
  exists(SsaVariable var | clsname = var.getAQlClass() |
    uniqueness_error(strictcount(var.getDefinition()), "getDefinition", problem) and
    what = var.getId()
  )
  or
  /* Dominance criterion: Definition *must* dominate *all* uses. */
  exists(SsaVariable var, ControlFlowNode defn, ControlFlowNode use |
    defn = var.getDefinition() and use = var.getAUse()
  |
    not defn.strictlyDominates(use) and
    /*
     * A definition that shares its flow node with the use is accepted. This
     * also covers phi nodes, which come *before* a use on the same flow node,
     * so no separate phi-node condition is needed.
     */
    not defn = use and
    clsname = var.getAQlClass() and
    problem = "a definition which does not dominate a use at " + use.getLocation() and
    what = var.getId() + " at " + var.getLocation()
  )
  or
  /* Minimality of phi nodes */
  exists(SsaVariable var |
    strictcount(var.getAPhiInput()) = 1 and
    var
        .getAPhiInput()
        .getDefinition()
        .getBasicBlock()
        .strictlyDominates(var.getDefinition().getBasicBlock())
  |
    clsname = var.getAQlClass() and
    /* No leading space: the select clause already emits " has " before the problem. */
    problem = "a definition which is dominated by the definition of an incoming phi edge." and
    what = var.getId() + " at " + var.getLocation()
  )
}
/**
 * Holds if some `FunctionObject` with QL class `clsname` has no name, or has a
 * name but zero or several `descriptiveString()`s. `problem` names the violation
 * and `what` is the function name ("?" when there is none).
 */
predicate function_object_sanity(string clsname, string problem, string what) {
  exists(FunctionObject func | clsname = func.getAQlClass() |
    what = func.getName() and
    (
      count(func.descriptiveString()) = 0 and problem = "no descriptiveString()"
      or
      exists(int c | c = strictcount(func.descriptiveString()) and c > 1 |
        /* Separate the count from the noun, giving e.g. "2 descriptiveString()s". */
        problem = c + " descriptiveString()s"
      )
    )
    or
    not exists(func.getName()) and what = "?" and problem = "no name"
  )
}
/**
 * Holds if `obj`, which is neither a C object nor a module object, is referred
 * to by some control-flow node with two or more distinct origins.
 */
predicate multiple_origins_per_object(Object obj) {
  exists(ControlFlowNode user |
    strictcount(ControlFlowNode origin | user.refersTo(obj, origin)) >= 2
  ) and
  not obj instanceof ModuleObject and
  not obj.isC()
}
/**
 * Holds if `use` refers to `obj` with origin `inter`, while `inter` itself refers
 * to `obj` with an origin other than `inter`.
 */
predicate intermediate_origins(ControlFlowNode use, ControlFlowNode inter, Object obj) {
  use.refersTo(obj, inter) and
  exists(ControlFlowNode origin | inter.refersTo(obj, origin) and not origin = inter) and
  // It can sometimes happen that two different modules (e.g. cPickle and Pickle)
  // have the same attribute, but different origins.
  not strictcount(Object val | inter.(AttrNode).getObject().refersTo(val)) > 1
}
/**
 * Holds if points-to results are inconsistent: an object has multiple origins,
 * or a use has an intermediate origin. `clsname` is the QL class of the offending
 * object or use, `problem` names the violation and `what` identifies the element.
 */
predicate points_to_sanity(string clsname, string problem, string what) {
  exists(ControlFlowNode use, ControlFlowNode inter |
    intermediate_origins(use, inter, _) and
    what = use.toString() and
    problem = "has intermediate origin " + inter and
    clsname = use.getAQlClass()
  )
  or
  exists(Object obj |
    multiple_origins_per_object(obj) and
    what = obj.toString() and
    problem = "multiple origins for an object" and
    clsname = obj.getAQlClass()
  )
}
/**
 * Holds if some expression with QL class `clsname` and string form `what` has
 * more than one result for `getUniqueDefinition`.
 */
predicate jump_to_definition_sanity(string clsname, string problem, string what) {
  exists(Expr use |
    what = use.toString() and
    clsname = use.getAQlClass() and
    strictcount(getUniqueDefinition(use)) >= 2
  ) and
  problem = "multiple (jump-to) definitions"
}
/**
 * Holds if some file shares its name with a folder, or some container has a
 * non-unique `toString()`. `clsname` is the QL class of the file or container,
 * `problem` names the violation and `what` identifies the element.
 */
predicate file_sanity(string clsname, string problem, string what) {
  exists(Container container |
    what = "file " + container.getName() and
    uniqueness_error(count(container.toString()), "toString", problem) and
    clsname = container.getAQlClass()
  )
  or
  exists(File file |
    what = file.getName() and
    what = any(Folder folder).getName() and
    problem = "has same name as a folder" and
    clsname = file.getAQlClass()
  )
}
// Report every violation found by any of the individual sanity predicates.
from string clsname, string problem, string what
where
  file_sanity(clsname, problem, what)
  or
  jump_to_definition_sanity(clsname, problem, what)
  or
  points_to_sanity(clsname, problem, what)
  or
  function_object_sanity(clsname, problem, what)
  or
  source_object_sanity(clsname, problem, what)
  or
  builtin_object_sanity(clsname, problem, what)
  or
  ssa_sanity(clsname, problem, what)
  or
  cfg_sanity(clsname, problem, what)
  or
  scope_sanity(clsname, problem, what)
  or
  location_sanity(clsname, problem, what)
  or
  ast_sanity(clsname, problem, what)
select clsname + " " + what + " has " + problem

View File

@@ -0,0 +1,38 @@
/** Summarize a snapshot
*/
import python
// Emit one key/value row per summary fact about the snapshot.
from string key, string value
where
  key = "Extractor version" and py_flags_versioned("extractor.version", value, _)
  or
  key = "Snapshot build time" and exists(date d | snapshotDate(d) and value = d.toString())
  or
  key = "Interpreter version" and
  exists(string major, string minor |
    py_flags_versioned("version.major", major, _) and
    py_flags_versioned("version.minor", minor, _) and
    value = major + "." + minor
  )
  or
  key = "Build platform" and
  exists(string raw | py_flags_versioned("sys.platform", raw, _) |
    if raw = "win32"
    then value = "Windows"
    else
      /*
       * Python 2 reports "linux2" (or "linux3"), whereas Python 3.3 and later
       * report plain "linux"; accept every "linux"-prefixed value.
       */
      if raw.matches("linux%")
      then value = "Linux"
      else
        if raw = "darwin"
        then value = "OSX"
        else
          /* Unrecognised platforms are reported verbatim. */
          value = raw
  )
  or
  key = "Source location" and sourceLocationPrefix(value)
  or
  /* Only modules whose file has a relative path are counted as source. */
  key = "Lines of code (source)" and
  value =
    sum(ModuleMetrics m | exists(m.getFile().getRelativePath()) | m.getNumberOfLinesOfCode())
        .toString()
  or
  key = "Lines of code (total)" and
  value = sum(ModuleMetrics m | any() | m.getNumberOfLinesOfCode()).toString()
select key, value

View File

@@ -0,0 +1,15 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>In order to analyse uses of a class, all its attributes need to be known. Without the full inheritance hierarchy this is impossible.
This is an informational query only.
</p>
<p>
This query depends on points-to and type inference.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,16 @@
/**
* @name Inheritance hierarchy cannot be inferred for class
* @description Inability to infer the inheritance hierarchy for a class will impair analysis.
* @id py/failed-inheritance-inference
* @kind problem
* @problem.severity info
*/
import python
// Report classes that have no corresponding `ClassObject`, or whose
// `ClassObject` reports failed inference.
from Class cls
where
  exists(ClassObject inferred | inferred.getPyClass() = cls and inferred.failedInference())
  or
  not cls = any(ClassObject inferred).getPyClass()
select cls, "Inference of class hierarchy failed for class."

View File

@@ -0,0 +1,13 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Type inference is the key part of Semmle's Python analysis.
Failures in type inference reduce the coverage and accuracy of many queries.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,14 @@
/**
* @name Type inference fails for 'object'
* @description Type inference fails for 'object' which reduces recall for many queries.
* @kind problem
* @problem.severity info
* @id py/type-inference-failure
*/
import python
// Report objects that some control-flow node refers to, but for which that
// node has no class in the three-argument `refersTo`.
from ControlFlowNode f, Object o
where
  not exists(ClassObject cls | f.refersTo(o, cls, _)) and
  f.refersTo(o)
select o, "Type inference fails for 'object'."