mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Initial commit of Python queries and QL libraries.
This commit is contained in:
committed by
Mark Shannon
parent
90c75cd362
commit
5f58824d1b
126
python/ql/src/analysis/AlertSuppression.ql
Normal file
126
python/ql/src/analysis/AlertSuppression.ql
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
|
||||
* @name Alert suppression
|
||||
* @description Generates information about alert suppressions.
|
||||
* @kind alert-suppression
|
||||
* @id py/alert-suppression
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/**
 * A comment that suppresses alerts. Subclasses decide which comments
 * qualify and supply the annotation, the scope and the covered range.
 */
abstract class SuppressionComment extends Comment {
  /** Gets the suppression annotation carried by this comment. */
  abstract string getAnnotation();

  /** Gets the scope to which this suppression applies. */
  abstract SuppressionScope getScope();

  /**
   * Holds if this comment covers the range that starts at column `startcolumn`
   * of line `startline` and ends at column `endcolumn` of line `endline`,
   * within the file `filepath`.
   */
  abstract predicate covers(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  );
}
|
||||
|
||||
/**
 * A suppression comment that applies to a single line. Only comments that
 * start on a line on which some AST node also starts are included.
 */
abstract class LineSuppressionComment extends SuppressionComment {
  LineSuppressionComment() {
    exists(AstNode node, string path, int line |
      node.getLocation().hasLocationInfo(path, line, _, _, _) and
      this.getLocation().hasLocationInfo(path, line, _, _, _)
    )
  }

  /** Gets the scope of this suppression, which is this comment itself. */
  override SuppressionScope getScope() { this = result }

  /**
   * Holds if this comment covers the given range: it begins at column 1 of the
   * line on which the comment starts and ends where the comment ends.
   */
  override predicate covers(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    startcolumn = 1 and
    this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn)
  }
}
|
||||
|
||||
/**
 * A suppression comment that carries an `lgtm` annotation.
 */
class LgtmSuppressionComment extends LineSuppressionComment {
  /** The `lgtm` annotation found in the text of this comment. */
  string annotation;

  LgtmSuppressionComment() {
    // `lgtm[...]` is accepted at any position in the comment
    annotation = this.getContents().regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _)
    or
    // a bare `lgtm` is accepted only at the start of the comment or after a semicolon
    annotation =
      this.getContents().regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim()
  }

  /** Gets the `lgtm` annotation contained in this comment. */
  override string getAnnotation() { annotation = result }
}
|
||||
|
||||
/**
 * A `noqa` suppression comment. Pylint and pyflakes both honor these
 * comments, so they are treated as suppressions here too.
 */
class NoqaSuppressionComment extends LineSuppressionComment {
  NoqaSuppressionComment() {
    // the comment text, ignoring case, is just `noqa` with optional surrounding whitespace
    exists(string lowered |
      lowered = this.getContents().toLowerCase() and
      lowered.regexpMatch("\\s*noqa\\s*")
    )
  }

  /** Gets the annotation for this comment, which is always `lgtm`. */
  override string getAnnotation() { "lgtm" = result }
}
|
||||
|
||||
|
||||
/**
 * The scope of an alert suppression comment.
 */
class SuppressionScope extends @py_comment {
  SuppressionScope() { exists(SuppressionComment comment | this = comment) }

  /** Gets a textual representation of this element. */
  string toString() { "suppression range" = result }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * The location is the range covered by the underlying suppression comment.
   * For more information, see
   * [LGTM locations](https://lgtm.com/help/ql/locations).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(SuppressionComment comment | comment = this |
      comment.covers(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
|
||||
|
||||
// One row per suppression comment: the comment, its text, its annotation and its scope.
from SuppressionComment c
select
  c, // the suppression comment itself
  c.getContents(), // the comment text, without the comment delimiters
  c.getAnnotation(), // the suppression annotation inside the comment
  c.getScope() // the scope that the suppression applies to
|
||||
25
python/ql/src/analysis/CallGraphEfficiency.ql
Normal file
25
python/ql/src/analysis/CallGraphEfficiency.ql
Normal file
@@ -0,0 +1,25 @@
|
||||
/** Compute the total call-graph facts, the total size of the call-graph relation and
|
||||
* the ratio of the two in relation to the depth of context.
|
||||
*/
|
||||
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
import semmle.python.pointsto.PointsToContext
|
||||
|
||||
from int total_facts, int total_size, int depth, float efficiency
where
  // distinct (call, function) pairs that occur in some context of this depth
  total_facts =
    strictcount(ControlFlowNode callsite, FunctionObject callee |
      exists(PointsToContext context |
        depth = context.getDepth() and
        callsite = PointsTo::get_a_call(callee, context)
      )
    ) and
  // all (call, function, context) tuples whose context has this depth
  total_size =
    strictcount(ControlFlowNode callsite, FunctionObject callee, PointsToContext context |
      depth = context.getDepth() and
      callsite = PointsTo::get_a_call(callee, context)
    ) and
  // percentage of tuples that are distinct facts
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency
|
||||
29
python/ql/src/analysis/CallGraphMarginalEfficiency.ql
Normal file
29
python/ql/src/analysis/CallGraphMarginalEfficiency.ql
Normal file
@@ -0,0 +1,29 @@
|
||||
/** Compute the marginal increase in call-graph facts, the total size of the call-graph relation and
|
||||
* the ratio of the two in relation to the depth of context.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
import semmle.python.pointsto.PointsToContext
|
||||
|
||||
from int total_facts, int total_size, int depth, float efficiency
where
  // distinct (call, function) pairs seen at this depth ...
  total_facts =
    strictcount(ControlFlowNode callsite, FunctionObject callee |
      exists(PointsToContext context |
        depth = context.getDepth() and
        callsite = PointsTo::get_a_call(callee, context)
      ) and
      // ... that are not already seen in any shallower context
      not exists(PointsToContext shallower |
        shallower.getDepth() < depth and
        callsite = PointsTo::get_a_call(callee, shallower)
      )
    ) and
  // all (call, function, context) tuples whose context has this depth
  total_size =
    strictcount(ControlFlowNode callsite, FunctionObject callee, PointsToContext context |
      depth = context.getDepth() and
      callsite = PointsTo::get_a_call(callee, context)
    ) and
  // percentage of tuples that are new facts
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency
|
||||
25
python/ql/src/analysis/ContextEfficiency.ql
Normal file
25
python/ql/src/analysis/ContextEfficiency.ql
Normal file
@@ -0,0 +1,25 @@
|
||||
/** Compute the total points-to facts, the total size of the points-to relation and
|
||||
* the ratio of the two in relation to the depth of context.
|
||||
*/
|
||||
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
import semmle.python.pointsto.PointsToContext
|
||||
|
||||
from int total_facts, int total_size, int depth, float efficiency
where
  // distinct (node, value, class) triples that occur in some context of this depth
  total_facts =
    strictcount(ControlFlowNode node, Object obj, ClassObject type |
      exists(PointsToContext context |
        depth = context.getDepth() and
        PointsTo::points_to(node, context, obj, type, _)
      )
    ) and
  // all points-to tuples whose context has this depth
  total_size =
    strictcount(ControlFlowNode node, Object obj, ClassObject type, PointsToContext context,
      ControlFlowNode origin |
      depth = context.getDepth() and
      PointsTo::points_to(node, context, obj, type, origin)
    ) and
  // percentage of tuples that are distinct facts
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency
|
||||
32
python/ql/src/analysis/ContextMarginalEfficiency.ql
Normal file
32
python/ql/src/analysis/ContextMarginalEfficiency.ql
Normal file
@@ -0,0 +1,32 @@
|
||||
/** Compute the marginal increase in points-to facts, the total size of the points-to relation and
|
||||
* the ratio of the two in relation to the depth of context.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
import semmle.python.pointsto.PointsToContext
|
||||
|
||||
/** Gets the depth of a context in which `f` points to `value` with class `cls`. */
int depth(ControlFlowNode f, Object value, ClassObject cls) {
  result = any(PointsToContext context | PointsTo::points_to(f, context, value, cls, _)).getDepth()
}
|
||||
|
||||
/** Gets the smallest context depth at which `f` points to `value` with class `cls`. */
int shallowest(ControlFlowNode f, Object value, ClassObject cls) {
  result = min(int d | d = depth(f, value, cls) | d)
}
|
||||
|
||||
from int total_facts, int total_size, int depth, float efficiency
where
  // distinct (node, value, class) triples whose shallowest context has this depth
  total_facts =
    strictcount(ControlFlowNode node, Object obj, ClassObject type |
      shallowest(node, obj, type) = depth
    ) and
  // all points-to tuples whose context has this depth
  total_size =
    strictcount(ControlFlowNode node, Object obj, ClassObject type, PointsToContext context,
      ControlFlowNode origin |
      depth = context.getDepth() and
      PointsTo::points_to(node, context, obj, type, origin)
    ) and
  // percentage of tuples that are new facts
  efficiency = 100.0 * total_facts / total_size
select depth, total_facts, total_size, efficiency
|
||||
115
python/ql/src/analysis/CrossProjectDefinitions.qll
Normal file
115
python/ql/src/analysis/CrossProjectDefinitions.qll
Normal file
@@ -0,0 +1,115 @@
|
||||
/**
|
||||
* Symbols for cross-project jump-to-definition resolution.
|
||||
*/
|
||||
import python
|
||||
|
||||
import semmle.dataflow.SSA
|
||||
import semmle.python.pointsto.PointsTo
|
||||
|
||||
/* The underlying representation of `Symbol`: a module, or a named member of another symbol. */
private newtype TSymbol =
  TModule(Module m) or
  TMember(Symbol outer, string part) {
    // `outer` resolves to a module or class that has an attribute called `part`
    outer.resolvesTo().(ModuleObject).hasAttribute(part)
    or
    outer.resolvesTo().(ClassObject).hasAttribute(part)
  }
|
||||
|
||||
/**
 * A "symbol" that refers to an object in another module.
 * A symbol is represented by a module name plus the dotted name by which
 * the object would be referred to in that module.
 * For example, given the code:
 * ```
 * class C:
 *     def m(self): pass
 * ```
 * in a module `mod`, the symbol for the method `m` would be "mod/C.m".
 */
class Symbol extends TSymbol {
  /** Gets the textual form of this symbol, for example "mod/C.m". */
  string toString() {
    result = any(Module m | this = TModule(m)).getName()
    or
    exists(Symbol outer, string part, string separator |
      this = TMember(outer, part) and
      result = outer.toString() + separator + part
    |
      // a member whose outer symbol is a module is introduced by a slash
      outer = TModule(_) and separator = "/"
      or
      // a member whose outer symbol is itself a member is introduced by a dot
      outer = TMember(_, _) and separator = "."
    )
  }

  /** Gets the `AstNode` that this `Symbol` refers to. */
  AstNode find() {
    this = TModule(result)
    or
    exists(Symbol outer, string name, ControlFlowNode origin |
      this = TMember(outer, name) and
      origin = result.getAFlowNode()
    |
      outer.resolvesTo().(ClassObject).attributeRefersTo(name, _, origin)
      or
      outer.resolvesTo().(ModuleObject).attributeRefersTo(name, _, origin)
    )
  }

  /**
   * Gets the class or module `Object` that this `Symbol` refers to, if
   * this `Symbol` refers to a class or module.
   */
  Object resolvesTo() {
    exists(ModuleObject mod | result = mod | this = TModule(mod.getModule()))
    or
    exists(Symbol outer, string name |
      this = TMember(outer, name) and
      result = attribute_in_scope(outer.resolvesTo(), name)
    )
  }

  /**
   * Gets the `Module` for the module part of this `Symbol`.
   * For example, this would return the `os` module for the `Symbol` "os/environ".
   */
  Module getModule() {
    this = TModule(result)
    or
    result = any(Symbol outer | this = TMember(outer, _)).getModule()
  }

  /** Gets the `Symbol` for the member called `name` of this `Symbol`. */
  Symbol getMember(string name) { TMember(this, name) = result }
}
|
||||
|
||||
/*
 * Helper for `Symbol`.resolvesTo().
 * Gets the flow-node object for attribute `name` of the class or module `obj`,
 * provided that the node lies in the scope of that class or module itself.
 */
private Object attribute_in_scope(Object obj, string name) {
  exists(ControlFlowNode node | node = result |
    // class attribute located in the body of the class
    node = obj.(ClassObject).lookupAttribute(name) and
    node.getScope() = obj.(ClassObject).getPyClass()
    or
    // module attribute located in the module, excluding the entry node
    node = obj.(ModuleObject).getAttribute(name) and
    node.getScope() = obj.(ModuleObject).getModule() and
    not node.isEntryNode()
  )
}
|
||||
483
python/ql/src/analysis/DefinitionTracking.qll
Normal file
483
python/ql/src/analysis/DefinitionTracking.qll
Normal file
@@ -0,0 +1,483 @@
|
||||
/**
|
||||
* Definition tracking for jump-to-defn query.
|
||||
*/
|
||||
import python
|
||||
|
||||
import semmle.dataflow.SSA
|
||||
import semmle.python.pointsto.PointsTo
|
||||
|
||||
/* The underlying representation of `Definition`: an expression, statement or module. */
private newtype TDefinition =
  TLocalDefinition(AstNode a) {
    a instanceof Module or
    a instanceof Stmt or
    a instanceof Expr
  }
|
||||
|
||||
/**
 * A definition, for the purposes of jump-to-definition.
 */
class Definition extends TLocalDefinition {
  /** Gets the AST node wrapped by this definition. */
  AstNode getAstNode() { TLocalDefinition(result) = this }

  /** Gets the location of the AST node of this definition. */
  Location getLocation() {
    exists(AstNode node | node = this.getAstNode() | result = node.getLocation())
  }

  /** Gets the module that encloses the scope of the AST node of this definition. */
  Module getModule() {
    exists(AstNode node | node = this.getAstNode() |
      result = node.getScope().getEnclosingModule()
    )
  }

  /** Gets a textual representation of this definition, which includes its location. */
  string toString() {
    exists(AstNode node | node = this.getAstNode() |
      result = "Definition " + node.getLocation().toString()
    )
  }
}
|
||||
|
||||
/* Holds if `defn` is a candidate jump-to-definition target for the flow node `use`. */
private predicate jump_to_defn(ControlFlowNode use, Definition defn) {
  // use of an ESSA variable
  exists(EssaVariable var | var.getASourceUse() = use | ssa_variable_defn(var, defn))
  or
  // attribute load
  use.isLoad() and
  exists(string name | jump_to_defn_attribute(use.(AttrNode).getObject(name), name, defn))
  or
  // import of a module, or of a package (which resolves to its init module)
  exists(Module target | defn.getAstNode() = target |
    exists(PythonModuleObject mod |
      target = mod.getModule() and
      use.(ImportExprNode).refersTo(mod)
    )
    or
    exists(PackageObject package |
      target = package.getInitModule().getModule() and
      use.(ImportExprNode).refersTo(package)
    )
  )
  or
  // import of a member from a module
  exists(PythonModuleObject mod, string name |
    scope_jump_to_defn_attribute(mod.getModule(), name, defn) and
    use.(ImportMemberNode).getModule(name).refersTo(mod)
  )
  or
  // import of a member from a package
  exists(PackageObject package, string name |
    scope_jump_to_defn_attribute(package.getInitModule().getModule(), name, defn) and
    use.(ImportMemberNode).getModule(name).refersTo(package)
  )
  or
  // the flow node is itself a Python function or class object
  defn.getAstNode() = use.getNode() and
  (use instanceof ClassObject or use instanceof PyFunctionObject)
}
|
||||
|
||||
/*
 * Holds if `def` is a candidate definition for `use`, where `use` is neither a
 * class-expression nor a function-expression. Classes and functions are
 * preferred to those expressions.
 */
private predicate preferred_jump_to_defn(Expr use, Definition def) {
  jump_to_defn(use.getAFlowNode(), def) and
  not (use instanceof FunctionExpr or use instanceof ClassExpr)
}
|
||||
|
||||
/* Holds if `def` is the one and only preferred definition for `use`. */
private predicate unique_jump_to_defn(Expr use, Definition def) {
  preferred_jump_to_defn(use, def) and
  forall(Definition other | preferred_jump_to_defn(use, other) | other = def)
}
|
||||
|
||||
/* Holds if `defn` is a definition for the ESSA variable `var`, found via the ESSA definition of `var`. */
private predicate ssa_variable_defn(EssaVariable var, Definition defn) {
  exists(EssaDefinition def | def = var.getDefinition() | ssa_defn_defn(def, defn))
}
|
||||
|
||||
/** Holds if `defn` is a definition for one of the inputs of the phi-function `phi`. */
private predicate ssa_phi_defn(PhiFunction phi, Definition defn) {
  exists(EssaVariable input | input = phi.getAnInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/**
 * Holds if `defn` is a definition for the ESSA definition `def`, which may be
 * a node definition, a node refinement, an edge refinement or a phi-function.
 */
private predicate ssa_defn_defn(EssaDefinition def, Definition defn) {
  ssa_node_defn(def, defn)
  or
  ssa_node_refinement_defn(def, defn)
  or
  ssa_filter_defn(def, defn)
  or
  ssa_phi_defn(def, defn)
}
|
||||
|
||||
/** Holds if the input of the ESSA edge refinement `def` is defined by `defn`. */
predicate ssa_filter_defn(PyEdgeRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/** Holds if the input of the ESSA definition `uniphi` is defined by `defn`. */
predicate uni_edged_phi_defn(SingleSuccessorGuard uniphi, Definition defn) {
  exists(EssaVariable input | input = uniphi.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/*
 * Holds if `defn` is a definition for the ESSA node definition `def`: an
 * assignment, parameter, deletion, scope-entry or implicit submodule definition.
 */
pragma [noinline]
private predicate ssa_node_defn(EssaNodeDefinition def, Definition defn) {
  implicit_submodule_defn(def, defn)
  or
  scope_entry_defn(def, defn)
  or
  delete_defn(def, defn)
  or
  parameter_defn(def, defn)
  or
  assignment_jump_to_defn(def, defn)
}
|
||||
|
||||
/* Definition for normal assignments `def = ...`: the AST node of the assigned value. */
private predicate assignment_jump_to_defn(AssignmentDefinition def, Definition defn) {
  defn.getAstNode() = def.getValue().getNode()
}
|
||||
|
||||
/* Holds if `defn` is a definition for the ESSA node refinement `def`, for any of the handled kinds of refinement. */
pragma [noinline]
private predicate ssa_node_refinement_defn(EssaNodeRefinement def, Definition defn) {
  uni_edged_phi_defn(def, defn)
  or
  attribute_delete_defn(def, defn)
  or
  argument_defn(def, defn)
  or
  callsite_defn(def, defn)
  or
  attribute_assignment_defn(def, defn)
  or
  import_star_defn(def, defn)
  or
  method_callsite_defn(def, defn)
}
|
||||
|
||||
|
||||
/* Definition for a parameter, `def foo(param): ...`: the AST node of the defining node. */
private predicate parameter_defn(ParameterDefinition def, Definition defn) {
  defn = TLocalDefinition(def.getDefiningNode().getNode())
}
|
||||
|
||||
/* Definition for a deletion, `del name`. A deletion never provides a definition, so this never holds. */
private predicate delete_defn(DeletionDefinition def, Definition defn) { none() }
|
||||
|
||||
/*
 * Implicit "defn" of the names of submodules at the start of an `__init__.py` file.
 * The definition is the module of the submodule named by the variable of `def`.
 */
private predicate implicit_submodule_defn(ImplicitSubModuleDefinition def, Definition defn) {
  exists(PackageObject package, ModuleObject mod |
    defn.getAstNode() = mod.getModule() and
    mod = package.submodule(def.getSourceVariable().getName()) and
    def.getDefiningNode().getScope() = package.getInitModule().getModule()
  )
}
|
||||
|
||||
/*
 * Helper for scope_entry_value_transfer(...).
 * Transfer of values from the callsite to the callee, for enclosing variables,
 * but not arguments/parameters.
 */
private predicate scope_entry_value_transfer_at_callsite(EssaVariable pred_var, ScopeEntryDefinition succ_def) {
  exists(FunctionObject callee, CallNode call |
    call = callee.getACall() and
    call = pred_var.getAUse() and
    succ_def.getDefiningNode() = callee.getFunction().getEntryNode() and
    succ_def.getSourceVariable() = pred_var.getSourceVariable()
  )
}
|
||||
|
||||
/* Model the transfer of values at scope-entry points, from the variable `pred_var` to the definition `succ_def`. */
private
predicate scope_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) {
  class_entry_value_transfer(pred_var, succ_def)
  or
  scope_entry_value_transfer_at_callsite(pred_var, succ_def)
  or
  BaseFlow::scope_entry_value_transfer_from_earlier(pred_var, _, succ_def, _)
}
|
||||
|
||||
/*
 * Helper for scope_entry_value_transfer.
 * Transfer along the entry edge of an import-time scope, from a use of `pred_var`
 * to the defining node of `succ_def`, for the same source variable.
 */
private
predicate class_entry_value_transfer(EssaVariable pred_var, ScopeEntryDefinition succ_def) {
  succ_def.getSourceVariable() = pred_var.getSourceVariable() and
  exists(ControlFlowNode class_def, ImportTimeScope scope |
    scope.entryEdge(class_def, succ_def.getDefiningNode()) and
    pred_var.getAUse() = class_def
  )
}
|
||||
|
||||
/*
 * Definition for implicit variable declarations at scope-entry:
 * the definition of a variable whose value is transferred in from another scope.
 */
pragma [noinline]
private predicate scope_entry_defn(ScopeEntryDefinition def, Definition defn) {
  exists(EssaVariable var | scope_entry_value_transfer(var, def) | ssa_variable_defn(var, defn))
}
|
||||
|
||||
/*
 * Definition for a variable (possibly) redefined by a call.
 * The call is assumed not to define the variable, so the definition of the input is used.
 */
pragma [noinline]
private predicate callsite_defn(CallsiteRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/* Pass through for `self`, for the implicit re-defn of `self` in `self.foo()`. */
private predicate method_callsite_defn(MethodCallsiteRefinement def, Definition defn) {
  // only the attributes may change; the value of self stays the same
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/**
 * Helper for import_star_defn.
 * Holds if the module imported by the `import *` that defines `def` refers to `mod`,
 * and `mod` exports `name`, the name of the variable of `def`.
 */
pragma [noinline]
private predicate module_and_name_for_import_star(ModuleObject mod, string name, ImportStarRefinement def) {
  def.getDefiningNode().(ImportStarNode).getModule().refersTo(mod) and
  mod.exports(name) and
  name = def.getSourceVariable().getName()
}
|
||||
|
||||
/**
 * Holds if `def` is technically a defn of `var`, but the `from ... import *`
 * does not in fact define `var`, as the imported module does not export its name.
 */
pragma [noinline]
private predicate variable_not_redefined_by_import_star(EssaVariable var, ImportStarRefinement def) {
  exists(ImportStarNode import_star, ModuleObject mod |
    import_star = def.getDefiningNode() and
    import_star.getModule().refersTo(mod) and
    not mod.exports(var.getSourceVariable().getName())
  ) and
  var = def.getInput()
}
|
||||
|
||||
/* Definition for `from ... import *`. */
private predicate import_star_defn(ImportStarRefinement def, Definition defn) {
  // the import leaves the variable alone: keep the definition it had before
  exists(EssaVariable var |
    ssa_variable_defn(var, defn) and
    variable_not_redefined_by_import_star(var, def)
  )
  or
  // the variable is an attribute of the imported module
  exists(ModuleObject mod, string name |
    scope_jump_to_defn_attribute(mod.getModule(), name, defn) and
    module_and_name_for_import_star(mod, name, def)
  )
}
|
||||
|
||||
/**
 * Attribute assignments have no effect as far as defn tracking is concerned,
 * so the definition of the input variable is used.
 */
private predicate attribute_assignment_defn(AttributeAssignment def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/**
 * Ignore the effects of calls on their arguments: the definition of the input is used.
 * This is an approximation, but attempting to improve accuracy would be very
 * expensive for very little gain.
 */
private predicate argument_defn(ArgumentRefinement def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/**
 * Attribute deletions have no effect as far as defn tracking is concerned,
 * so the definition of the input variable is used.
 */
pragma [noinline]
private predicate attribute_delete_defn(EssaAttributeDeletion def, Definition defn) {
  exists(EssaVariable input | input = def.getInput() | ssa_variable_defn(input, defn))
}
|
||||
|
||||
/* Definition flow for attributes. These mirror the "normal" defn predicates.
|
||||
* For each defn predicate `xxx_defn(XXX def, Definition defn)`
|
||||
* There is an equivalent predicate that tracks the values in attributes:
|
||||
* `xxx_jump_to_defn_attribute(XXX def, string name, Definition defn)`
|
||||
* */
|
||||
|
||||
/**
 * INTERNAL -- Public for testing only.
 * Holds if `defn` is a definition for the attribute `name` of the ssa variable `var`.
 */
predicate ssa_variable_jump_to_defn_attribute(EssaVariable var, string name, Definition defn) {
  exists(EssaDefinition def | def = var.getDefinition() |
    ssa_defn_jump_to_defn_attribute(def, name, defn)
  )
}
|
||||
|
||||
/**
 * Helper for ssa_variable_jump_to_defn_attribute.
 * Holds if `defn` is a definition for the attribute `name` of the ESSA definition `def`.
 */
private predicate ssa_defn_jump_to_defn_attribute(EssaDefinition def, string name, Definition defn) {
  ssa_filter_jump_to_defn_attribute(def, name, defn)
  or
  ssa_node_refinement_jump_to_defn_attribute(def, name, defn)
  or
  ssa_node_jump_to_defn_attribute(def, name, defn)
  or
  ssa_phi_jump_to_defn_attribute(def, name, defn)
}
|
||||
|
||||
/** Holds if the attribute `name` of the input of the ESSA edge refinement `def` is defined by `defn`. */
predicate ssa_filter_jump_to_defn_attribute(PyEdgeRefinement def, string name, Definition defn) {
  exists(EssaVariable input | input = def.getInput() |
    ssa_variable_jump_to_defn_attribute(input, name, defn)
  )
}
|
||||
|
||||
/** Holds if the attribute `name` of one of the inputs of the phi-function `phi` is defined by `defn`. */
private predicate ssa_phi_jump_to_defn_attribute(PhiFunction phi, string name, Definition defn) {
  exists(EssaVariable input | input = phi.getAnInput() |
    ssa_variable_jump_to_defn_attribute(input, name, defn)
  )
}
|
||||
|
||||
/**
 * Helper for ssa_defn_jump_to_defn_attribute.
 * Covers node definitions: scope entries, `self` parameters and assignments.
 */
pragma[noinline]
private predicate ssa_node_jump_to_defn_attribute(EssaNodeDefinition def, string name, Definition defn) {
  scope_entry_jump_to_defn_attribute(def, name, defn)
  or
  self_parameter_jump_to_defn_attribute(def, name, defn)
  or
  assignment_jump_to_defn_attribute(def, name, defn)
}
|
||||
|
||||
/**
 * Helper for ssa_defn_jump_to_defn_attribute.
 * Covers node refinements: argument refinements and attribute assignments.
 */
pragma[noinline]
private predicate ssa_node_refinement_jump_to_defn_attribute(EssaNodeRefinement def, string name, Definition defn) {
  argument_jump_to_defn_attribute(def, name, defn)
  or
  attribute_assignment_jump_to_defn_attribute(def, name, defn)
}
|
||||
|
||||
/*
 * Holds if `defn` defines the attribute `name` of a variable whose value is
 * transferred to the scope-entry definition `def`.
 */
pragma[noinline]
private predicate scope_entry_jump_to_defn_attribute(ScopeEntryDefinition def, string name, Definition defn) {
  exists(EssaVariable var | scope_entry_value_transfer(var, def) |
    ssa_variable_jump_to_defn_attribute(var, name, defn)
  )
}
|
||||
|
||||
/*
 * Holds if `defn` is a definition for a variable called `name` in the scope `s`,
 * where that variable reaches the exit of the scope.
 */
private predicate scope_jump_to_defn_attribute(ImportTimeScope s, string name, Definition defn) {
  exists(EssaVariable var |
    var.getName() = name and
    var.getScope() = s and
    BaseFlow::reaches_exit(var) and
    ssa_variable_defn(var, defn)
  )
}
|
||||
|
||||
/* Holds if `defn` is a definition for the attribute `name` of the object that the flow node `use` evaluates to. */
private predicate jump_to_defn_attribute(ControlFlowNode use, string name, Definition defn) {
  /* Class or module attribute */
  exists(Scope scope, Object obj |
    scope_jump_to_defn_attribute(scope, name, defn) and
    use.refersTo(obj)
  |
    scope = obj.(PackageObject).getInitModule().getModule()
    or
    scope = obj.(PythonModuleObject).getModule()
    or
    scope = obj.(ClassObject).getPyClass()
  )
  or
  /* Super attributes */
  exists(SuperBoundMethod sbm, AttrNode f, Object function |
    defn.getAstNode() = function.getOrigin() and
    function = sbm.getFunction(_) and
    f.refersTo(sbm) and
    use = f.getObject(name)
  )
  or
  /* Instance attributes */
  exists(ClassObject cls |
    scope_jump_to_defn_attribute(cls.getPyClass(), name, defn) and
    use.refersTo(_, cls, _)
  )
  or
  /* Local attribute */
  exists(EssaVariable var | var.getASourceUse() = use |
    ssa_variable_jump_to_defn_attribute(var, name, defn)
  )
}
|
||||
|
||||
/* Holds if `defn` defines the attribute `name` of the value assigned by `def`. */
pragma[noinline]
private predicate assignment_jump_to_defn_attribute(AssignmentDefinition def, string name, Definition defn) {
  exists(ControlFlowNode value | value = def.getValue() |
    jump_to_defn_attribute(value, name, defn)
  )
}
|
||||
|
||||
/*
 * Holds if `defn` defines the attribute `name` after the attribute assignment `def`.
 * For the assigned attribute this is the defining node of `def`; any other
 * attribute keeps the definition it had on the input variable.
 */
pragma[noinline]
private predicate attribute_assignment_jump_to_defn_attribute(AttributeAssignment def, string name, Definition defn) {
  not name = def.getName() and
  ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn)
  or
  name = def.getName() and
  defn = TLocalDefinition(def.getDefiningNode().getNode())
}
|
||||
|
||||
/**
 * Holds if `def` defines the attribute `name`.
 * `def` takes the form `setattr(use, "name", ...)` where `use` is the input to the defn
 * and the attribute name is given as a string constant.
 */
private predicate sets_attribute(ArgumentRefinement def, string name) {
  exists(CallNode call | call = def.getDefiningNode() |
    name = call.getArg(1).getNode().(StrConst).getText() and
    call.getArg(0) = def.getInput().getAUse() and
    call.getFunction().refersTo(builtin_object("setattr"))
  )
}
|
||||
|
||||
/*
 * Holds if `defn` defines the attribute `name` after the argument refinement `def`.
 * When `def` sets the attribute, the definition is that of the third argument of
 * the call; otherwise it is the one the attribute had on the input variable.
 */
pragma[noinline]
private predicate argument_jump_to_defn_attribute(ArgumentRefinement def, string name, Definition defn) {
  sets_attribute(def, name) and
  jump_to_defn(def.getDefiningNode().(CallNode).getArg(2), defn)
  or
  not sets_attribute(def, name) and
  ssa_variable_jump_to_defn_attribute(def.getInput(), name, defn)
}
|
||||
|
||||
/**
 * Gets the (temporally) preceding variable for "self", e.g. `def` is in method foo()
 * and `result` is in `__init__()`. The result is a `self` variable that reaches the
 * exit of a method that precedes the function of `def`.
 */
private EssaVariable preceding_self_variable(ParameterDefinition def) {
  exists(Function method, Function preceding |
    result.getScope() = preceding and
    result.getSourceVariable().(Variable).isSelf() and
    BaseFlow::reaches_exit(result) and
    // Only methods
    preceding.precedes(method) and
    preceding.isMethod() and
    method = def.getScope()
  ) and
  def.isSelf()
}
|
||||
|
||||
/* Holds if `defn` defines the attribute `name` of a preceding `self` variable for the parameter `def`. */
pragma [noinline]
private predicate self_parameter_jump_to_defn_attribute(ParameterDefinition def, string name, Definition defn) {
  exists(EssaVariable earlier | earlier = preceding_self_variable(def) |
    ssa_variable_jump_to_defn_attribute(earlier, name, defn)
  )
}
|
||||
|
||||
/**
 * Gets a definition for 'use'.
 * This exists primarily for testing; use `getUniqueDefinition()` instead.
 * Calls and artificial expressions have no result.
 */
Definition getADefinition(Expr use) {
  not (use instanceof Call or use.isArtificial()) and
  jump_to_defn(use.getAFlowNode(), result) and
  // the use itself does not count as its own definition
  not result.getAstNode() = use
}
|
||||
|
||||
/**
 * Gets the unique definition for 'use', if one can be found.
 * Helper for the jump-to-definition query.
 * Calls and artificial expressions have no result.
 */
Definition getUniqueDefinition(Expr use) {
  not (use instanceof Call or use.isArtificial()) and
  unique_jump_to_defn(use, result) and
  // the use itself does not count as its own definition
  not result.getAstNode() = use
}
|
||||
|
||||
|
||||
/** Helper class to get suitable locations for attributes */
class NiceLocationExpr extends @py_expr {
  /** Gets the textual representation of the underlying expression. */
  string toString() { exists(Expr e | e = this | result = e.toString()) }

  /**
   * Holds if this expression should be shown at the location that spans column `bc`
   * of line `bl` to column `ec` of line `el` in file `f`.
   * Only attributes, names, import expressions and import members have such a location.
   */
  predicate hasLocationInfo(string f, int bl, int bc, int el, int ec) {
    /* Show y for `y` in `from xxx import y` */
    exists(ImportMember member | member = this |
      member.getLocation().hasLocationInfo(f, _, _, el, ec) and
      bl = el and
      bc = ec - member.getName().length() + 1
    )
    or
    /* Show xxx for `xxx` in `from xxx import y` or
     * for `import xxx` or for `import xxx as yyy`. */
    this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec)
    or
    this.(Name).getLocation().hasLocationInfo(f, bl, bc, el, ec)
    or
    /* Attribute location for x.y is that of 'y' so that url does not overlap with that of 'x' */
    exists(Attribute attr | attr = this |
      attr.getLocation().hasLocationInfo(f, _, _, el, ec) and
      bl = el and
      bc = ec - attr.getName().length() + 1
    )
  }
}
|
||||
|
||||
|
||||
17
python/ql/src/analysis/Definitions.ql
Normal file
17
python/ql/src/analysis/Definitions.ql
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* @name Definitions
|
||||
* @description Jump to definition helper query.
|
||||
* @kind definitions
|
||||
* @id py/jump-to-definition
|
||||
*/
|
||||
|
||||
import python
|
||||
import DefinitionTracking
|
||||
|
||||
|
||||
// Report each use together with its unique definition.
from NiceLocationExpr use, Definition defn, string kind
where
    kind = "Definition" and
    defn = getUniqueDefinition(use) and
    // Ignore if the definition is on the same line (of the same file) as the use
    exists(string file, int line |
        use.hasLocationInfo(file, line, _, _, _) and
        not defn.getLocation().hasLocationInfo(file, line, _, _, _)
    )
select use, defn, kind
|
||||
33
python/ql/src/analysis/Efficiency.ql
Normal file
33
python/ql/src/analysis/Efficiency.ql
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* Compute the efficiency of the points-to relation. That is the ratio of
|
||||
* "interesting" facts to total facts.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
import semmle.python.pointsto.PointsToContext
|
||||
|
||||
/**
 * Holds if `f` is a name constant, an immutable literal or a parameter.
 * Points-to facts about such nodes are not counted as "interesting".
 */
predicate trivial(ControlFlowNode f) {
    f instanceof NameConstantNode
    or
    f.getNode() instanceof ImmutableLiteral
    or
    f.getNode() instanceof Parameter
}
|
||||
|
||||
from int interesting_facts, int interesting_facts_in_source, int total_size, float efficiency
where
    // Non-trivial (node, value, class) facts anywhere in the snapshot.
    interesting_facts = strictcount(ControlFlowNode f, Object value, ClassObject cls |
        not trivial(f) and f.refersTo(value, cls, _)
    ) and
    // The same, restricted to nodes whose file has a relative path.
    interesting_facts_in_source = strictcount(ControlFlowNode f, Object value, ClassObject cls |
        not trivial(f) and
        f.refersTo(value, cls, _) and
        exists(f.getScope().getEnclosingModule().getFile().getRelativePath())
    ) and
    // Every tuple of the underlying context-sensitive points-to relation.
    total_size = strictcount(ControlFlowNode f, PointsToContext ctx, Object value, ClassObject cls, ControlFlowNode orig |
        PointsTo::points_to(f, ctx, value, cls, orig)
    ) and
    // Percentage of in-source interesting facts relative to all points-to facts.
    efficiency = 100.0 * interesting_facts_in_source / total_size
select interesting_facts, interesting_facts_in_source, total_size, efficiency
|
||||
11
python/ql/src/analysis/FailedInference.ql
Normal file
11
python/ql/src/analysis/FailedInference.ql
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
|
||||
// List every class for which inference failed, together with the recorded reason.
from ClassObject cls, string reason
where PointsTo::Types::failed_inference(cls, reason)
select cls, reason
|
||||
|
||||
28
python/ql/src/analysis/ImportFailure.qhelp
Normal file
28
python/ql/src/analysis/ImportFailure.qhelp
Normal file
@@ -0,0 +1,28 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>Tracing which module is imported by an import statement is very important in ensuring that the whole program is available
|
||||
for analysis. Failure to determine which module is imported by an import reduces the extent and accuracy of Semmle's analysis.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Missing imports will degrade the effectiveness of code analysis and may result in errors going undetected.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>
|
||||
Ensure that all required modules and packages can be found when running the extractor.
|
||||
</p>
|
||||
|
||||
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>Semmle Tutorial: <a href="https://semmle.com/wiki/pages/viewpage.action?pageId=9493108">Basic project creation (Python)</a>.</li>
|
||||
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
71
python/ql/src/analysis/ImportFailure.ql
Normal file
71
python/ql/src/analysis/ImportFailure.ql
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* @name Unresolved import
|
||||
* @description An unresolved import may result in reduced coverage and accuracy of analysis.
|
||||
* @kind problem
|
||||
* @problem.severity info
|
||||
* @id py/import-failure
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/**
 * Gets an import that serves as an alternative to `ie`.
 *
 * Both imports must be the value of an alias (directly, or as the module of
 * an imported member), and they must sit in opposite branches of the same
 * `if` statement or in the body and a handler of the same `try` statement.
 */
ImportExpr alternative_import(ImportExpr ie) {
    exists(Alias thisalias |
        thisalias.getValue() = ie
        or
        thisalias.getValue().(ImportMember).getModule() = ie
    ) and
    exists(Alias otheralias |
        otheralias.getValue() = result
        or
        otheralias.getValue().(ImportMember).getModule() = result
    ) and
    (
        exists(If cond |
            cond.getBody().contains(ie) and cond.getOrelse().contains(result)
            or
            cond.getBody().contains(result) and cond.getOrelse().contains(ie)
        )
        or
        exists(Try attempt |
            attempt.getBody().contains(ie) and attempt.getAHandler().contains(result)
            or
            attempt.getBody().contains(result) and attempt.getAHandler().contains(ie)
        )
    )
}
|
||||
|
||||
/**
 * Gets the platform tag ("java", "darwin", "win32", "linux2" or "unsupported")
 * associated with the module imported by `ie`, judged by the module name alone.
 * Has no result for modules that are not recognised as platform specific.
 */
string os_specific_import(ImportExpr ie) {
    exists(string name | name = ie.getImportedModuleName() |
        result = "java" and
        (name.matches("org.python.%") or name.matches("java.%"))
        or
        result = "darwin" and name.matches("Carbon.%")
        or
        result = "win32" and
        (
            name = "_winapi" or name = "_win32api" or name = "_winreg" or
            name = "nt" or name = "ntpath" or name.matches("win32%")
        )
        or
        result = "linux2" and
        (name = "posix" or name = "posixpath")
        or
        result = "unsupported" and
        (name = "__pypy__" or name = "ce" or name.matches("riscos%"))
    )
}
|
||||
|
||||
/** Gets the `sys.platform` flag recorded for the major version under analysis. */
string get_os() {
    exists(string version | version = major_version().toString() |
        py_flags_versioned("sys.platform", result, version)
    )
}
|
||||
|
||||
/**
 * Holds if failing to resolve `ie` is acceptable: either it is tagged for a
 * platform other than the one reported by `get_os()`, or an alternative
 * import resolves.
 */
predicate ok_to_fail(ImportExpr ie) {
    os_specific_import(ie) != get_os()
    or
    exists(ImportExpr alt | alt = alternative_import(ie) | alt.refersTo(_))
}
|
||||
|
||||
from ImportExpr ie
where
    not ie.refersTo(_) and
    not ok_to_fail(ie) and
    // Only report imports to which some analysis context applies.
    exists(Context c | c.appliesTo(ie.getAFlowNode())) and
    // Skip imports placed on the branch that a version guard rules out.
    not exists(VersionGuard guard, BasicBlock block |
        block = ie.getAFlowNode().getBasicBlock() |
        guard.isTrue() and guard.controls(block, false)
        or
        not guard.isTrue() and guard.controls(block, true)
    )
select ie, "Unable to resolve import of '" + ie.getImportedModuleName() + "'."
|
||||
11
python/ql/src/analysis/KeyPointsToFailure.qhelp
Normal file
11
python/ql/src/analysis/KeyPointsToFailure.qhelp
Normal file
@@ -0,0 +1,11 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>Points-to analysis underpins type inference and thus most of Semmle's Python analysis.
|
||||
Failures in points-to undermine type inference and reduce the coverage and accuracy of many queries.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
31
python/ql/src/analysis/KeyPointsToFailure.ql
Normal file
31
python/ql/src/analysis/KeyPointsToFailure.ql
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @name Key "points-to" fails for expression.
|
||||
* @description Expression does not "point-to" an object which prevents further points-to analysis.
|
||||
* @kind problem
|
||||
* @problem.severity info
|
||||
* @id py/key-points-to-failure
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
/** Holds if some flow node of `e` does not refer to any object. */
predicate points_to_failure(Expr e) {
    exists(ControlFlowNode f | f = e.getAFlowNode() and not f.refersTo(_))
}
|
||||
|
||||
/**
 * Holds if points-to fails for `e` although it does not fail for any of its
 * sources: no sub-expression fails, no ultimate SSA definition reaching `e`
 * fails, and `e` is not the target of an assignment.
 */
predicate key_points_to_failure(Expr e) {
    points_to_failure(e) and
    not points_to_failure(e.getASubExpression()) and
    not exists(Assign a | e = a.getATarget()) and
    not exists(SsaVariable ssa |
        ssa.getAUse() = e.getAFlowNode() and
        points_to_failure(ssa.getAnUltimateDefinition().getDefinition().getNode())
    )
}
|
||||
|
||||
from Attribute e
where
    // Attributes used as the callee of a call are not reported.
    not exists(Call c | c.getFunc() = e) and
    key_points_to_failure(e)
select e, "Expression does not 'point-to' any object, but all its sources do."
|
||||
11
python/ql/src/analysis/PointsToFailure.qhelp
Normal file
11
python/ql/src/analysis/PointsToFailure.qhelp
Normal file
@@ -0,0 +1,11 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>Points-to analysis underpins type inference and thus most of Semmle's Python analysis.
|
||||
Failures in points-to undermine type inference and reduce the coverage and accuracy of many queries.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
18
python/ql/src/analysis/PointsToFailure.ql
Normal file
18
python/ql/src/analysis/PointsToFailure.ql
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @name "points-to" fails for expression.
|
||||
* @description Expression does not "point-to" an object which prevents type inference.
|
||||
* @kind problem
|
||||
* @id py/points-to-failure
|
||||
* @problem.severity info
|
||||
* @tags reliability
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
// Report every expression with a flow node that refers to no object.
from Expr e
where exists(ControlFlowNode f | f = e.getAFlowNode() and not f.refersTo(_))
select e, "Expression does not 'point-to' any object."
|
||||
13
python/ql/src/analysis/Pruned.ql
Normal file
13
python/ql/src/analysis/Pruned.ql
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
import python
|
||||
import semmle.python.pointsto.PointsTo
|
||||
|
||||
// Count the flow nodes whose basic block is not reachable in any context.
from int size
where
    size = count(ControlFlowNode f | not PointsTo::Test::reachableBlock(f.getBasicBlock(), _))
select size
|
||||
27
python/ql/src/analysis/RatioOfDefinitions.ql
Normal file
27
python/ql/src/analysis/RatioOfDefinitions.ql
Normal file
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* @name Ratio of jump-to-definitions computed
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
import DefinitionTracking
|
||||
|
||||
/**
 * Holds if `e` is an expression for which a jump-to-definition result is
 * wanted: a loaded name or attribute, an import or an imported member,
 * provided it does not refer to a C (builtin) object.
 */
predicate want_to_have_definition(Expr e) {
    (
        e.(Name).getCtx() instanceof Load
        or
        e.(Attribute).getCtx() instanceof Load
        or
        e instanceof ImportExpr
        or
        e instanceof ImportMember
    ) and
    /* not builtin object like len, tuple, etc. */
    not exists(Object cobj | cobj.isC() and e.refersTo(cobj))
}
|
||||
|
||||
from int yes, int no
where
    // Wanted expressions that have a unique definition ...
    yes = count(Expr e | exists(getUniqueDefinition(e)) and want_to_have_definition(e)) and
    // ... and wanted expressions that have none.
    no = count(Expr e | not exists(getUniqueDefinition(e)) and want_to_have_definition(e))
select yes, no, yes*100/(yes+no) + "%"
|
||||
228
python/ql/src/analysis/Sanity.ql
Normal file
228
python/ql/src/analysis/Sanity.ql
Normal file
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* @name Sanity check
|
||||
* @description General sanity check to be run on any and all code. Should never produce any results.
|
||||
* @id py/sanity-check
|
||||
*/
|
||||
|
||||
import python
|
||||
import DefinitionTracking
|
||||
|
||||
/**
 * Holds if `number` results for the member named `what` violates uniqueness,
 * with `problem` describing the violation.
 *
 * Only the member names listed below are accepted, and only counts of zero
 * or of 2 to 10 are reported.
 */
predicate uniqueness_error(int number, string what, string problem) {
    (
        what = "toString" or what = "getLocation" or what = "getNode" or
        what = "getDefinition" or what = "getEntryNode" or what = "getOrigin" or
        what = "getAnInferredType"
    ) and
    exists(string suffix | suffix = " results for " + what + "()" |
        number = 0 and problem = "no" + suffix
        or
        number in [2 .. 10] and problem = number.toString() + suffix
    )
}
|
||||
|
||||
/**
 * Holds if an AST node of QL class `clsname`, described by `what`, has a
 * `toString` or location `problem`.
 */
predicate ast_sanity(string clsname, string problem, string what) {
    exists(AstNode a | clsname = a.getAQlClass() |
        not exists(a.getLocation()) and problem = "no location" and what = a.toString()
        or
        exists(Location loc | loc = a.getLocation() |
            uniqueness_error(count(a.toString()), "toString", problem) and
            what = "at " + loc.toString()
            or
            uniqueness_error(strictcount(a.getLocation()), "getLocation", problem) and
            what = loc.toString()
        )
    )
}
|
||||
|
||||
/**
 * Holds if a location of QL class `clsname`, described by `what`, has a
 * `toString` problem or ends before it starts.
 */
predicate location_sanity(string clsname, string problem, string what) {
    exists(Location l | clsname = l.getAQlClass() |
        // Problems that can be reported using the location's own description.
        what = "at " + l.toString() and
        (
            uniqueness_error(count(l.toString()), "toString", problem)
            or
            l.getEndLine() < l.getStartLine() and
            problem = "end line before start line"
            or
            l.getEndLine() = l.getStartLine() and
            l.getEndColumn() < l.getStartColumn() and
            problem = "end column before start column"
        )
        or
        // No description available: fall back to the class of a node located there.
        not exists(l.toString()) and
        problem = "no toString" and
        (
            exists(AstNode thing | thing.getLocation() = l |
                what = "a location of a " + thing.getAQlClass()
            )
            or
            not exists(AstNode thing | thing.getLocation() = l) and
            what = "a location"
        )
    )
}
|
||||
|
||||
/**
 * Holds if a control flow node of QL class `clsname`, described by `what`,
 * has no location or a non-unique AST node (`problem`).
 */
predicate cfg_sanity(string clsname, string problem, string what) {
    exists(ControlFlowNode f | clsname = f.getAQlClass() |
        not exists(f.getLocation()) and problem = "no location" and what = f.toString()
        or
        what = "at " + f.getLocation().toString() and
        (
            uniqueness_error(count(f.getNode()), "getNode", problem)
            or
            // NOTE(review): "getValue" is not one of the member names accepted by
            // `uniqueness_error`, so this check can never hold as written.
            // Accepting the name would also flag every node that is not an
            // `AttrNode` (count 0) - confirm the intent before enabling it.
            uniqueness_error(count(f.(AttrNode).getObject()), "getValue", problem)
        )
    )
}
|
||||
|
||||
/**
 * Holds if a scope of QL class `clsname`, described by `what`, has a
 * non-unique entry node, `toString` or location, or no location at all.
 */
predicate scope_sanity(string clsname, string problem, string what) {
    exists(Scope s | clsname = s.getAQlClass() |
        not exists(s.getLocation()) and problem = "no location" and what = s.toString()
        or
        what = "at " + s.getLocation().toString() and
        (
            uniqueness_error(count(s.getEntryNode()), "getEntryNode", problem)
            or
            uniqueness_error(count(s.toString()), "toString", problem)
            or
            uniqueness_error(strictcount(s.getLocation()), "getLocation", problem)
        )
    )
}
|
||||
|
||||
/**
 * Gets the best available description of the builtin object `o`:
 * its `toString()`, else its recorded name, else a description of its
 * inferred type, else the plain text "builtin object".
 */
string best_description_builtin_object(Object o) {
    o.isBuiltin() and
    (
        result = o.toString()
        or
        not exists(o.toString()) and
        (
            py_cobjectnames(o, result)
            or
            not py_cobjectnames(o, _) and
            (
                result = "builtin object of type " + o.getAnInferredType().toString()
                or
                not exists(o.getAnInferredType().toString()) and
                result = "builtin object"
            )
        )
    )
}
|
||||
|
||||
/** Holds if `o` is recorded in `py_cobject_sources` with source kind 0. */
private predicate introspected_builtin_object(Object o) {
    /* Only check objects from the extractor, missing data for objects generated from C source code analysis is OK.
     * as it will be ignored if it doesn't match up with the introspected form. */
    exists(int source | source = 0 | py_cobject_sources(o, source))
}
|
||||
|
||||
/**
 * Holds if an introspected builtin object of QL class `clsname`, described by
 * `what`, lacks a type, a name or a `toString` (`problem`).
 */
predicate builtin_object_sanity(string clsname, string problem, string what) {
    exists(Object o |
        introspected_builtin_object(o) and
        clsname = o.getAQlClass() and
        what = best_description_builtin_object(o) |
        not exists(o.getAnInferredType()) and
        (
            problem = "no results for getAnInferredType"
            or
            not py_cobjectnames(o, _) and problem = "neither name nor type"
        )
        or
        // NOTE(review): "name" is not one of the member names accepted by
        // `uniqueness_error`, so this check can never hold as written - confirm intent.
        uniqueness_error(count(string name | py_cobjectnames(o, name)), "name", problem)
        or
        not exists(o.toString()) and
        problem = "no toString" and
        // Special objects whose name starts with "_semmle" and the unknown value are exempt.
        not exists(string name | name.prefix(7) = "_semmle" | py_special_objects(o, name)) and
        not o = unknownValue()
    )
}
|
||||
|
||||
/**
 * Holds if a non-builtin object of QL class `clsname`, described by `what`,
 * has a non-unique origin, no origin location, or a missing or ambiguous
 * `toString` (`problem`).
 */
predicate source_object_sanity(string clsname, string problem, string what) {
    exists(Object o | not o.isBuiltin() and clsname = o.getAQlClass() |
        what = "at " + o.getOrigin().getLocation().toString() and
        (
            uniqueness_error(count(o.getOrigin()), "getOrigin", problem)
            or
            not exists(o.toString()) and problem = "no toString"
        )
        or
        not exists(o.getOrigin().getLocation()) and
        problem = "no location" and
        what = "??"
        or
        strictcount(o.toString()) > 1 and
        problem = "multiple toStrings()" and
        what = o.toString()
    )
}
|
||||
|
||||
/**
 * Holds if an SSA variable of QL class `clsname`, described by `what`,
 * violates one of the SSA invariants checked below (`problem`).
 */
predicate ssa_sanity(string clsname, string problem, string what) {
    exists(SsaVariable var | clsname = var.getAQlClass() |
        /* Zero or one definitions of each SSA variable */
        what = var.getId() and
        uniqueness_error(strictcount(var.getDefinition()), "getDefinition", problem)
        or
        /* Dominance criterion: Definition *must* dominate *all* uses. */
        exists(ControlFlowNode defn, ControlFlowNode use |
            defn = var.getDefinition() and use = var.getAUse() |
            not defn.strictlyDominates(use) and
            defn != use and
            /* Phi nodes which share a flow node with a use come *before* the use */
            not (exists(var.getAPhiInput()) and defn = use) and
            problem = "a definition which does not dominate a use at " + use.getLocation() and
            what = var.getId() + " at " + var.getLocation()
        )
        or
        /* Minimality of phi nodes */
        strictcount(var.getAPhiInput()) = 1 and
        var.getAPhiInput().getDefinition().getBasicBlock().strictlyDominates(var.getDefinition().getBasicBlock()) and
        problem = " a definition which is dominated by the definition of an incoming phi edge." and
        what = var.getId() + " at " + var.getLocation()
    )
}
|
||||
|
||||
/**
 * Holds if a function object of QL class `clsname`, described by `what`,
 * has no name, or has zero or several descriptive strings (`problem`).
 */
predicate function_object_sanity(string clsname, string problem, string what) {
    exists(FunctionObject func | clsname = func.getAQlClass() |
        what = func.getName() and
        (
            count(func.descriptiveString()) = 0 and
            problem = "no descriptiveString()"
            or
            exists(int c | c = strictcount(func.descriptiveString()) and c > 1 |
                // Separate the count from the label; the text used to read "2descriptiveString()s".
                problem = c.toString() + " descriptiveString()s"
            )
        )
        or
        not exists(func.getName()) and what = "?" and problem = "no name"
    )
}
|
||||
|
||||
/**
 * Holds if `obj`, which is neither a C object nor a module, is referred to
 * by some flow node with more than one origin.
 */
predicate multiple_origins_per_object(Object obj) {
    not obj instanceof ModuleObject and
    not obj.isC() and
    exists(ControlFlowNode use, int origins |
        origins = strictcount(ControlFlowNode orig | use.refersTo(obj, orig)) and
        origins > 1
    )
}
|
||||
|
||||
/**
 * Holds if `use` refers to `obj` with origin `inter`, while `inter` itself
 * refers to `obj` with a different origin.
 */
predicate intermediate_origins(ControlFlowNode use, ControlFlowNode inter, Object obj) {
    use.refersTo(obj, inter) and
    exists(ControlFlowNode orig | orig != inter | inter.refersTo(obj, orig)) and
    // It can sometimes happen that two different modules (e.g. cPickle and Pickle)
    // have the same attribute, but different origins.
    count(Object val | inter.(AttrNode).getObject().refersTo(val)) <= 1
}
|
||||
|
||||
/**
 * Holds if points-to results are inconsistent: an object of QL class
 * `clsname` has multiple origins, or a flow node of QL class `clsname`
 * refers to an object through an intermediate origin.
 * `what` describes the offending element and `problem` the inconsistency.
 */
predicate points_to_sanity(string clsname, string problem, string what) {
    exists(Object obj |
        multiple_origins_per_object(obj) and
        clsname = obj.getAQlClass() and
        problem = "multiple origins for an object" and
        what = obj.toString()
    )
    or
    exists(ControlFlowNode use, ControlFlowNode inter, Object obj |
        intermediate_origins(use, inter, obj) and
        clsname = use.getAQlClass() and
        // The final select already inserts " has " before the problem text,
        // so the text itself must not start with "has".
        problem = "intermediate origin " + inter and
        what = use.toString()
    )
}
|
||||
|
||||
/**
 * Holds if an expression of QL class `clsname`, described by `what`, has
 * more than one result for `getUniqueDefinition` (`problem`).
 */
predicate jump_to_definition_sanity(string clsname, string problem, string what) {
    exists(Expr use |
        clsname = use.getAQlClass() and
        what = use.toString() and
        strictcount(getUniqueDefinition(use)) > 1
    ) and
    problem = "multiple (jump-to) definitions"
}
|
||||
|
||||
/**
 * Holds if a file shares its name with a folder, or a container has a
 * non-unique `toString`.
 * `clsname` is the QL class of the file or container, `what` describes it
 * and `problem` describes the inconsistency.
 */
predicate file_sanity(string clsname, string problem, string what) {
    exists(File file, Folder folder |
        clsname = file.getAQlClass() and
        // The final select already inserts " has " before the problem text,
        // so the text itself must not start with "has".
        problem = "same name as a folder" and
        what = file.getName() and
        what = folder.getName()
    )
    or
    exists(Container f |
        clsname = f.getAQlClass() and
        uniqueness_error(count(f.toString()), "toString", problem) and
        what = "file " + f.getName()
    )
}
|
||||
|
||||
// Collect the findings of every individual sanity predicate into one message.
from string clsname, string problem, string what
where
    // Syntax and source positions
    ast_sanity(clsname, problem, what)
    or
    location_sanity(clsname, problem, what)
    or
    scope_sanity(clsname, problem, what)
    or
    file_sanity(clsname, problem, what)
    or
    // Control flow and SSA
    cfg_sanity(clsname, problem, what)
    or
    ssa_sanity(clsname, problem, what)
    or
    // Objects and points-to
    builtin_object_sanity(clsname, problem, what)
    or
    source_object_sanity(clsname, problem, what)
    or
    function_object_sanity(clsname, problem, what)
    or
    points_to_sanity(clsname, problem, what)
    or
    jump_to_definition_sanity(clsname, problem, what)
select clsname + " " + what + " has " + problem
|
||||
38
python/ql/src/analysis/Summary.ql
Normal file
38
python/ql/src/analysis/Summary.ql
Normal file
@@ -0,0 +1,38 @@
|
||||
/** Summarize a snapshot
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
// Emit one (key, value) row per summary fact about the snapshot.
from string key, string value
where
    key = "Extractor version" and py_flags_versioned("extractor.version", value, _)
    or
    key = "Snapshot build time" and exists(date d | snapshotDate(d) and value = d.toString())
    or
    key = "Interpreter version" and
    exists(string major, string minor |
        py_flags_versioned("version.major", major, _) and
        py_flags_versioned("version.minor", minor, _) and
        value = major + "." + minor
    )
    or
    key = "Build platform" and
    exists(string raw | py_flags_versioned("sys.platform", raw, _) |
        if raw = "win32" then
            value = "Windows"
        // `sys.platform` is "linux2" on Python 2 but plain "linux" from
        // Python 3.3 onwards, so accept any value starting with "linux".
        else if raw.matches("linux%") then
            value = "Linux"
        else if raw = "darwin" then
            value = "OSX"
        else
            // Unrecognised platforms are shown verbatim.
            value = raw
    )
    or
    key = "Source location" and sourceLocationPrefix(value)
    or
    // Only modules whose file has a relative path count as source.
    key = "Lines of code (source)" and
    value = sum(ModuleMetrics m | exists(m.getFile().getRelativePath()) | m.getNumberOfLinesOfCode()).toString()
    or
    key = "Lines of code (total)" and
    value = sum(ModuleMetrics m | any() | m.getNumberOfLinesOfCode()).toString()
select key, value
|
||||
15
python/ql/src/analysis/TypeHierarchyFailure.qhelp
Normal file
15
python/ql/src/analysis/TypeHierarchyFailure.qhelp
Normal file
@@ -0,0 +1,15 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>In order to analyse uses of a class, all its attributes need to be known. Without the full inheritance hierarchy this is impossible.
|
||||
This is an informational query only.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
This is an informational query only; it depends on points-to and type inference.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
16
python/ql/src/analysis/TypeHierarchyFailure.ql
Normal file
16
python/ql/src/analysis/TypeHierarchyFailure.ql
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* @name Inheritance hierarchy cannot be inferred for class
|
||||
* @description Inability to infer the inheritance hierarchy for a class will impair analysis.
|
||||
* @id py/failed-inheritance-inference
|
||||
* @kind problem
|
||||
* @problem.severity info
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
|
||||
from Class cls
where
    // A class object exists for the class, but its inference failed ...
    exists(ClassObject c | c.getPyClass() = cls and c.failedInference())
    or
    // ... or there is no class object for the class at all.
    not exists(ClassObject c | c.getPyClass() = cls)
select cls, "Inference of class hierarchy failed for class."
|
||||
13
python/ql/src/analysis/TypeInferenceFailure.qhelp
Normal file
13
python/ql/src/analysis/TypeInferenceFailure.qhelp
Normal file
@@ -0,0 +1,13 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Type inference is the key part of Semmle's Python analysis.
|
||||
Failures in type inference reduce the coverage and accuracy of many queries.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
</qhelp>
|
||||
14
python/ql/src/analysis/TypeInferenceFailure.ql
Normal file
14
python/ql/src/analysis/TypeInferenceFailure.ql
Normal file
@@ -0,0 +1,14 @@
|
||||
/**
|
||||
* @name Type inference fails for 'object'
|
||||
* @description Type inference fails for 'object' which reduces recall for many queries.
|
||||
* @kind problem
|
||||
* @problem.severity info
|
||||
* @id py/type-inference-failure
|
||||
*/
|
||||
import python
|
||||
|
||||
|
||||
// Report objects that a flow node refers to without any class being inferred.
from ControlFlowNode f, Object o
where
    f.refersTo(o) and
    not f.refersTo(o, _, _)
select o, "Type inference fails for 'object'."
|
||||
Reference in New Issue
Block a user