/** * @name Consistency check * @description General consistency check to be run on any and all code. Should never produce any results. * @id py/consistency-check */ import python import analysis.DefinitionTracking predicate uniqueness_error(int number, string what, string problem) { what in [ "toString", "getLocation", "getNode", "getDefinition", "getEntryNode", "getOrigin", "getAnInferredType" ] and ( number = 0 and problem = "no results for " + what + "()" or number in [2 .. 10] and problem = number.toString() + " results for " + what + "()" ) } predicate ast_consistency(string clsname, string problem, string what) { exists(AstNode a | clsname = a.getAQlClass() | uniqueness_error(count(a.toString()), "toString", problem) and what = "at " + a.getLocation().toString() or uniqueness_error(strictcount(a.getLocation()), "getLocation", problem) and what = a.getLocation().toString() or not exists(a.getLocation()) and not a.(Module).isPackage() and problem = "no location" and what = a.toString() ) } predicate location_consistency(string clsname, string problem, string what) { exists(Location l | clsname = l.getAQlClass() | uniqueness_error(count(l.toString()), "toString", problem) and what = "at " + l.toString() or not exists(l.toString()) and problem = "no toString" and ( exists(AstNode thing | thing.getLocation() = l | what = "a location of a " + thing.getAQlClass() ) or not exists(AstNode thing | thing.getLocation() = l) and what = "a location" ) or l.getEndLine() < l.getStartLine() and problem = "end line before start line" and what = "at " + l.toString() or l.getEndLine() = l.getStartLine() and l.getEndColumn() < l.getStartColumn() and problem = "end column before start column" and what = "at " + l.toString() ) } predicate cfg_consistency(string clsname, string problem, string what) { exists(ControlFlowNode f | clsname = f.getAQlClass() | uniqueness_error(count(f.getNode()), "getNode", problem) and what = "at " + f.getLocation().toString() or not exists(f.getLocation()) and not exists(Module p | p.isPackage() | p.getEntryNode() = f or p.getAnExitNode() = f) and problem = "no location" and what = f.toString() or uniqueness_error(count(f.(AttrNode).getObject()), "getValue", problem) and what = "at " + f.getLocation().toString() ) } predicate scope_consistency(string clsname, string problem, string what) { exists(Scope s | clsname = s.getAQlClass() | uniqueness_error(count(s.getEntryNode()), "getEntryNode", problem) and what = "at " + s.getLocation().toString() or uniqueness_error(count(s.toString()), "toString", problem) and what = "at " + s.getLocation().toString() or uniqueness_error(strictcount(s.getLocation()), "getLocation", problem) and what = "at " + s.getLocation().toString() or not exists(s.getLocation()) and problem = "no location" and what = s.toString() and not s.(Module).isPackage() ) } string best_description_builtin_object(Object o) { o.isBuiltin() and ( result = o.toString() or not exists(o.toString()) and py_cobjectnames(o, result) or not exists(o.toString()) and not py_cobjectnames(o, _) and result = "builtin object of type " + o.getAnInferredType().toString() or not exists(o.toString()) and not py_cobjectnames(o, _) and not exists(o.getAnInferredType().toString()) and result = "builtin object" ) } private predicate introspected_builtin_object(Object o) { /* * Only check objects from the extractor, missing data for objects generated from C source code analysis is OK. * as it will be ignored if it doesn't match up with the introspected form. */ py_cobject_sources(o, 0) } predicate builtin_object_consistency(string clsname, string problem, string what) { exists(Object o | clsname = o.getAQlClass() and what = best_description_builtin_object(o) and introspected_builtin_object(o) | not exists(o.getAnInferredType()) and not py_cobjectnames(o, _) and problem = "neither name nor type" or uniqueness_error(count(string name | py_cobjectnames(o, name)), "name", problem) or not exists(o.getAnInferredType()) and problem = "no results for getAnInferredType" or not exists(o.toString()) and problem = "no toString" and not exists(string name | name.matches("\\_semmle%") | py_special_objects(o, name)) and not o = unknownValue() ) } predicate source_object_consistency(string clsname, string problem, string what) { exists(Object o | clsname = o.getAQlClass() and not o.isBuiltin() | uniqueness_error(count(o.getOrigin()), "getOrigin", problem) and what = "at " + o.getOrigin().getLocation().toString() or not exists(o.getOrigin().getLocation()) and problem = "no location" and what = "??" or not exists(o.toString()) and problem = "no toString" and what = "at " + o.getOrigin().getLocation().toString() or strictcount(o.toString()) > 1 and problem = "multiple toStrings()" and what = o.toString() ) } predicate ssa_consistency(string clsname, string problem, string what) { /* Zero or one definitions of each SSA variable */ exists(SsaVariable var | clsname = var.getAQlClass() | uniqueness_error(strictcount(var.getDefinition()), "getDefinition", problem) and what = var.getId() ) or /* Dominance criterion: Definition *must* dominate *all* uses. */ exists(SsaVariable var, ControlFlowNode defn, ControlFlowNode use | defn = var.getDefinition() and use = var.getAUse() | not defn.strictlyDominates(use) and not defn = use and /* Phi nodes which share a flow node with a use come *before* the use */ not (exists(var.getAPhiInput()) and defn = use) and clsname = var.getAQlClass() and problem = "a definition which does not dominate a use at " + use.getLocation() and what = var.getId() + " at " + var.getLocation() ) or /* Minimality of phi nodes */ exists(SsaVariable var | strictcount(var.getAPhiInput()) = 1 and var.getAPhiInput() .getDefinition() .getBasicBlock() .strictlyDominates(var.getDefinition().getBasicBlock()) | clsname = var.getAQlClass() and problem = " a definition which is dominated by the definition of an incoming phi edge." and what = var.getId() + " at " + var.getLocation() ) } predicate function_object_consistency(string clsname, string problem, string what) { exists(FunctionObject func | clsname = func.getAQlClass() | what = func.getName() and ( not exists(func.descriptiveString()) and problem = "no descriptiveString()" or exists(int c | c = strictcount(func.descriptiveString()) and c > 1 | problem = c + "descriptiveString()s" ) ) or not exists(func.getName()) and what = "?" and problem = "no name" ) } predicate multiple_origins_per_object(Object obj) { not obj.isC() and not obj instanceof ModuleObject and exists(ControlFlowNode use, Context ctx | strictcount(ControlFlowNode orig | use.refersTo(ctx, obj, _, orig)) > 1 ) } predicate intermediate_origins(ControlFlowNode use, ControlFlowNode inter, Object obj) { exists(ControlFlowNode orig, Context ctx | not inter = orig | use.refersTo(ctx, obj, _, inter) and inter.refersTo(ctx, obj, _, orig) and // It can sometimes happen that two different modules (e.g. cPickle and Pickle) // have the same attribute, but different origins. not strictcount(Object val | inter.(AttrNode).getObject().refersTo(val)) > 1 ) } predicate points_to_consistency(string clsname, string problem, string what) { exists(Object obj | multiple_origins_per_object(obj) and clsname = obj.getAQlClass() and problem = "multiple origins for an object" and what = obj.toString() ) or exists(ControlFlowNode use, ControlFlowNode inter | intermediate_origins(use, inter, _) and clsname = use.getAQlClass() and problem = "has intermediate origin " + inter and what = use.toString() ) } predicate jump_to_definition_consistency(string clsname, string problem, string what) { problem = "multiple (jump-to) definitions" and exists(Expr use | strictcount(getUniqueDefinition(use)) > 1 and clsname = use.getAQlClass() and what = use.toString() ) } predicate file_consistency(string clsname, string problem, string what) { exists(File file, Folder folder | clsname = file.getAQlClass() and problem = "has same name as a folder" and what = file.getAbsolutePath() and what = folder.getAbsolutePath() ) or exists(Container f | clsname = f.getAQlClass() and uniqueness_error(count(f.toString()), "toString", problem) and what = "file " + f.getAbsolutePath() ) } predicate class_value_consistency(string clsname, string problem, string what) { exists(ClassValue value, ClassValue sup, string attr | what = value.getName() and sup = value.getASuperType() and exists(sup.lookup(attr)) and not value.failedInference(_) and not exists(value.lookup(attr)) and clsname = value.getAQlClass() and problem = "no attribute '" + attr + "', but super type '" + sup.getName() + "' does." ) } from string clsname, string problem, string what where ast_consistency(clsname, problem, what) or location_consistency(clsname, problem, what) or scope_consistency(clsname, problem, what) or cfg_consistency(clsname, problem, what) or ssa_consistency(clsname, problem, what) or builtin_object_consistency(clsname, problem, what) or source_object_consistency(clsname, problem, what) or function_object_consistency(clsname, problem, what) or points_to_consistency(clsname, problem, what) or jump_to_definition_consistency(clsname, problem, what) or file_consistency(clsname, problem, what) or class_value_consistency(clsname, problem, what) select clsname + " " + what + " has " + problem