Merge pull request #1636 from markshannon/python-api-odds-and-ends

Python: Assorted improvements to API.
This commit is contained in:
Rebecca Valentine
2019-08-07 09:50:44 -07:00
committed by GitHub
13 changed files with 414 additions and 15 deletions

View File

@@ -1,5 +1,6 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.objects.ObjectInternal
/** An expression */
class Expr extends Expr_, AstNode {
@@ -71,7 +72,8 @@ class Expr extends Expr_, AstNode {
result = this.getASubExpression()
}
/** Gets what this expression might "refer-to". Performs a combination of localized (intra-procedural) points-to
/** NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Gets what this expression might "refer-to". Performs a combination of localized (intra-procedural) points-to
* analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly
* precise, but may not provide information for a significant number of flow-nodes.
* If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead.
@@ -82,13 +84,15 @@ class Expr extends Expr_, AstNode {
this.refersTo(_, obj, cls, origin)
}
/** Gets what this expression might "refer-to" in the given `context`.
/** NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Gets what this expression might "refer-to" in the given `context`.
*/
predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) {
    // Delegate to the flow-node level: any flow node of this expression may
    // match, and `origin` is related through any of its own flow nodes.
    exists(ControlFlowNode exprFlow, ControlFlowNode originFlow |
        exprFlow = this.getAFlowNode() and
        originFlow = origin.getAFlowNode()
    |
        exprFlow.refersTo(context, obj, cls, originFlow)
    )
}
/** Whether this expression might "refer-to" to `value` which is from `origin`
/** NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Holds if this expression might "refer-to" `value`, which is from `origin`.
* Unlike `this.refersTo(value, _, origin)`, this predicate includes results
* where the class cannot be inferred.
*/
@@ -97,11 +101,31 @@ class Expr extends Expr_, AstNode {
this.getAFlowNode().refersTo(obj, origin.getAFlowNode())
}
/** Equivalent to `this.refersTo(value, _)` */
/** NOTE: `refersTo` will be deprecated in 2019. Use `pointsTo` instead.
* Equivalent to `this.refersTo(value, _)` */
predicate refersTo(Object obj) {
    // The origin is irrelevant here, so it is quantified away.
    exists(AstNode origin | this.refersTo(obj, origin))
}
/**
 * Holds if this expression might "point-to" `value`, which is from `origin`,
 * in the given `context`.
 * The check is made on the flow nodes of this expression and of `origin`.
 */
predicate pointsTo(Context context, Value value, AstNode origin) {
    exists(ControlFlowNode exprFlow, ControlFlowNode originFlow |
        exprFlow = this.getAFlowNode() and
        originFlow = origin.getAFlowNode()
    |
        exprFlow.pointsTo(context, value, originFlow)
    )
}
/**
 * Holds if this expression might "point-to" `value`, which is from `origin`.
 * The check is made on the flow nodes of this expression and of `origin`.
 */
predicate pointsTo(Value value, AstNode origin) {
    exists(ControlFlowNode exprFlow, ControlFlowNode originFlow |
        exprFlow = this.getAFlowNode() and
        originFlow = origin.getAFlowNode()
    |
        exprFlow.pointsTo(value, originFlow)
    )
}
/**
 * Holds if this expression might "point-to" `value`, regardless of origin.
 * Equivalent to `this.pointsTo(value, _)`.
 */
predicate pointsTo(Value value) {
    exists(AstNode origin | this.pointsTo(value, origin))
}
}
/** An attribute expression, such as `value.attr` */
@@ -346,6 +370,11 @@ abstract class ImmutableLiteral extends Expr {
abstract Object getLiteralObject();
abstract boolean booleanValue();
/** Gets the constant `Value` whose `getLiteral()` is this literal expression. */
final Value getLiteralValue() {
    exists(ConstantObjectInternal constant |
        constant.getLiteral() = this and
        result = constant
    )
}
}
/** A numerical constant expression, such as `7` or `4.2` */
@@ -472,8 +501,10 @@ class NegativeIntegerLiteral extends ImmutableLiteral, UnaryExpr {
py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN())
}
/** Gets the (integer) value of this constant. Will not return a result if the value does not fit into
a 32 bit signed value */
int getValue() {
result = -this.getOperand().(IntegerLiteral).getValue()
result = -(this.getOperand().(IntegerLiteral).getValue())
}
}

View File

@@ -222,6 +222,11 @@ class ControlFlowNode extends @py_flow_node {
this.pointsTo(_, value, _)
}
/**
 * Gets a value that this ControlFlowNode points-to, in any context and
 * from any origin. There may be more than one result.
 */
Value pointsTo() {
    exists(Value value | this.pointsTo(_, value, _) | result = value)
}
/** Gets a value that this ControlFlowNode may point-to. */
Value inferredValue() {
this.pointsTo(_, result, _)

View File

@@ -73,12 +73,15 @@ abstract class ConstantObjectInternal extends ObjectInternal {
override predicate useOriginAsLegacyObject() { none() }
/** Gets an AST literal with the same value as this object */
abstract ImmutableLiteral getLiteral();
}
private abstract class BooleanObjectInternal extends ConstantObjectInternal {
override ObjectInternal getClass() {
result = TBuiltinClassObject(Builtin::special("bool"))
result = ClassValue::bool()
}
override int length() { none() }
@@ -90,6 +93,10 @@ private abstract class BooleanObjectInternal extends ConstantObjectInternal {
/* Booleans aren't iterable */
override ObjectInternal getIterNext() { none() }
/** Gets a `BooleanLiteral` whose `booleanValue()` equals that of this object. */
override ImmutableLiteral getLiteral() {
    exists(BooleanLiteral literal |
        literal.booleanValue() = this.booleanValue() and
        result = literal
    )
}
}
private class TrueObjectInternal extends BooleanObjectInternal, TTrue {
@@ -175,10 +182,14 @@ private class NoneObjectInternal extends ConstantObjectInternal, TNone {
/* None isn't iterable */
override ObjectInternal getIterNext() { none() }
/** Gets a `None` literal; every `None` expression is a literal for this object. */
override ImmutableLiteral getLiteral() {
    exists(None noneLiteral | result = noneLiteral)
}
}
private class IntObjectInternal extends ConstantObjectInternal, TInt {
class IntObjectInternal extends ConstantObjectInternal, TInt {
override string toString() {
result = "int " + this.intValue().toString()
@@ -216,9 +227,15 @@ private class IntObjectInternal extends ConstantObjectInternal, TInt {
/* ints aren't iterable */
override ObjectInternal getIterNext() { none() }
/**
 * Gets a literal with the same integer value as this object: either an
 * `IntegerLiteral` with this value, or a `NegativeIntegerLiteral` whose
 * operand is an `IntegerLiteral` holding the negation of this value.
 */
override ImmutableLiteral getLiteral() {
    exists(IntegerLiteral plain |
        plain.getValue() = this.intValue() and
        result = plain
    )
    or
    exists(NegativeIntegerLiteral negated, IntegerLiteral operand |
        operand = negated.getOperand() and
        operand.getValue() = -this.intValue() and
        result = negated
    )
}
}
private class FloatObjectInternal extends ConstantObjectInternal, TFloat {
class FloatObjectInternal extends ConstantObjectInternal, TFloat {
override string toString() {
if this.floatValue() = this.floatValue().floor() then (
@@ -241,7 +258,7 @@ private class FloatObjectInternal extends ConstantObjectInternal, TFloat {
result.floatValue() = this.floatValue()
}
private float floatValue() {
float floatValue() {
this = TFloat(result)
}
@@ -264,10 +281,14 @@ private class FloatObjectInternal extends ConstantObjectInternal, TFloat {
/* floats aren't iterable */
override ObjectInternal getIterNext() { none() }
/** Gets a `FloatLiteral` whose value equals `floatValue()` of this object. */
override ImmutableLiteral getLiteral() {
    exists(FloatLiteral literal |
        literal.getValue() = this.floatValue() and
        result = literal
    )
}
}
private class UnicodeObjectInternal extends ConstantObjectInternal, TUnicode {
class UnicodeObjectInternal extends ConstantObjectInternal, TUnicode {
override string toString() {
result = "'" + this.strValue() + "'"
@@ -310,9 +331,13 @@ private class UnicodeObjectInternal extends ConstantObjectInternal, TUnicode {
result = TUnknownInstance(this.getClass())
}
/** Gets a `Unicode` literal whose text equals `strValue()` of this object. */
override ImmutableLiteral getLiteral() {
    exists(Unicode literal |
        literal.getText() = this.strValue() and
        result = literal
    )
}
}
private class BytesObjectInternal extends ConstantObjectInternal, TBytes {
class BytesObjectInternal extends ConstantObjectInternal, TBytes {
override string toString() {
result = "'" + this.strValue() + "'"
@@ -355,6 +380,10 @@ private class BytesObjectInternal extends ConstantObjectInternal, TBytes {
result = TUnknownInstance(this.getClass())
}
/** Gets a `Bytes` literal whose text equals `strValue()` of this object. */
override ImmutableLiteral getLiteral() {
    exists(Bytes literal |
        literal.getText() = this.strValue() and
        result = literal
    )
}
}

View File

@@ -12,6 +12,7 @@ private import semmle.python.objects.ObjectInternal
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.pointsto.MRO
private import semmle.python.types.Builtins
/* Use the term `ObjectSource` to refer to DB entity. Either a CFG node
* for Python objects, or `@py_cobject` entity for built-in objects.
@@ -85,7 +86,7 @@ class Value extends TObject {
filepath = "" and bl = 0 and bc = 0 and el = 0 and ec = 0
}
/** Gets the name of this value, if it has one.
/** Gets the name of this value, if it has one.
* Note this is the innate name of the
* object, not necessarily all the names by which it can be called.
*/
@@ -190,6 +191,56 @@ module Value {
name = "False" and result = TFalse()
}
/**
 * Gets the `Value` for the integer constant `i`, if it exists.
 * Most integers have no `Value`, but the following are guaranteed to exist:
 *  * Every integer from zero to 511 inclusive.
 *  * All powers of 2 (up to 2**30).
 *  * Any integer explicitly mentioned in the source program.
 */
Value forInt(int i) {
    exists(IntObjectInternal intObject |
        intObject.intValue() = i and
        result = intObject
    )
}
/**
 * Gets the `Value` for the bytes constant `bytes`, if it exists.
 * Most byte strings have no `Value`; one exists only when the byte string
 * is explicitly declared in the source program.
 */
Value forBytes(string bytes) {
    exists(BytesObjectInternal bytesObject |
        bytesObject.strValue() = bytes and
        result = bytesObject
    )
}
/**
 * Gets the `Value` for the unicode constant `text`, if it exists.
 * Most text strings have no `Value`; one exists only when the string
 * is explicitly declared in the source program.
 */
Value forUnicode(string text) {
    exists(UnicodeObjectInternal unicodeObject |
        unicodeObject.strValue() = text and
        result = unicodeObject
    )
}
/**
 * Gets a `Value` for the string `text`.
 * The result is the unicode constant with that text or, when analysing
 * Python 2 (`major_version() = 2`), also the bytes constant with that text.
 * Most strings have no `Value`; one exists only when the string
 * is explicitly declared in the source program.
 */
Value forString(string text) {
    result = forUnicode(text)
    or
    (
        major_version() = 2 and
        result = forBytes(text)
    )
}
/** Gets the `Value` for the bool constant `b`: `TTrue()` for `true`, `TFalse()` for `false`. */
Value forBool(boolean b) {
    result = TFalse() and b = false
    or
    result = TTrue() and b = true
}
/** Gets the `Value` for `None`, as provided by `ObjectInternal::none_()`. */
Value none_() {
    ObjectInternal::none_() = result
}
}
/** Class representing callables in the Python program
@@ -228,7 +279,7 @@ class CallableValue extends Value {
cached ControlFlowNode getArgumentForCall(CallNode call, int n) {
exists(ObjectInternal called, int offset |
PointsToInternal::pointsTo(call.getFunction(), _, called, _) and
called.functionAndOffset(this, offset)
called.functionAndOffset(this, offset)
|
call.getArg(n-offset) = result
or
@@ -316,13 +367,13 @@ class ClassValue extends Value {
result = Types::getBase(this, n)
}
/** Holds if this class is a new style class.
/** Holds if this class is a new style class.
A new style class is one that implicitly or explicitly inherits from `object`. */
predicate isNewStyle() {
// The check itself is implemented by `Types::isNewStyle`.
Types::isNewStyle(this)
}
/** Holds if this class is an old style class.
/** Holds if this class is an old style class.
An old style class is one that does not inherit from `object`. */
predicate isOldStyle() {
Types::isOldStyle(this)
@@ -333,6 +384,100 @@ class ClassValue extends Value {
result = this.(PythonClassObjectInternal).getScope()
}
/**
 * Holds if this class has the attribute `name`, including attributes
 * declared by super classes: some class in this class's MRO declares `name`.
 */
predicate hasAttribute(string name) {
    exists(MRO mro | mro = this.getMro() | mro.declares(name))
}
/**
 * Holds if this class itself declares the attribute `name`.
 * Attributes declared only by super classes are *not* included.
 */
predicate declaresAttribute(string name) {
    exists(ClassObjectInternal cls | cls = this |
        cls.getClassDeclaration().declaresAttribute(name)
    )
}
}
/** Class representing functions in the Python program, both Python and built-in.
 * Note that this does not include other callables such as bound-methods.
 * Concrete subclasses must provide `getQualifiedName()`.
 */
abstract class FunctionValue extends CallableValue {
/** Gets the qualified name of this function; each concrete subclass supplies the computation. */
abstract string getQualifiedName();
}
/** Class representing Python functions, that is, values that are `PythonFunctionObjectInternal`s. */
class PythonFunctionValue extends FunctionValue {

    PythonFunctionValue() {
        exists(PythonFunctionObjectInternal func | this = func)
    }

    /** Gets the qualified name of the scope of this function. */
    override string getQualifiedName() {
        exists(PythonFunctionObjectInternal func | func = this |
            result = func.getScope().getQualifiedName()
        )
    }

}
/** Class representing builtin functions, such as `len` or `print` */
class BuiltinFunctionValue extends FunctionValue {

    BuiltinFunctionValue() {
        exists(BuiltinFunctionObjectInternal func | this = func)
    }

    /** Gets the name of the underlying builtin function object; no qualifier is added. */
    override string getQualifiedName() {
        exists(BuiltinFunctionObjectInternal func | func = this |
            result = func.getName()
        )
    }

}
/** Class representing builtin methods, such as `list.append` or `set.add` */
class BuiltinMethodValue extends FunctionValue {

    BuiltinMethodValue() {
        exists(BuiltinMethodObjectInternal method | this = method)
    }

    /**
     * Gets a name of the form `Class.method`, where `Class` is the name of a
     * builtin class that has this method's builtin as a member.
     */
    override string getQualifiedName() {
        exists(Builtin owner, Builtin method |
            method = this.(BuiltinMethodObjectInternal).getBuiltin() and
            owner.isClass() and
            owner.getMember(_) = method
        |
            result = owner.getName() + "." + this.getName()
        )
    }

}
/**
 * A class representing sequence objects with a length and tracked items.
 * Both are read from the underlying `SequenceObjectInternal`.
 */
class SequenceValue extends Value {

    SequenceValue() {
        exists(SequenceObjectInternal seq | this = seq)
    }

    /** Gets the item at position `n` of this sequence. */
    Value getItem(int n) {
        exists(SequenceObjectInternal seq | seq = this |
            result = seq.getItem(n)
        )
    }

    /** Gets the length of this sequence. */
    int length() {
        exists(SequenceObjectInternal seq | seq = this |
            result = seq.length()
        )
    }

}
/** A class representing tuple objects: sequence values that are `TupleObjectInternal`s. */
class TupleValue extends SequenceValue {

    TupleValue() {
        exists(TupleObjectInternal tuple | this = tuple)
    }

}
/** A method-resolution-order sequence of classes */
@@ -347,5 +492,87 @@ class MRO extends TClassList {
result = this.(ClassList).getItem(n)
}
/** Holds if any class in this MRO declares the attribute `name`. */
predicate declares(string name) {
    exists(ClassList classes | classes = this | classes.declares(name))
}
/** Gets the length of this MRO, as reported by the underlying `ClassList`. */
int length() {
    exists(ClassList classes | classes = this | result = classes.length())
}
/** Holds if `cls` is one of the classes in this MRO. */
predicate contains(ClassValue cls) {
    exists(ClassList classes | classes = this | classes.contains(cls))
}
/**
 * Gets the value found by scanning this MRO for the attribute `name`.
 * The third column of the underlying `ClassList.lookup` is ignored.
 */
Value lookup(string name) {
    exists(ClassList classes | classes = this | classes.lookup(name, result, _))
}
/**
 * Gets the MRO that remains after dropping every class that
 * precedes `cls` in this MRO.
 */
MRO startingAt(ClassValue cls) {
    exists(ClassList classes | classes = this | result = classes.startingAt(cls))
}
}
/** Convenience predicates for obtaining the `ClassValue`s of commonly used built-in classes. */
module ClassValue {

    /** Gets the `ClassValue` for the `bool` class. */
    ClassValue bool() {
        exists(Builtin cls | cls = Builtin::special("bool") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the class of Python functions. */
    ClassValue function() {
        exists(Builtin cls | cls = Builtin::special("FunctionType") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the class of builtin functions, taken as the class of `len`. */
    ClassValue builtinFunction() {
        Value::named("len").getClass() = result
    }

    /** Gets the `ClassValue` for the `int` class. */
    ClassValue int_() {
        exists(Builtin cls | cls = Builtin::special("int") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the `float` class. */
    ClassValue float_() {
        // Unlike its siblings, this one is looked up with `Builtin::builtin`.
        exists(Builtin cls | cls = Builtin::builtin("float") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the `bytes` class (also called `str` in Python 2). */
    ClassValue bytes() {
        exists(Builtin cls | cls = Builtin::special("bytes") |
            result = TBuiltinClassObject(cls)
        )
    }

    /**
     * Gets the `ClassValue` for the class of unicode strings:
     * `str` in Python 3 and `unicode` in Python 2.
     */
    ClassValue unicode() {
        exists(Builtin cls | cls = Builtin::special("unicode") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the `classmethod` class. */
    ClassValue classmethod() {
        exists(Builtin cls | cls = Builtin::special("ClassMethod") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the `staticmethod` class. */
    ClassValue staticmethod() {
        exists(Builtin cls | cls = Builtin::special("StaticMethod") |
            result = TBuiltinClassObject(cls)
        )
    }

    /** Gets the `ClassValue` for the class of modules. */
    ClassValue module_() {
        exists(Builtin cls | cls = Builtin::special("ModuleType") |
            result = TBuiltinClassObject(cls)
        )
    }

}

View File

@@ -0,0 +1,5 @@
| file://:0:0:0:0 | builtin-class bool | bool |
| file://:0:0:0:0 | builtin-class classmethod | classmethod |
| file://:0:0:0:0 | builtin-class float | float |
| file://:0:0:0:0 | builtin-class int | int |
| file://:0:0:0:0 | builtin-class object | object |

View File

@@ -0,0 +1,15 @@
import python
// Pairs each `ClassValue::` convenience predicate under test with a fixed label.
from ClassValue cls, string description
where
    description = "bool" and cls = ClassValue::bool()
    or
    description = "int" and cls = ClassValue::int_()
    or
    description = "float" and cls = ClassValue::float_()
    or
    description = "classmethod" and cls = ClassValue::classmethod()
    or
    // `object` is taken from position 2 of the MRO of `bool`.
    description = "object" and cls = ClassValue::bool().getMro().getItem(2)
select cls, description

View File

@@ -0,0 +1,16 @@
| 1 | file://:0:0:0:0 | int 1 |
| 2 | file://:0:0:0:0 | int 2 |
| 3 | file://:0:0:0:0 | int 3 |
| 4 | file://:0:0:0:0 | int 4 |
| 5 | file://:0:0:0:0 | int 5 |
| 6 | file://:0:0:0:0 | int 6 |
| 7 | file://:0:0:0:0 | int 7 |
| 8 | file://:0:0:0:0 | int 8 |
| 9 | file://:0:0:0:0 | int 9 |
| 10 | file://:0:0:0:0 | int 10 |
| 1000 | file://:0:0:0:0 | int 1000 |
| 1004 | file://:0:0:0:0 | int 1004 |
| b'b' | file://:0:0:0:0 | 'b' |
| b'd' | file://:0:0:0:0 | 'd' |
| u'a' | file://:0:0:0:0 | 'a' |
| u'c' | file://:0:0:0:0 | 'c' |

View File

@@ -0,0 +1,20 @@
import python
// Probes `Value::forUnicode`, `Value::forBytes` and `Value::forInt` with a fixed
// set of candidate constants; only constants that have a `Value` are selected.
from string txt, Value val
where
    exists(string s | s = "a" or s = "b" or s = "c" or s = "d" |
        val = Value::forUnicode(s) and txt = "u'" + s + "'"
        or
        val = Value::forBytes(s) and txt = "b'" + s + "'"
    )
    or
    exists(int i | i in [1..10] or i in [1000..1010] |
        val = Value::forInt(i) and txt = i.toString()
    )
select txt, val

View File

@@ -0,0 +1,5 @@
| file://:0:0:0:0 | Builtin-function exit | exit |
| file://:0:0:0:0 | Builtin-function len | len |
| file://:0:0:0:0 | builtin method append | list.append |
| test.py:8:1:8:10 | Function foo | foo |
| test.py:13:5:13:19 | Function C.meth | C.meth |

View File

@@ -0,0 +1,16 @@
import python
// Reports the qualified name of a few builtin functions and methods,
// and of every Python function.
from FunctionValue v, string name
where
    (
        v = Value::named("len")
        or
        v instanceof PythonFunctionValue
        or
        v = Value::named("sys.exit")
        or
        v = Value::named("list").(ClassValue).lookup("append")
    ) and
    name = v.getQualifiedName()
select v, name

View File

@@ -0,0 +1,4 @@
| file://:0:0:0:0 | builtin-class ValueError | ValueError |
| file://:0:0:0:0 | builtin-class bool | bool |
| file://:0:0:0:0 | builtin-class slice | slice |
| file://:0:0:0:0 | list object | sys.argv |

View File

@@ -0,0 +1,12 @@
import python
// Resolves a fixed list of names through `Value::named`.
from Value val, string name
where
    (
        name = "bool" or
        name = "sys" or
        name = "sys.argv" or
        name = "ValueError" or
        name = "slice"
    ) and
    val = Value::named(name)
select val, name

View File

@@ -0,0 +1,14 @@
u"a"
b"b"
u"c"
b"d"
1000
1004
def foo():
pass
class C(object):
def meth(self):
pass