Merge pull request #5880 from tausbn/python-limit-builtins

Python: Limit set of globals that may be built-ins
This commit is contained in:
Rasmus Wriedt Larsen
2021-05-20 14:47:22 +02:00
committed by GitHub
3 changed files with 99 additions and 21 deletions

View File

@@ -11,11 +11,12 @@
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
// Reports calls to the built-in `input` when the analysed code is Python 2
// (`major_version() = 2`), skipping calls that also resolve to `raw_input`.
from DataFlow::CallCfgNode call
where
  major_version() = 2 and
  call = API::builtin("input").getACall() and
  // Use negated equality rather than `!=`: `call != X.getACall()` holds as soon as
  // *some* call to X differs from `call`, and has no solutions at all when there are
  // no calls to X. `not call = ...` excludes exactly the calls that are also calls
  // to `raw_input`.
  not call = API::builtin("raw_input").getACall()
select call, "The unsafe built-in function 'input' is used in Python 2."

View File

@@ -349,22 +349,95 @@ module API {
)
}
private import semmle.python.types.Builtins as Builtins
/**
 * Gets the name of a known built-in.
 *
 * The result ranges over the union of every group below; this predicate does not
 * restrict any group to a particular Python version.
 */
private string getBuiltInName() {
  // The groups were collected by running the `dir` built-in on the `builtins` module
  // (Python 3) and on the `__builtin__` module (Python 2).
  //
  // Functions and types available in both Python 2 and 3
  result =
    [
      "abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
      "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
      "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
      "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
      "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
      "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
      "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__"
    ]
  or
  // Exceptions and warnings available in both Python 2 and 3
  result =
    [
      "ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
      "BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
      "FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
      "ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
      "LookupError", "MemoryError", "NameError", "NotImplementedError", "OSError",
      "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
      "RuntimeWarning", "StopIteration", "SyntaxError", "SyntaxWarning", "SystemError",
      "SystemExit", "TabError", "TypeError", "UnboundLocalError", "UnicodeDecodeError",
      "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError", "UnicodeWarning",
      "UserWarning", "ValueError", "Warning", "ZeroDivisionError"
    ]
  or
  // Constants available in both Python 2 and 3
  result = ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
  or
  // Python 3 only. `exec` is also kept here for compatibility: it only needs to be
  // listed once, since all groups are combined.
  result =
    [
      "ascii", "breakpoint", "bytes", "exec",
      // Exceptions and warnings
      "BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
      "ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
      "FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
      "NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
      "ResourceWarning", "StopAsyncIteration", "TimeoutError"
    ]
  or
  // Python 2 only
  result =
    [
      "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
      "unichr", "unicode", "xrange",
      // Exceptions
      "StandardError"
    ]
}
/**
 * Gets a data flow node that is likely to refer to a built-in with the name `name`.
 *
 * A node qualifies when it is a global load of `name` inside some module `m`
 * (see `possible_builtin_accessed_in_module`) and `m` contains no store to a
 * non-local variable of that name (see `possible_builtin_defined_in_module`).
 *
 * Currently this is an over-approximation, and may not account for things like overwriting a
 * built-in with a different value.
 */
private DataFlow::Node likely_builtin(string name) {
  exists(NameNode n, Module m |
    possible_builtin_accessed_in_module(n, name, m) and
    // A module that assigns to the name itself is assumed to shadow the built-in.
    not possible_builtin_defined_in_module(name, m) and
    result.asCfgNode() = n
  )
}
/**
 * Holds if the module `m` contains a store to a name `name` that is not the definition
 * of a local variable, where `name` is also the name of a known built-in.
 */
private predicate possible_builtin_defined_in_module(string name, Module m) {
  name = getBuiltInName() and
  exists(NameNode store |
    store.getEnclosingModule() = m and
    store.getId() = name and
    store.isStore() and
    // Stores that define a local variable do not affect the global name.
    not store.defines(any(LocalVariable v))
  )
}
/**
 * Holds if `n` is a global load of the name `name` located in the module `m`, and
 * `name` is also the name of a known built-in.
 */
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
  name = getBuiltInName() and
  n.getId() = name and
  n.getEnclosingModule() = m and
  n.isLoad() and
  n.isGlobal()
}
/**