import python /** A file */ class File extends Container, @file { /** DEPRECATED: Use `getAbsolutePath` instead. */ deprecated override string getName() { result = this.getAbsolutePath() } /** DEPRECATED: Use `getAbsolutePath` instead. */ deprecated string getFullName() { result = this.getAbsolutePath() } /** * Holds if this element is at the specified location. * The location spans column `startcolumn` of line `startline` to * column `endcolumn` of line `endline` in file `filepath`. * For more information, see * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). */ predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { this.getAbsolutePath() = filepath and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 } /** Whether this file is a source code file. */ predicate fromSource() { /* If we start to analyse .pyc files, then this will have to change. */ any() } /** Gets a short name for this file (just the file name) */ string getShortName() { result = this.getBaseName() } private int lastLine() { result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i)) } /** Whether line n is empty (it contains neither code nor comment). */ predicate emptyLine(int n) { n in [0 .. this.lastLine()] and not occupied_line(this, n) } string getSpecifiedEncoding() { exists(Comment c, Location l | l = c.getLocation() and l.getFile() = this | l.getStartLine() < 3 and result = c.getText().regexpCapture(".*coding[:=]\\s*([-\\w.]+).*", 1) ) } override string getAbsolutePath() { files(this, result) } /** Gets the URL of this file. */ override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" } override Container getImportRoot(int n) { /* File stem must be a legal Python identifier */ this.getStem().regexpMatch("[^\\d\\W]\\w*") and result = this.getParent().getImportRoot(n) } /** * Gets the contents of this file as a string. * This will only work for those non-python files that * are specified to be extracted. */ string getContents() { file_contents(this, result) } /** Holds if this file is likely to get executed directly, and thus act as an entry point for execution. */ predicate isPossibleEntryPoint() { // Only consider files in the source code, and not things like the standard library exists(this.getRelativePath()) and ( // The file doesn't have the extension `.py` but still contains Python statements not this.getExtension().matches("py%") and exists(Stmt s | s.getLocation().getFile() = this) or // The file contains the usual `if __name__ == '__main__':` construction exists(If i, Name name, StrConst main, Cmpop op | i.getScope().(Module).getFile() = this and op instanceof Eq and i.getTest().(Compare).compares(name, op, main) and name.getId() = "__name__" and main.getText() = "__main__" ) and // Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but // contain this construct anyway. // // Their presence in a package (say, `foo`) means one can execute the package directly using // `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for // execution means treating imports as absolute, this causes trouble, since when run with // `python -m`, the interpreter uses the usual package semantics. not this.getShortName() = "__main__.py" or // The file contains a `#!` line referencing the python interpreter exists(Comment c | c.getLocation().getFile() = this and c.getLocation().getStartLine() = 1 and c.getText().regexpMatch("^#! */.*python(2|3)?[ \\\\t]*$") ) ) } } private predicate occupied_line(File f, int n) { exists(Location l | l.getFile() = f | l.getStartLine() = n or exists(StrConst s | s.getLocation() = l | n in [l.getStartLine() .. l.getEndLine()]) ) } /** A folder (directory) */ class Folder extends Container, @folder { /** DEPRECATED: Use `getAbsolutePath` instead. */ deprecated override string getName() { result = this.getAbsolutePath() } /** * Holds if this element is at the specified location. * The location spans column `startcolumn` of line `startline` to * column `endcolumn` of line `endline` in file `filepath`. * For more information, see * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). */ predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { this.getAbsolutePath() = filepath and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 } override string getAbsolutePath() { folders(this, result) } /** Gets the URL of this folder. */ override string getURL() { result = "folder://" + this.getAbsolutePath() } override Container getImportRoot(int n) { this.isImportRoot(n) and result = this or /* Folder must be a legal Python identifier */ this.getBaseName().regexpMatch("[^\\d\\W]\\w*") and result = this.getParent().getImportRoot(n) } } /** * A container is an abstract representation of a file system object that can * hold elements of interest. */ abstract class Container extends @container { Container getParent() { containerparent(result, this) } /** Gets a child of this container */ deprecated Container getChild() { containerparent(this, result) } /** * Gets a textual representation of the path of this container. * * This is the absolute path of the container. */ string toString() { result = this.getAbsolutePath() } /** Gets the name of this container */ abstract string getName(); /** * Gets the relative path of this file or folder from the root folder of the * analyzed source location. The relative path of the root folder itself is * the empty string. * * This has no result if the container is outside the source root, that is, * if the root folder is not a reflexive, transitive parent of this container. */ string getRelativePath() { exists(string absPath, string pref | absPath = this.getAbsolutePath() and sourceLocationPrefix(pref) | absPath = pref and result = "" or absPath = pref.regexpReplaceAll("/$", "") + "/" + result and not result.matches("/%") ) } /** Whether this file or folder is part of the standard library */ predicate inStdlib() { this.inStdlib(_, _) } /** * Whether this file or folder is part of the standard library * for version `major.minor` */ predicate inStdlib(int major, int minor) { exists(Module m | m.getPath() = this and m.inStdLib(major, minor) ) } /* Standard cross-language API */ /** Gets a file or sub-folder in this container. */ Container getAChildContainer() { containerparent(this, result) } /** Gets a file in this container. */ File getAFile() { result = this.getAChildContainer() } /** Gets a sub-folder in this container. */ Folder getAFolder() { result = this.getAChildContainer() } /** * Gets the absolute, canonical path of this container, using forward slashes * as path separator. * * The path starts with a _root prefix_ followed by zero or more _path * segments_ separated by forward slashes. * * The root prefix is of one of the following forms: * * 1. A single forward slash `/` (Unix-style) * 2. An upper-case drive letter followed by a colon and a forward slash, * such as `C:/` (Windows-style) * 3. Two forward slashes, a computer name, and then another forward slash, * such as `//FileServer/` (UNC-style) * * Path segments are never empty (that is, absolute paths never contain two * contiguous slashes, except as part of a UNC-style root prefix). Also, path * segments never contain forward slashes, and no path segment is of the * form `.` (one dot) or `..` (two dots). * * Note that an absolute path never ends with a forward slash, except if it is * a bare root prefix, that is, the path has no path segments. A container * whose absolute path has no segments is always a `Folder`, not a `File`. */ abstract string getAbsolutePath(); /** * Gets the base name of this container including extension, that is, the last * segment of its absolute path, or the empty string if it has no segments. * * Here are some examples of absolute paths and the corresponding base names * (surrounded with quotes to avoid ambiguity): * * * * * * * * * *
Absolute pathBase name
"/tmp/tst.py""tst.py"
"C:/Program Files (x86)""Program Files (x86)"
"/"""
"C:/"""
"D:/"""
"//FileServer/"""
*/ string getBaseName() { result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1) } /** * Gets the extension of this container, that is, the suffix of its base name * after the last dot character, if any. * * In particular, * * - if the name does not include a dot, there is no extension, so this * predicate has no result; * - if the name ends in a dot, the extension is the empty string; * - if the name contains multiple dots, the extension follows the last dot. * * Here are some examples of absolute paths and the corresponding extensions * (surrounded with quotes to avoid ambiguity): * * * * * * * * *
Absolute pathExtension
"/tmp/tst.py""py"
"/tmp/.gitignore""gitignore"
"/bin/bash"not defined
"/tmp/tst2."""
"/tmp/x.tar.gz""gz"
*/ string getExtension() { result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) } /** * Gets the stem of this container, that is, the prefix of its base name up to * (but not including) the last dot character if there is one, or the entire * base name if there is not. * * Here are some examples of absolute paths and the corresponding stems * (surrounded with quotes to avoid ambiguity): * * * * * * * * *
Absolute pathStem
"/tmp/tst.py""tst"
"/tmp/.gitignore"""
"/bin/bash""bash"
"/tmp/tst2.""tst2"
"/tmp/x.tar.gz""x.tar"
*/ string getStem() { result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) } File getFile(string baseName) { result = this.getAFile() and result.getBaseName() = baseName } Folder getFolder(string baseName) { result = this.getAFolder() and result.getBaseName() = baseName } Container getParentContainer() { this = result.getAChildContainer() } Container getChildContainer(string baseName) { result = this.getAChildContainer() and result.getBaseName() = baseName } /** * Gets a URL representing the location of this container. * * For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls). */ abstract string getURL(); /** Holds if this folder is on the import path. */ predicate isImportRoot() { this.isImportRoot(_) } /** * Holds if this folder is on the import path, at index `n` in the list of * paths. The list of paths is composed of the paths passed to the extractor and * `sys.path`. */ predicate isImportRoot(int n) { this.getName() = import_path_element(n) } /** Holds if this folder is the root folder for the standard library. */ predicate isStdLibRoot(int major, int minor) { major = major_version() and minor = minor_version() and this.isStdLibRoot() } /** Holds if this folder is the root folder for the standard library. */ predicate isStdLibRoot() { /* * Look for a standard lib module and find its import path * We use `os` as it is the most likely to be imported and * `tty` because it is small for testing. */ exists(Module m | m.getName() = "os" or m.getName() = "tty" | m.getFile().getImportRoot() = this ) } /** Gets the path element from which this container would be loaded. */ Container getImportRoot() { exists(int n | result = this.getImportRoot(n) and not exists(int m | exists(this.getImportRoot(m)) and m < n ) ) } /** Gets the path element from which this container would be loaded, given the index into the list of possible paths `n`. */ abstract Container getImportRoot(int n); } private string import_path_element(int n) { exists(string path, string pathsep, int k | path = get_path("extractor.path") and k = 0 or path = get_path("sys.path") and k = count(get_path("extractor.path").splitAt(pathsep)) | py_flags_versioned("os.pathsep", pathsep, _) and result = path.splitAt(pathsep, n - k).replaceAll("\\", "/") ) } private string get_path(string name) { py_flags_versioned(name, result, _) } class Location extends @location { /** Gets the file for this location */ File getFile() { result = this.getPath() } private Container getPath() { locations_default(this, result, _, _, _, _) or exists(Module m | locations_ast(this, m, _, _, _, _) | result = m.getPath()) } /** Gets the 1-based line number (inclusive) where this location starts. */ int getStartLine() { locations_default(this, _, result, _, _, _) or locations_ast(this, _, result, _, _, _) } /** Gets the 1-based column number (inclusive) where this location starts. */ int getStartColumn() { locations_default(this, _, _, result, _, _) or locations_ast(this, _, _, result, _, _) } /** Gets the 1-based line number (inclusive) where this location ends. */ int getEndLine() { locations_default(this, _, _, _, result, _) or locations_ast(this, _, _, _, result, _) } /** Gets the 1-based column number (inclusive) where this location ends. */ int getEndColumn() { locations_default(this, _, _, _, _, result) or locations_ast(this, _, _, _, _, result) } /** Gets a textual representation of this element. */ string toString() { result = this.getPath().getAbsolutePath() + ":" + this.getStartLine().toString() } /** * Holds if this element is at the specified location. * The location spans column `startcolumn` of line `startline` to * column `endcolumn` of line `endline` in file `filepath`. * For more information, see * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). */ predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { exists(File f | f.getAbsolutePath() = filepath | locations_default(this, f, startline, startcolumn, endline, endcolumn) or exists(Module m | m.getFile() = f | locations_ast(this, m, startline, startcolumn, endline, endcolumn) ) ) or // Packages have no suitable filepath, so we use just the path instead. exists(Module m | not exists(m.getFile()) | filepath = m.getPath().getAbsolutePath() and locations_ast(this, m, startline, startcolumn, endline, endcolumn) ) } } /** A non-empty line in the source code */ class Line extends @py_line { /** * Holds if this element is at the specified location. * The location spans column `startcolumn` of line `startline` to * column `endcolumn` of line `endline` in file `filepath`. * For more information, see * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). */ predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { exists(Module m | m.getFile().getAbsolutePath() = filepath and endline = startline and startcolumn = 1 and py_line_lengths(this, m, startline, endcolumn) ) } /** Gets a textual representation of this element. */ string toString() { exists(Module m | py_line_lengths(this, m, _, _) | result = m.getFile().getShortName() + ":" + this.getLineNumber().toString() ) } /** Gets the line number of this line */ int getLineNumber() { py_line_lengths(this, _, result, _) } /** Gets the length of this line */ int getLength() { py_line_lengths(this, _, _, result) } /** Gets the file for this line */ Module getModule() { py_line_lengths(this, result, _, _) } } /** * A syntax error. Note that if there is a syntax error in a module, * much information about that module will be lost */ class SyntaxError extends Location { SyntaxError() { py_syntax_error_versioned(this, _, major_version().toString()) } override string toString() { result = "Syntax Error" } /** Gets the message corresponding to this syntax error */ string getMessage() { py_syntax_error_versioned(this, result, major_version().toString()) } } /** * An encoding error. Note that if there is an encoding error in a module, * much information about that module will be lost */ class EncodingError extends SyntaxError { EncodingError() { /* Leave spaces around 'decode' in unlikely event it occurs as a name in a syntax error */ this.getMessage().toLowerCase().matches("% decode %") } override string toString() { result = "Encoding Error" } }