Merge branch 'main' of github.com:github/codeql into htmlReg

This commit is contained in:
Erik Krogh Kristensen
2021-10-26 14:46:27 +02:00
2275 changed files with 216237 additions and 23741 deletions

View File

@@ -52,11 +52,7 @@ predicate is_stateful(Class c) {
call.getFunc() = a and
a.getName() = name
|
name = "pop" or
name = "remove" or
name = "discard" or
name = "extend" or
name = "append"
name in ["pop", "remove", "discard", "extend", "append"]
)
}

View File

@@ -1,23 +0,0 @@
/**
* @name Python extraction errors
* @description List all extraction errors for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-errors
*/
import python
/**
* Gets the SARIF severity for errors.
*
* See point 3.27.10 in https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html for
* what error means.
*/
int getErrorSeverity() { result = 2 }
from SyntaxError error, File file
where
file = error.getFile() and
exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
getErrorSeverity()

View File

@@ -0,0 +1,36 @@
/**
* @name Python extraction warnings
* @description List all extraction warnings for Python files in the source code directory.
* @kind diagnostic
* @id py/diagnostics/extraction-warnings
*/
import python
/**
* Gets the SARIF severity for warnings.
*
* See https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
*/
int getWarningSeverity() { result = 1 }
// The spec
// https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10541338
// defines error and warning as:
//
// "error": A serious problem was found. The condition encountered by the tool resulted
// in the analysis being halted or caused the results to be incorrect or incomplete.
//
// "warning": A problem that is not considered serious was found. The condition
// encountered by the tool is such that it is uncertain whether a problem occurred, or
// is such that the analysis might be incomplete but the results that were generated are
// probably valid.
//
// So SyntaxErrors are reported at the warning level, since analysis might be incomplete
// but the results that were generated are probably valid.
from SyntaxError error, File file
where
file = error.getFile() and
exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
getWarningSeverity()

View File

@@ -22,49 +22,14 @@ private predicate indexing_method(string name) {
}
private predicate arithmetic_method(string name) {
name = "__add__" or
name = "__sub__" or
name = "__div__" or
name = "__pos__" or
name = "__abs__" or
name = "__floordiv__" or
name = "__div__" or
name = "__divmod__" or
name = "__lshift__" or
name = "__and__" or
name = "__or__" or
name = "__xor__" or
name = "__rshift__" or
name = "__pow__" or
name = "__mul__" or
name = "__neg__" or
name = "__radd__" or
name = "__rsub__" or
name = "__rdiv__" or
name = "__rfloordiv__" or
name = "__rdiv__" or
name = "__rlshift__" or
name = "__rand__" or
name = "__ror__" or
name = "__rxor__" or
name = "__rrshift__" or
name = "__rpow__" or
name = "__rmul__" or
name = "__truediv__" or
name = "__rtruediv__" or
name = "__iadd__" or
name = "__isub__" or
name = "__idiv__" or
name = "__ifloordiv__" or
name = "__idiv__" or
name = "__ilshift__" or
name = "__iand__" or
name = "__ior__" or
name = "__ixor__" or
name = "__irshift__" or
name = "__ipow__" or
name = "__imul__" or
name = "__itruediv__"
name in [
"__add__", "__sub__", "__or__", "__xor__", "__rshift__", "__pow__", "__mul__", "__neg__",
"__radd__", "__rsub__", "__rdiv__", "__rfloordiv__", "__div__", "__rdiv__", "__rlshift__",
"__rand__", "__ror__", "__rxor__", "__rrshift__", "__rpow__", "__rmul__", "__truediv__",
"__rtruediv__", "__pos__", "__iadd__", "__isub__", "__idiv__", "__ifloordiv__", "__idiv__",
"__ilshift__", "__iand__", "__ior__", "__ixor__", "__irshift__", "__abs__", "__ipow__",
"__imul__", "__itruediv__", "__floordiv__", "__div__", "__divmod__", "__lshift__", "__and__"
]
}
private predicate ordering_method(string name) {

View File

@@ -24,7 +24,6 @@ where
not derived.getScope().isSpecialMethod() and
derived.getName() != "__init__" and
derived.isNormalMethod() and
not derived.getScope().isSpecialMethod() and
// call to overrides distributed for efficiency
(
derived.overrides(base) and derived.minParameters() > base.maxParameters()

View File

@@ -13,98 +13,29 @@
import python
predicate is_unary_op(string name) {
name = "__del__" or
name = "__repr__" or
name = "__str__" or
name = "__hash__" or
name = "__bool__" or
name = "__nonzero__" or
name = "__unicode__" or
name = "__len__" or
name = "__iter__" or
name = "__reversed__" or
name = "__neg__" or
name = "__pos__" or
name = "__abs__" or
name = "__invert__" or
name = "__complex__" or
name = "__int__" or
name = "__float__" or
name = "__long__" or
name = "__oct__" or
name = "__hex__" or
name = "__index__" or
name = "__enter__"
name in [
"__del__", "__repr__", "__neg__", "__pos__", "__abs__", "__invert__", "__complex__",
"__int__", "__float__", "__long__", "__oct__", "__hex__", "__str__", "__index__", "__enter__",
"__hash__", "__bool__", "__nonzero__", "__unicode__", "__len__", "__iter__", "__reversed__"
]
}
predicate is_binary_op(string name) {
name = "__lt__" or
name = "__le__" or
name = "__eq__" or
name = "__ne__" or
name = "__gt__" or
name = "__ge__" or
name = "__cmp__" or
name = "__rcmp__" or
name = "__getattr___" or
name = "__getattribute___" or
name = "__delattr__" or
name = "__delete__" or
name = "__instancecheck__" or
name = "__subclasscheck__" or
name = "__getitem__" or
name = "__delitem__" or
name = "__contains__" or
name = "__add__" or
name = "__sub__" or
name = "__mul__" or
name = "__floordiv__" or
name = "__div__" or
name = "__truediv__" or
name = "__mod__" or
name = "__divmod__" or
name = "__lshift__" or
name = "__rshift__" or
name = "__and__" or
name = "__xor__" or
name = "__or__" or
name = "__radd__" or
name = "__rsub__" or
name = "__rmul__" or
name = "__rfloordiv__" or
name = "__rdiv__" or
name = "__rtruediv__" or
name = "__rmod__" or
name = "__rdivmod__" or
name = "__rpow__" or
name = "__rlshift__" or
name = "__rrshift__" or
name = "__rand__" or
name = "__rxor__" or
name = "__ror__" or
name = "__iadd__" or
name = "__isub__" or
name = "__imul__" or
name = "__ifloordiv__" or
name = "__idiv__" or
name = "__itruediv__" or
name = "__imod__" or
name = "__idivmod__" or
name = "__ipow__" or
name = "__ilshift__" or
name = "__irshift__" or
name = "__iand__" or
name = "__ixor__" or
name = "__ior__" or
name = "__coerce__"
name in [
"__lt__", "__le__", "__delattr__", "__delete__", "__instancecheck__", "__subclasscheck__",
"__getitem__", "__delitem__", "__contains__", "__add__", "__sub__", "__mul__", "__eq__",
"__floordiv__", "__div__", "__truediv__", "__mod__", "__divmod__", "__lshift__", "__rshift__",
"__and__", "__xor__", "__or__", "__ne__", "__radd__", "__rsub__", "__rmul__", "__rfloordiv__",
"__rdiv__", "__rtruediv__", "__rmod__", "__rdivmod__", "__rpow__", "__rlshift__", "__gt__",
"__rrshift__", "__rand__", "__rxor__", "__ror__", "__iadd__", "__isub__", "__imul__",
"__ifloordiv__", "__idiv__", "__itruediv__", "__ge__", "__imod__", "__idivmod__", "__ipow__",
"__ilshift__", "__irshift__", "__iand__", "__ixor__", "__ior__", "__coerce__", "__cmp__",
"__rcmp__", "__getattr___", "__getattribute___"
]
}
predicate is_ternary_op(string name) {
name = "__setattr__" or
name = "__set__" or
name = "__setitem__" or
name = "__getslice__" or
name = "__delslice__"
name in ["__setattr__", "__set__", "__setitem__", "__getslice__", "__delslice__"]
}
predicate is_quad_op(string name) { name = "__setslice__" or name = "__exit__" }
@@ -132,12 +63,12 @@ predicate incorrect_special_method_defn(
else
if required < func.minParameters()
then message = "Too many parameters" and show_counts = true
else
if func.minParameters() < required and not func.getScope().hasVarArg()
then
message = (required - func.minParameters()) + " default values(s) will never be used" and
show_counts = false
else none()
else (
func.minParameters() < required and
not func.getScope().hasVarArg() and
message = (required - func.minParameters()) + " default values(s) will never be used" and
show_counts = false
)
)
}

View File

@@ -197,7 +197,7 @@ class CommentedOutCodeBlock extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -297,41 +297,17 @@ private predicate file_or_url(Comment c) {
c.getText().regexpMatch("#[^'\"]+(\\[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*")
}
private string operator_keyword() {
result = "import" or
result = "and" or
result = "is" or
result = "or" or
result = "in" or
result = "not" or
result = "as"
}
private string operator_keyword() { result in ["import", "and", "is", "or", "in", "not", "as"] }
private string keyword_requiring_colon() {
result = "try" or
result = "while" or
result = "elif" or
result = "else" or
result = "if" or
result = "except" or
result = "def" or
result = "class"
result in ["try", "while", "elif", "else", "if", "except", "def", "class"]
}
private string other_keyword() {
result = "del" or
result = "lambda" or
result = "from" or
result = "global" or
result = "with" or
result = "assert" or
result = "yield" or
result = "finally" or
result = "print" or
result = "exec" or
result = "raise" or
result = "return" or
result = "for"
result in [
"del", "lambda", "raise", "return", "for", "from", "global", "with", "assert", "yield",
"finally", "print", "exec"
]
}
private string a_keyword() {

View File

@@ -20,7 +20,7 @@ class RangeFunction extends Function {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
@@ -40,7 +40,7 @@ class RangeClass extends Class {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -1,53 +0,0 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
The <code>TextClause</code> class in the <code>SQLAlchemy</code> PyPI package represents
a textual SQL string directly. If user-input is added to it without sufficient
sanitization, a user may be able to run malicious database queries, since the
<code>TextClause</code> is inserted directly into the final SQL.
</p>
</overview>
<recommendation>
<p>
Don't allow user-input to be added to a <code>TextClause</code>, instead construct your
full query with constructs from the ORM, or use query parameters for user-input.
</p>
</recommendation>
<example>
<p>
In the following snippet, a user is fetched from the database using three
different queries.
</p>
<p>
In the first case, the final query string is built by directly including a user-supplied
input. The parameter may include quote characters, so this code is vulnerable to a SQL
injection attack.
</p>
<p>
In the second case, the query is built using ORM models, but part of it is using a
<code>TextClause</code> directly including a user-supplied input. Since the
<code>TextClause</code> is inserted directly into the final SQL, this code is vulnerable
to a SQL injection attack.
</p>
<p>
In the third case, the query is built fully using the ORM models, so in the end, the
user-supplied input will be passed to the database using query parameters. The
database connector library will take care of escaping and inserting quotes as needed.
</p>
<sample src="examples/sqlalchemy_textclause_injection.py" />
</example>
<references>
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">Official documentation of the text parameter</a>.</li>
</references>
</qhelp>

View File

@@ -1,23 +0,0 @@
/**
* @name SQLAlchemy TextClause built from user-controlled sources
* @description Building a TextClause query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 8.8
* @precision high
* @id py/sqlalchemy-textclause-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import semmle.python.security.dataflow.SQLAlchemyTextClause
import DataFlow::PathGraph
from SQLAlchemyTextClause::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"This SQLAlchemy TextClause depends on $@, which could lead to SQL injection.", source.getNode(),
"a user-provided value"

View File

@@ -9,6 +9,13 @@ If a database query (such as a SQL or NoSQL query) is built from
user-provided data without sufficient sanitization, a user
may be able to run malicious database queries.
</p>
<p>
This also includes using the <code>TextClause</code> class in the
<code><a href="https://pypi.org/project/SQLAlchemy/">SQLAlchemy</a></code> PyPI package,
which is used to represent a literal SQL fragment and is inserted directly into the
final SQL when used in a query built using the ORM.
</p>
</overview>
<recommendation>
@@ -52,5 +59,6 @@ vulnerable to SQL injection attacks. In this example, if <code>username</code> w
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">SQLAlchemy documentation for TextClause</a>.</li>
</references>
</qhelp>

View File

@@ -1,34 +0,0 @@
from flask import Flask, request
import sqlalchemy
import sqlalchemy.orm
app = Flask(__name__)
engine = sqlalchemy.create_engine(...)
Base = sqlalchemy.orm.declarative_base()
class User(Base):
__tablename__ = "users"
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
username = sqlalchemy.Column(sqlalchemy.String)
@app.route("/users/<username>")
def show_user(username):
session = sqlalchemy.orm.Session(engine)
# BAD, normal SQL injection
stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username))
results = session.execute(stmt).fetchall()
# BAD, allows SQL injection
username_formatted_for_sql = sqlalchemy.text("'{}'".format(username))
stmt = sqlalchemy.select(User).where(User.username == username_formatted_for_sql)
results = session.execute(stmt).scalars().all()
# GOOD, does not allow for SQL injection
stmt = sqlalchemy.select(User).where(User.username == username)
results = session.execute(stmt).scalars().all()
...

View File

@@ -5,25 +5,24 @@
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.RegexInjection
private import semmle.python.Concepts
import semmle.python.security.injection.RegexInjection
import DataFlow::PathGraph
from
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
RegexInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexExecution regexExecution
where
config.hasFlowPath(source, sink) and
regexInjectionSink = sink.getNode() and
methodAttribute = regexInjectionSink.getRegexMethod()
regexExecution = sink.getNode().(RegexInjection::Sink).getRegexExecution()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", methodAttribute,
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
source.getNode(), "user-provided value", regexExecution, regexExecution.getName()

View File

@@ -88,7 +88,7 @@ class CredentialSink extends TaintSink {
CredentialSink() {
exists(string name |
name.regexpMatch(getACredentialRegex()) and
not name.suffix(name.length() - 4) = "file"
not name.matches("%file")
|
any(FunctionValue func).getNamedArgumentForCall(_, name) = this
or

View File

@@ -15,16 +15,9 @@ import python
predicate func_with_side_effects(Expr e) {
exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() |
name = "print" or
name = "write" or
name = "append" or
name = "pop" or
name = "remove" or
name = "discard" or
name = "delete" or
name = "close" or
name = "open" or
name = "exit"
name in [
"print", "write", "append", "pop", "remove", "discard", "delete", "close", "open", "exit"
]
)
}

View File

@@ -88,7 +88,7 @@ class SuppressionScope extends @py_comment {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -8,15 +8,10 @@ import python
import DefinitionTracking
predicate uniqueness_error(int number, string what, string problem) {
(
what = "toString" or
what = "getLocation" or
what = "getNode" or
what = "getDefinition" or
what = "getEntryNode" or
what = "getOrigin" or
what = "getAnInferredType"
) and
what in [
"toString", "getLocation", "getNode", "getDefinition", "getEntryNode", "getOrigin",
"getAnInferredType"
] and
(
number = 0 and problem = "no results for " + what + "()"
or
@@ -141,7 +136,7 @@ predicate builtin_object_consistency(string clsname, string problem, string what
or
not exists(o.toString()) and
problem = "no toString" and
not exists(string name | name.prefix(7) = "_semmle" | py_special_objects(o, name)) and
not exists(string name | name.matches("\\_semmle%") | py_special_objects(o, name)) and
not o = unknownValue()
)
}

View File

@@ -477,7 +477,7 @@ class NiceLocationExpr extends @py_expr {
* The location spans column `bc` of line `bl` to
* column `ec` of line `el` in file `f`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(string f, int bl, int bc, int el, int ec) {
/* Attribute location for x.y is that of 'y' so that url does not overlap with that of 'x' */

View File

@@ -0,0 +1,26 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If an HTTP Header is built using string concatenation or string formatting, and the
components of the concatenation include user input, a user
is likely to be able to manipulate the response.</p>
</overview>
<recommendation>
<p>User input should not be included in an HTTP Header.</p>
</recommendation>
<example>
<p>In the following example, the code appends a user-provided value into a header.</p>
<sample src="header_injection.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/HTTP_Response_Splitting">HTTP Response Splitting</a>.</li>
<li>Python Security: <a href="https://python-security.readthedocs.io/vuln/http-header-injection.html">HTTP header injection</a>.</li>
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-5167">RSPEC-5167</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name HTTP Header Injection
* @description User input should not be used in HTTP headers, otherwise a malicious user
* may be able to inject a value that could manipulate the response.
* @kind path-problem
* @problem.severity error
* @id py/header-injection
* @tags security
* external/cwe/cwe-113
* external/cwe/cwe-079
*/
// determine precision above
import python
import experimental.semmle.python.security.injection.HTTPHeaders
import DataFlow::PathGraph
from HeaderInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ HTTP header is constructed from a $@.", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -0,0 +1,9 @@
from flask import Response, request, Flask, make_response
@app.route("/flask_Response")
def flask_Response():
rfs_header = request.args["rfs_header"]
response = Response()
response.headers['HeaderName'] = rfs_header
return response

View File

@@ -0,0 +1,49 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>
<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
</overview>
<recommendation>
<p>
User input should be suitably sanitized before it is logged.
</p>
<p>
If the log entries are plain text then line breaks should be removed from user input, using for example
<code>replace(old, new)</code> or similar. Care should also be taken that user input is clearly marked
in log entries, and that a malicious user cannot cause confusion in other ways.
</p>
<p>
For log entries that will be displayed in HTML, user input should be HTML encoded before being logged, to prevent forgery and
other forms of HTML injection.
</p>
</recommendation>
<example>
<p>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be divided into two lines, the first line is <code>User name: Guest</code> code>, the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />
<p>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.
</p>
<sample src="LogInjectionGood.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/Log_Injection">Log Injection</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,20 @@
/**
* @name Log Injection
* @description Building log entries from user-controlled data is vulnerable to
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/
import python
import experimental.semmle.python.security.injection.LogInjection
import DataFlow::PathGraph
from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc Log Injection
"""
from flask import Flask
from flask import request
from django.utils.log import request_logger
import logging
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
@app.route('/bad1')
def bad1():
name = request.args.get('name')
app.logger.info('User name: ' + name) # Bad
return 'bad1'
@app.route('/bad2')
def bad2():
name = request.args.get('name')
logging.info('User name: ' + name) # Bad
return 'bad2'
@app.route('/bad3')
def bad3():
name = request.args.get('name')
request_logger.warn('User name: ' + name) # Bad
return 'bad3'
@app.route('/bad4')
def bad4():
name = request.args.get('name')
logtest = logging.getLogger('test')
logtest.debug('User name: ' + name) # Bad
return 'bad4'
if __name__ == '__main__':
app.debug = True
handler = logging.FileHandler('log')
app.logger.addHandler(handler)
app.run()

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc Log Injection
"""
from flask import Flask
from flask import request
import logging
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
@app.route('/good1')
def good1():
name = request.args.get('name')
name = name.replace('\r\n','').replace('\n','')
logging.info('User name: ' + name) # Good
return 'good1'
if __name__ == '__main__':
app.debug = True
handler = logging.FileHandler('log')
app.logger.addHandler(handler)
app.run()

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Desc ip address spoofing
"""
from flask import Flask
from flask import request
app = Flask(__name__)
@app.route('/bad1')
def bad1():
client_ip = request.headers.get('x-forwarded-for')
if not client_ip.startswith('192.168.'):
raise Exception('ip illegal')
return 'bad1'
@app.route('/bad2')
def bad2():
client_ip = request.headers.get('x-forwarded-for')
if not client_ip == '127.0.0.1':
raise Exception('ip illegal')
return 'bad2'
@app.route('/good1')
def good1():
client_ip = request.headers.get('x-forwarded-for')
client_ip = client_ip.split(',')[client_ip.split(',').length - 1]
if not client_ip == '127.0.0.1':
raise Exception('ip illegal')
return 'good1'
if __name__ == '__main__':
app.debug = True
app.run()

View File

@@ -0,0 +1,35 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>An original client IP address is retrieved from an http header (<code>X-Forwarded-For</code> or <code>X-Real-IP</code> or <code>Proxy-Client-IP</code>
etc.), which is used to ensure security. Attackers can forge the value of these identifiers to
bypass a ban-list, for example.</p>
</overview>
<recommendation>
<p>Do not trust the values of HTTP headers allegedly identifying the originating IP. If you are aware your application will run behind some reverse proxies then the last entry of a <code>X-Forwarded-For</code> header value may be more trustworthy than the rest of it because some reverse proxies append the IP address they observed to the end of any remote-supplied header.</p>
</recommendation>
<example>
<p>The following examples show the bad case and the good case respectively.
In <code>bad1</code> method and <code>bad2</code> method, the client ip the <code>X-Forwarded-For</code> is split into comma-separated values, but the less-trustworthy first one is used. Both of these examples could be deceived by providing a forged HTTP header. The method
<code>good1</code> similarly splits an <code>X-Forwarded-For</code> value, but uses the last, more-trustworthy entry.</p>
<sample src="ClientSuppliedIpUsedInSecurityCheck.py" />
</example>
<references>
<li>Dennis Schneider: <a href="https://www.dennis-schneider.com/blog/prevent-ip-address-spoofing-with-x-forwarded-for-header-and-aws-elb-in-clojure-ring/">
Prevent IP address spoofing with X-Forwarded-For header when using AWS ELB and Clojure Ring</a>
</li>
<li>Security Rule Zero: <a href="https://www.f5.com/company/blog/security-rule-zero-a-warning-about-x-forwarded-for">A Warning about X-Forwarded-For</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,56 @@
/**
* @name IP address spoofing
* @description A remote endpoint identifier is read from an HTTP header. Attackers can modify the value
* of the identifier to forge the client ip.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/ip-address-spoofing
* @tags security
* external/cwe/cwe-348
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs
import ClientSuppliedIpUsedInSecurityCheckLib
import DataFlow::PathGraph
/**
* Taint-tracking configuration tracing flow from obtaining a client ip from an HTTP header to a sensitive use.
*/
class ClientSuppliedIpUsedInSecurityCheckConfig extends TaintTracking::Configuration {
ClientSuppliedIpUsedInSecurityCheckConfig() { this = "ClientSuppliedIpUsedInSecurityCheckConfig" }
override predicate isSource(DataFlow::Node source) {
source instanceof ClientSuppliedIpUsedInSecurityCheck
}
override predicate isSink(DataFlow::Node sink) { sink instanceof PossibleSecurityCheck }
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallCfgNode ccn |
ccn = API::moduleImport("netaddr").getMember("IPAddress").getACall() and
ccn.getArg(0) = pred and
ccn = succ
)
}
override predicate isSanitizer(DataFlow::Node node) {
// `client_supplied_ip.split(",")[n]` for `n` > 0
exists(Subscript ss |
not ss.getIndex().(IntegerLiteral).getText() = "0" and
ss.getObject().(Call).getFunc().(Attribute).getName() = "split" and
ss.getObject().(Call).getAnArg().(StrConst).getText() = "," and
ss = node.asExpr()
)
}
}
from
ClientSuppliedIpUsedInSecurityCheckConfig config, DataFlow::PathNode source,
DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "IP address spoofing might include code from $@.",
source.getNode(), "this user input"

View File

@@ -0,0 +1,152 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* A data flow source of the client ip obtained according to the remote endpoint identifier specified
* (`X-Forwarded-For`, `X-Real-IP`, `Proxy-Client-IP`, etc.) in the header.
*
* For example: `request.headers.get("X-Forwarded-For")`.
*/
abstract class ClientSuppliedIpUsedInSecurityCheck extends DataFlow::CallCfgNode { }
private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
FlaskClientSuppliedIpUsedInSecurityCheck() {
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
rfs.getSourceType() = "flask.request" and this.getFunction() = get
|
// `get` is a call to request.headers.get or request.headers.get_all or request.headers.getlist
// request.headers
get.getObject()
.(DataFlow::AttrRead)
// request
.getObject()
.getALocalSource() = rfs and
get.getAttributeName() in ["get", "get_all", "getlist"] and
get.getObject().(DataFlow::AttrRead).getAttributeName() = "headers" and
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
)
}
}
private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
DjangoClientSuppliedIpUsedInSecurityCheck() {
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
rfs.getSourceType() = "django.http.request.HttpRequest" and this.getFunction() = get
|
// `get` is a call to request.headers.get or request.META.get
// request.headers
get.getObject()
.(DataFlow::AttrRead)
// request
.getObject()
.getALocalSource() = rfs and
get.getAttributeName() = "get" and
get.getObject().(DataFlow::AttrRead).getAttributeName() in ["headers", "META"] and
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
)
}
}
private class TornadoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
TornadoClientSuppliedIpUsedInSecurityCheck() {
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
rfs.getSourceType() = "tornado.web.RequestHandler" and this.getFunction() = get
|
// `get` is a call to `rfs`.request.headers.get
// `rfs`.request.headers
get.getObject()
.(DataFlow::AttrRead)
// `rfs`.request
.getObject()
.(DataFlow::AttrRead)
// `rfs`
.getObject()
.getALocalSource() = rfs and
get.getAttributeName() in ["get", "get_list"] and
get.getObject().(DataFlow::AttrRead).getAttributeName() = "headers" and
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
)
}
}
private string clientIpParameterName() {
result in [
"x-forwarded-for", "x_forwarded_for", "x-real-ip", "x_real_ip", "proxy-client-ip",
"proxy_client_ip", "wl-proxy-client-ip", "wl_proxy_client_ip", "http_x_forwarded_for",
"http-x-forwarded-for", "http_x_forwarded", "http_x_cluster_client_ip", "http_client_ip",
"http_forwarded_for", "http_forwarded", "http_via", "remote_addr"
]
}
/** A data flow sink for ip address forgery vulnerabilities. */
abstract class PossibleSecurityCheck extends DataFlow::Node { }
/** A data flow sink for sql operation. */
private class SqlOperationAsSecurityCheck extends PossibleSecurityCheck {
SqlOperationAsSecurityCheck() { this = any(SqlExecution e).getSql() }
}
/**
* A data flow sink for remote client ip comparison.
*
* For example: `if not ipAddr.startswith('192.168.') : ...` determine whether the client ip starts
* with `192.168.`, and the program can be deceived by forging the ip address.
*/
private class CompareSink extends PossibleSecurityCheck {
CompareSink() {
exists(Call call |
call.getFunc().(Attribute).getName() = "startswith" and
call.getArg(0).(StrConst).getText().regexpMatch(getIpAddressRegex()) and
not call.getArg(0).(StrConst).getText() = "0:0:0:0:0:0:0:1" and
call.getFunc().(Attribute).getObject() = this.asExpr()
)
or
exists(Compare compare |
(
compare.getOp(0) instanceof Eq or
compare.getOp(0) instanceof NotEq
) and
(
compare.getLeft() = this.asExpr() and
compare.getComparator(0).(StrConst).getText() instanceof PrivateHostName and
not compare.getComparator(0).(StrConst).getText() = "0:0:0:0:0:0:0:1"
or
compare.getComparator(0) = this.asExpr() and
compare.getLeft().(StrConst).getText() instanceof PrivateHostName and
not compare.getLeft().(StrConst).getText() = "0:0:0:0:0:0:0:1"
)
)
or
exists(Compare compare |
(
compare.getOp(0) instanceof In or
compare.getOp(0) instanceof NotIn
) and
(
compare.getLeft() = this.asExpr()
or
compare.getComparator(0) = this.asExpr() and
not compare.getLeft().(StrConst).getText() in ["%", ",", "."]
)
)
}
}
string getIpAddressRegex() {
result =
"^((10\\.((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)(\\.)?)|(192\\.168\\.)|172\\.(1[6789]|2[0-9]|3[01])\\.)((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)(\\.)?((1\\d{2})?|(2[0-4]\\d)?|(25[0-5])?|([1-9]\\d|[0-9])?)$"
}
/**
* A string matching private host names of IPv4 and IPv6, which only matches the host portion therefore checking for port is not necessary.
* Several examples are localhost, reserved IPv4 IP addresses including 127.0.0.1, 10.x.x.x, 172.16.x,x, 192.168.x,x, and reserved IPv6 addresses including [0:0:0:0:0:0:0:1] and [::1]
*/
private class PrivateHostName extends string {
bindingset[this]
PrivateHostName() {
this.regexpMatch("(?i)localhost(?:[:/?#].*)?|127\\.0\\.0\\.1(?:[:/?#].*)?|10(?:\\.[0-9]+){3}(?:[:/?#].*)?|172\\.16(?:\\.[0-9]+){2}(?:[:/?#].*)?|192.168(?:\\.[0-9]+){2}(?:[:/?#].*)?|\\[?0:0:0:0:0:0:0:1\\]?(?:[:/?#].*)?|\\[?::1\\]?(?:[:/?#].*)?")
}
}

View File

@@ -0,0 +1,23 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Failing to ensure the utilization of SSL in an LDAP connection can cause the entire communication
to be sent in cleartext making it easier for an attacker to intercept it.</p>
</overview>
<recommendation>
<p>Always set <code>use_SSL</code> to <code>True</code>, call <code>start_tls_s()</code> or set a proper option flag (<code>ldap.OPT_X_TLS_XXXXXX</code>).</p>
</recommendation>
<example>
<p>This example shows both good and bad ways to deal with this issue under Python 3.</p>
<p>The first one sets <code>use_SSL</code> to true as a keyword argument whereas the second one fails to provide a value for it, so
the default one is used (<code>False</code>).</p>
<sample src="examples/LDAPInsecureAuth.py" />
</example>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Python Insecure LDAP Authentication
* @description Python LDAP Insecure LDAP Authentication
* @kind path-problem
* @problem.severity error
* @id py/insecure-ldap-auth
* @tags experimental
* security
* external/cwe/cwe-522
* external/cwe/cwe-523
*/
// determine precision above
import python
import DataFlow::PathGraph
import experimental.semmle.python.security.LDAPInsecureAuth
from LDAPInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ is authenticated insecurely.", sink.getNode(),
"This LDAP host"

View File

@@ -0,0 +1,20 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
app = Flask(__name__)
@app.route("/good")
def good():
srv = Server(host, port, use_ssl=True)
conn = Connection(srv, dn, password)
conn.search(dn, search_filter)
return conn.response
@app.route("/bad")
def bad():
srv = Server(host, port)
conn = Connection(srv, dn, password)
conn.search(dn, search_filter)
return conn.response

View File

@@ -0,0 +1,33 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
import XpathInjection::XpathInjection
import DataFlow::PathGraph
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "PathNotNormalizedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
}
from XpathInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `XpathInjection::Configuration` is needed, otherwise
* `XpathInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
module XpathInjection {
import XpathInjectionCustomizations::XpathInjection
/**
* A taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xpath Injection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,105 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/**
* A data flow source for "XPath injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "XPath injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "XPath injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "XPath injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** Returns an API node referring to `lxml.etree` */
API::Node etree() { result = API::moduleImport("lxml").getMember("etree") }
/** Returns an API node referring to `lxml.etree` */
API::Node etreeFromString() { result = etree().getMember("fromstring") }
/** Returns an API node referring to `lxml.etree.parse` */
API::Node etreeParse() { result = etree().getMember("parse") }
/** Returns an API node referring to `lxml.etree.parse` */
API::Node libxml2parseFile() { result = API::moduleImport("libxml2").getMember("parseFile") }
/**
* A Sink representing an argument to `etree.XPath` or `etree.ETXPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
* find_text = etree.ETXPath("`sink`")
*/
private class EtreeXpathArgument extends Sink {
EtreeXpathArgument() { this = etree().getMember(["XPath", "ETXPath"]).getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `etree.XPath` call.
*
* from lxml import etree
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
* find_text = root.xpath("`sink`")
*/
private class EtreeFromstringXpathArgument extends Sink {
EtreeFromstringXpathArgument() {
this = etreeFromString().getReturn().getMember("xpath").getACall().getArg(0)
}
}
/**
* A Sink representing an argument to the `xpath` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends Sink {
ParseXpathArgument() { this = etreeParse().getReturn().getMember("xpath").getACall().getArg(0) }
}
/**
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends Sink {
ParseFileXpathEvalArgument() {
this = libxml2parseFile().getReturn().getMember("xpathEval").getACall().getArg(0)
}
}
}

View File

@@ -1,36 +0,0 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.Paths
import semmle.python.security.strings.Untrusted
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.Xpath
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XpathInjection::XpathInjectionSink
}
}
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -14,71 +14,34 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling Regular Expression-related APIs. */
module RegexExecution {
/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data-flow node that executes a regular expression.
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexExecution` instead.
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the executed expression.
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getRegexNode();
/**
* Gets the library used to execute the regular expression.
*/
abstract string getRegexModule();
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node that executes a regular expression.
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
* extend `LogOutput::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
class LogOutput extends DataFlow::Node {
LogOutput::Range range;
RegexExecution() { this = range }
LogOutput() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
string getRegexModule() { result = range.getRegexModule() }
}
/** Provides classes for modeling Regular Expression escape-related APIs. */
module RegexEscape {
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexEscape` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the escaped expression.
*/
abstract DataFlow::Node getRegexNode();
}
}
/**
* A data-flow node that escapes a regular expression.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexEscape::Range` instead.
*/
class RegexEscape extends DataFlow::Node {
RegexEscape::Range range;
RegexEscape() { this = range }
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
@@ -156,10 +119,20 @@ module LDAPBind {
* extend `LDAPBind` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the binding host.
*/
abstract DataFlow::Node getHost();
/**
* Gets the argument containing the binding expression.
*/
abstract DataFlow::Node getPassword();
/**
* Holds if the binding process use SSL.
*/
abstract predicate useSSL();
}
}
@@ -174,7 +147,20 @@ class LDAPBind extends DataFlow::Node {
LDAPBind() { this = range }
/**
* Gets the argument containing the binding host.
*/
DataFlow::Node getHost() { result = range.getHost() }
/**
* Gets the argument containing the binding expression.
*/
DataFlow::Node getPassword() { result = range.getPassword() }
/**
* Holds if the binding process use SSL.
*/
predicate useSSL() { range.useSSL() }
}
/** Provides classes for modeling SQL sanitization libraries. */
@@ -267,3 +253,46 @@ class NoSQLSanitizer extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be sanitized. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling HTTP Header APIs. */
module HeaderDeclaration {
/**
* A data-flow node that collects functions setting HTTP Headers.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HeaderDeclaration` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the header name.
*/
abstract DataFlow::Node getNameArg();
/**
* Gets the argument containing the header value.
*/
abstract DataFlow::Node getValueArg();
}
}
/**
* A data-flow node that collects functions setting HTTP Headers.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HeaderDeclaration::Range` instead.
*/
class HeaderDeclaration extends DataFlow::Node {
HeaderDeclaration::Range range;
HeaderDeclaration() { this = range }
/**
* Gets the argument containing the header name.
*/
DataFlow::Node getNameArg() { result = range.getNameArg() }
/**
* Gets the argument containing the header value.
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
}

View File

@@ -3,5 +3,9 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log

View File

@@ -0,0 +1,93 @@
/**
* Provides classes modeling security-relevant aspects of the `django` PyPI package.
* See https://www.djangoproject.com/.
*/
private import python
private import semmle.python.frameworks.Django
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
import semmle.python.dataflow.new.RemoteFlowSources
private module PrivateDjango {
private module django {
API::Node http() { result = API::moduleImport("django").getMember("http") }
module http {
API::Node response() { result = http().getMember("response") }
API::Node request() { result = http().getMember("request") }
module request {
module HttpRequest {
class DjangoGETParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGETParameter() { this = request().getMember("GET").getMember("get").getACall() }
override string getSourceType() { result = "django.http.request.GET.get" }
}
}
}
module response {
module HttpResponse {
API::Node baseClassRef() {
result = response().getMember("HttpResponse").getReturn()
or
// Handle `django.http.HttpResponse` alias
result = http().getMember("HttpResponse").getReturn()
}
/** Gets a reference to a header instance. */
private DataFlow::LocalSourceNode headerInstance(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript |
subscript.getObject() = baseClassRef().getAUse().asCfgNode() and
result.asCfgNode() = subscript
)
or
result.(DataFlow::AttrRead).getObject() = baseClassRef().getAUse()
)
or
exists(DataFlow::TypeTracker t2 | result = headerInstance(t2).track(t2, t))
}
/** Gets a reference to a header instance use. */
private DataFlow::Node headerInstance() {
headerInstance(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** Gets a reference to a header instance call with `__setitem__`. */
private DataFlow::Node headerSetItemCall() {
result = headerInstance() and
result.(DataFlow::AttrRead).getAttributeName() = "__setitem__"
}
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
DjangoResponseSetItemCall() { this.getFunction() = headerSetItemCall() }
override DataFlow::Node getNameArg() { result = this.getArg(0) }
override DataFlow::Node getValueArg() { result = this.getArg(1) }
}
class DjangoResponseDefinition extends DataFlow::Node, HeaderDeclaration::Range {
DataFlow::Node headerInput;
DjangoResponseDefinition() {
this.asCfgNode().(DefinitionNode) = headerInstance().asCfgNode() and
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
}
override DataFlow::Node getNameArg() {
result.asExpr() = this.asExpr().(Subscript).getIndex()
}
override DataFlow::Node getValueArg() { result = headerInput }
}
}
}
}
}
}

View File

@@ -0,0 +1,84 @@
/**
* Provides classes modeling security-relevant aspects of the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private import python
private import semmle.python.frameworks.Flask
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
module ExperimentalFlask {
/**
* A reference to either `flask.make_response` function, or the `make_response` method on
* an instance of `flask.Flask`. This creates an instance of the `flask_response`
* class (class-attribute on a flask application), which by default is
* `flask.Response`.
*
* See
* - https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.make_response
* - https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response
*/
private API::Node flaskMakeResponse() {
result =
[API::moduleImport("flask"), Flask::FlaskApp::instance()]
.getMember(["make_response", "jsonify", "make_default_options_response"])
}
/** Gets a reference to a header instance. */
private DataFlow::LocalSourceNode headerInstance(DataFlow::TypeTracker t) {
t.start() and
result.(DataFlow::AttrRead).getObject().getALocalSource() =
[Flask::Response::classRef(), flaskMakeResponse()].getReturn().getAUse()
or
exists(DataFlow::TypeTracker t2 | result = headerInstance(t2).track(t2, t))
}
/** Gets a reference to a header instance use. */
private DataFlow::Node headerInstance() {
headerInstance(DataFlow::TypeTracker::end()).flowsTo(result)
}
/** Gets a reference to a header instance call/subscript */
private DataFlow::Node headerInstanceCall() {
headerInstance() in [result.(DataFlow::AttrRead), result.(DataFlow::AttrRead).getObject()] or
headerInstance().asExpr() = result.asExpr().(Subscript).getObject()
}
class FlaskHeaderDefinition extends DataFlow::Node, HeaderDeclaration::Range {
DataFlow::Node headerInput;
FlaskHeaderDefinition() {
this.asCfgNode().(DefinitionNode) = headerInstanceCall().asCfgNode() and
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
}
override DataFlow::Node getNameArg() { result.asExpr() = this.asExpr().(Subscript).getIndex() }
override DataFlow::Node getValueArg() { result = headerInput }
}
private class FlaskMakeResponseExtend extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
KeyValuePair item;
FlaskMakeResponseExtend() {
this.getFunction() = headerInstanceCall() and
item = this.getArg(_).asExpr().(Dict).getAnItem()
}
override DataFlow::Node getNameArg() { result.asExpr() = item.getKey() }
override DataFlow::Node getValueArg() { result.asExpr() = item.getValue() }
}
private class FlaskResponse extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
KeyValuePair item;
FlaskResponse() { this = Flask::Response::classRef().getACall() }
override DataFlow::Node getNameArg() { result.asExpr() = item.getKey() }
override DataFlow::Node getValueArg() { result.asExpr() = item.getValue() }
}
}

View File

@@ -88,6 +88,11 @@ private module LDAP {
result.(DataFlow::AttrRead).getAttributeName() instanceof LDAP2BindMethods
}
/**List of SSL-demanding options */
private class LDAPSSLOptions extends DataFlow::Node {
LDAPSSLOptions() { this = ldap().getMember("OPT_X_TLS_" + ["DEMAND", "HARD"]).getAUse() }
}
/**
* A class to find `ldap` methods binding a connection.
*
@@ -99,6 +104,44 @@ private module LDAP {
override DataFlow::Node getPassword() {
result in [this.getArg(1), this.getArgByName("cred")]
}
override DataFlow::Node getHost() {
exists(DataFlow::CallCfgNode initialize |
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
result = initialize.getArg(0)
)
}
override predicate useSSL() {
// use initialize to correlate `this` and so avoid FP in several instances
exists(DataFlow::CallCfgNode initialize |
// ldap.set_option(ldap.OPT_X_TLS_%s)
ldap().getMember("set_option").getACall().getArg(_) instanceof LDAPSSLOptions
or
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = initialize and
initialize = ldapInitialize().getACall() and
(
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getObject().getALocalSource() = initialize and
startTLS.getMethodName() = "start_tls_s"
)
or
// ldap_connection.set_option(ldap.OPT_X_TLS_%s, True)
exists(DataFlow::CallCfgNode setOption |
setOption.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
initialize and
setOption.getFunction().(DataFlow::AttrRead).getAttributeName() = "set_option" and
setOption.getArg(0) instanceof LDAPSSLOptions and
not DataFlow::exprNode(any(False falseExpr))
.(DataFlow::LocalSourceNode)
.flowsTo(setOption.getArg(1))
)
)
)
}
}
/**
@@ -166,6 +209,31 @@ private module LDAP {
override DataFlow::Node getPassword() {
result in [this.getArg(2), this.getArgByName("password")]
}
override DataFlow::Node getHost() {
exists(DataFlow::CallCfgNode serverCall |
serverCall = ldap3Server().getACall() and
this.getArg(0).getALocalSource() = serverCall and
result = serverCall.getArg(0)
)
}
override predicate useSSL() {
exists(DataFlow::CallCfgNode serverCall |
serverCall = ldap3Server().getACall() and
this.getArg(0).getALocalSource() = serverCall and
DataFlow::exprNode(any(True trueExpr))
.(DataFlow::LocalSourceNode)
.flowsTo([serverCall.getArg(2), serverCall.getArgByName("use_ssl")])
)
or
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getMethodName() = "start_tls_s" and
startTLS.getObject().getALocalSource() = this
)
}
}
/**

View File

@@ -0,0 +1,118 @@
/**
* Provides classes modeling security-relevant aspects of the log libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.frameworks.Flask
private import semmle.python.ApiGraphs
/**
* Provides models for Python's log-related libraries.
*/
private module log {
/**
* Log output method list.
*
* See https://docs.python.org/3/library/logging.html#logger-objects
*/
private class LogOutputMethods extends string {
LogOutputMethods() {
this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"]
}
}
/**
* The class used to find the log output method of the `logging` module.
*
* See `LogOutputMethods`
*/
private class LoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggingCall() {
this = API::moduleImport("logging").getMember(any(LogOutputMethods m)).getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find log output methods related to the `logging.getLogger` instance.
*
* See `LogOutputMethods`
*/
private class LoggerCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggerCall() {
this =
API::moduleImport("logging")
.getMember("getLogger")
.getReturn()
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application).
*
* See `LogOutputMethods`
*/
private class FlaskLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
FlaskLoggingCall() {
this =
Flask::FlaskApp::instance()
.getMember("logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application).
*
* See `LogOutputMethods`
*/
private class DjangoLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
DjangoLoggingCall() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
}

View File

@@ -9,91 +9,3 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for Python's `re` library.
*
* See https://docs.python.org/3/library/re.html
*/
private module Re {
/**
* List of `re` methods immediately executing an expression.
*
* See https://docs.python.org/3/library/re.html#module-contents
*/
private class RegexExecutionMethods extends string {
RegexExecutionMethods() {
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
}
}
/**
* A class to find `re` methods immediately executing an expression.
*
* See `RegexExecutionMethods`
*/
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
DataFlow::Node regexNode;
DirectRegex() {
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
*
* Given the following example:
*
* ```py
* pattern = re.compile(input)
* pattern.match(s)
* ```
*
* This class will identify that `re.compile` compiles `input` and afterwards
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
*
*
* See `RegexExecutionMethods`
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
DataFlow::Node regexNode;
CompiledRegex() {
exists(DataFlow::MethodCallNode patternCall |
patternCall = API::moduleImport("re").getMember("compile").getACall() and
patternCall.flowsTo(this.getObject()) and
this.getMethodName() instanceof RegexExecutionMethods and
regexNode = patternCall.getArg(0)
)
}
override DataFlow::Node getRegexNode() { result = regexNode }
override string getRegexModule() { result = "re" }
}
/**
* A class to find `re` methods escaping an expression.
*
* See https://docs.python.org/3/library/re.html#re.escape
*/
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
DataFlow::Node regexNode;
ReEscape() {
this = API::moduleImport("re").getMember("escape").getACall() and
regexNode = this.getArg(0)
}
override DataFlow::Node getRegexNode() { result = regexNode }
}
}

View File

@@ -0,0 +1,33 @@
/**
* Provides classes modeling security-relevant aspects of the `Werkzeug` PyPI package.
* See
* - https://pypi.org/project/Werkzeug/
* - https://werkzeug.palletsprojects.com/en/1.0.x/#werkzeug
*/
private import python
private import semmle.python.frameworks.Flask
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Werkzeug {
module datastructures {
module Headers {
class WerkzeugHeaderAddCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
WerkzeugHeaderAddCall() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
API::moduleImport("werkzeug")
.getMember("datastructures")
.getMember("Headers")
.getACall() and
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "add"
}
override DataFlow::Node getNameArg() { result = this.getArg(0) }
override DataFlow::Node getValueArg() { result = this.getArg(1) }
}
}
}
}

View File

@@ -0,0 +1,106 @@
/**
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import experimental.semmle.python.Concepts
string getFullHostRegex() { result = "(?i)ldap://.+" }
string getSchemaRegex() { result = "(?i)ldap(://)?" }
string getPrivateHostRegex() {
result =
"(?i)localhost(?:[:/?#].*)?|127\\.0\\.0\\.1(?:[:/?#].*)?|10(?:\\.[0-9]+){3}(?:[:/?#].*)?|172\\.16(?:\\.[0-9]+){2}(?:[:/?#].*)?|192.168(?:\\.[0-9]+){2}(?:[:/?#].*)?|\\[?0:0:0:0:0:0:0:1\\]?(?:[:/?#].*)?|\\[?::1\\]?(?:[:/?#].*)?"
}
// "ldap://somethingon.theinternet.com"
class LDAPFullHost extends StrConst {
LDAPFullHost() {
exists(string s |
s = this.getText() and
s.regexpMatch(getFullHostRegex()) and
// check what comes after the `ldap://` prefix
not s.substring(7, s.length()).regexpMatch(getPrivateHostRegex())
)
}
}
class LDAPSchema extends StrConst {
LDAPSchema() { this.getText().regexpMatch(getSchemaRegex()) }
}
class LDAPPrivateHost extends StrConst {
LDAPPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
}
predicate concatAndCompareAgainstFullHostRegex(LDAPSchema schema, StrConst host) {
not host instanceof LDAPPrivateHost and
(schema.getText() + host.getText()).regexpMatch(getFullHostRegex())
}
// "ldap://" + "somethingon.theinternet.com"
class LDAPBothStrings extends BinaryExpr {
LDAPBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
}
// schema + host
class LDAPBothVar extends BinaryExpr {
LDAPBothVar() {
exists(SsaVariable schemaVar, SsaVariable hostVar |
this.getLeft() = schemaVar.getVariable().getALoad() and // getAUse is incompatible with Expr
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
.getDefinition()
.getImmediateDominator()
.getNode(), hostVar.getDefinition().getImmediateDominator().getNode())
)
}
}
// schema + "somethingon.theinternet.com"
class LDAPVarString extends BinaryExpr {
LDAPVarString() {
exists(SsaVariable schemaVar |
this.getLeft() = schemaVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
.getDefinition()
.getImmediateDominator()
.getNode(), this.getRight())
)
}
}
// "ldap://" + host
class LDAPStringVar extends BinaryExpr {
LDAPStringVar() {
exists(SsaVariable hostVar |
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(this.getLeft(),
hostVar.getDefinition().getImmediateDominator().getNode())
)
}
}
/**
* A taint-tracking configuration for detecting LDAP insecure authentications.
*/
class LDAPInsecureAuthConfig extends TaintTracking::Configuration {
LDAPInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSource or
source.asExpr() instanceof LDAPFullHost or
source.asExpr() instanceof LDAPBothStrings or
source.asExpr() instanceof LDAPBothVar or
source.asExpr() instanceof LDAPVarString or
source.asExpr() instanceof LDAPStringVar
}
override predicate isSink(DataFlow::Node sink) {
exists(LDAPBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
}
}

View File

@@ -0,0 +1,20 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for detecting HTTP Header injections.
*/
class HeaderInjectionFlowConfig extends TaintTracking::Configuration {
HeaderInjectionFlowConfig() { this = "HeaderInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(HeaderDeclaration headerDeclaration |
sink in [headerDeclaration.getNameArg(), headerDeclaration.getValueArg()]
)
}
}

View File

@@ -0,0 +1,24 @@
import python
import semmle.python.Concepts
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class LogInjectionFlowConfig extends TaintTracking::Configuration {
LogInjectionFlowConfig() { this = "LogInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(LogOutput logoutput).getAnInput() }
override predicate isSanitizer(DataFlow::Node node) {
exists(CallNode call |
node.asCfgNode() = call.getFunction().(AttrNode).getObject("replace") and
call.getArg(0).getNode().(StrConst).getText() in ["\r\n", "\n"]
)
}
}

View File

@@ -1,53 +0,0 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A class to find methods executing regular expressions.
*
* See `RegexExecution`
*/
class RegexInjectionSink extends DataFlow::Node {
string regexModule;
Attribute regexMethod;
RegexInjectionSink() {
exists(RegexExecution reExec |
this = reExec.getRegexNode() and
regexModule = reExec.getRegexModule() and
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
)
}
/**
* Gets the argument containing the executed expression.
*/
string getRegexModule() { result = regexModule }
/**
* Gets the method used to execute the regular expression.
*/
Attribute getRegexMethod() { result = regexMethod }
}
/**
* A taint-tracking configuration for detecting regular expression injections.
*/
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(RegexEscape reEscape).getRegexNode()
}
}

View File

@@ -1,115 +0,0 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query
* to extend `TaintKind` and `TaintSink`.
*/
import python
import semmle.python.dataflow.TaintTracking
import semmle.python.web.HttpRequest
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/** Returns a class value which refers to `lxml.etree` */
Value etree() { result = Value::named("lxml.etree") }
/** Returns a class value which refers to `lxml.etree` */
Value libxml2parseFile() { result = Value::named("libxml2.parseFile") }
/** A generic taint sink that is vulnerable to Xpath injection. */
abstract class XpathInjectionSink extends TaintSink { }
/**
* A Sink representing an argument to the `etree.XPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
*/
private class EtreeXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.XPath" }
EtreeXpathArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("XPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `etree.EtXpath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.EtXPath("`sink`")
*/
private class EtreeETXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.ETXpath" }
EtreeETXpathArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("ETXPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `xpath` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.parse.xpath" }
ParseXpathArgument() {
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xpathCall.getFunction().(AttrNode).getObject("xpath") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xpathCall.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends XpathInjectionSink {
override string toString() { result = "libxml2.parseFile.xpathEval" }
ParseFileXpathEvalArgument() {
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).pointsTo(libxml2parseFile()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xpathCall.getFunction().(AttrNode).getObject("xpathEval") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xpathCall.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
}

View File

@@ -8,7 +8,7 @@ import semmle.python.Files
* column `startcol` of line `startline` to column `endcol` of line `endline`
* in file `filepath`.
*
* For more information, see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* For more information, see [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
external predicate defectResults(
int id, string queryPath, string filepath, int startline, int startcol, int endline, int endcol,
@@ -54,7 +54,7 @@ class DefectResult extends int {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn

View File

@@ -38,7 +38,7 @@ class ThriftElement extends ExternalData {
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn