Merge branch 'main' into rc/3.7

This commit is contained in:
Andrew Eisenberg
2022-09-20 08:33:58 -07:00
2309 changed files with 133758 additions and 43219 deletions

View File

@@ -13,11 +13,12 @@
*/
import python
import semmle.python.ApiGraphs
predicate doesnt_reraise(ExceptStmt ex) { ex.getAFlowNode().getBasicBlock().reachesExit() }
predicate catches_base_exception(ExceptStmt ex) {
ex.getType().pointsTo(ClassValue::baseException())
ex.getType() = API::builtin("BaseException").getAValueReachableFromSource().asExpr()
or
not exists(ex.getType())
}

View File

@@ -12,6 +12,7 @@
*/
import python
import semmle.python.ApiGraphs
predicate empty_except(ExceptStmt ex) {
not exists(Stmt s | s = ex.getAStmt() and not s instanceof Pass)
@@ -28,7 +29,7 @@ predicate no_comment(ExceptStmt ex) {
}
predicate non_local_control_flow(ExceptStmt ex) {
ex.getType().pointsTo(ClassValue::stopIteration())
ex.getType() = API::builtin("StopIteration").getAValueReachableFromSource().asExpr()
}
predicate try_has_normal_exit(Try try) {
@@ -61,27 +62,32 @@ predicate subscript(Stmt s) {
s.(Delete).getATarget() instanceof Subscript
}
predicate encode_decode(Call ex, ClassValue type) {
predicate encode_decode(Call ex, Expr type) {
exists(string name | ex.getFunc().(Attribute).getName() = name |
name = "encode" and type = ClassValue::unicodeEncodeError()
name = "encode" and
type = API::builtin("UnicodeEncodeError").getAValueReachableFromSource().asExpr()
or
name = "decode" and type = ClassValue::unicodeDecodeError()
name = "decode" and
type = API::builtin("UnicodeDecodeError").getAValueReachableFromSource().asExpr()
)
}
predicate small_handler(ExceptStmt ex, Stmt s, ClassValue type) {
predicate small_handler(ExceptStmt ex, Stmt s, Expr type) {
not exists(ex.getTry().getStmt(1)) and
s = ex.getTry().getStmt(0) and
ex.getType().pointsTo(type)
ex.getType() = type
}
predicate focussed_handler(ExceptStmt ex) {
exists(Stmt s, ClassValue type | small_handler(ex, s, type) |
subscript(s) and type.getASuperType() = ClassValue::lookupError()
exists(Stmt s, Expr type | small_handler(ex, s, type) |
subscript(s) and
type = API::builtin("IndexError").getASubclass*().getAValueReachableFromSource().asExpr()
or
attribute_access(s) and type = ClassValue::attributeError()
attribute_access(s) and
type = API::builtin("AttributeError").getAValueReachableFromSource().asExpr()
or
s.(ExprStmt).getValue() instanceof Name and type = ClassValue::nameError()
s.(ExprStmt).getValue() instanceof Name and
type = API::builtin("NameError").getAValueReachableFromSource().asExpr()
or
encode_decode(s.(ExprStmt).getValue(), type)
)

View File

@@ -10,11 +10,14 @@
*/
import python
import semmle.python.dataflow.new.DataFlow
from Raise r, Value v, AstNode origin
from Raise r, DataFlow::LocalSourceNode origin
where
r.getException().pointsTo(v, origin) and
v.getClass() = ClassValue::tuple() and
exists(DataFlow::Node exception | exception.asExpr() = r.getException() |
origin.flowsTo(exception)
) and
origin.asExpr() instanceof Tuple and
major_version() = 2
/* Raising a tuple is a type error in Python 3, so is handled by the IllegalRaise query. */
select r,

View File

@@ -11,17 +11,22 @@
*/
import python
private import semmle.python.ApiGraphs
FunctionValue iter() { result = Value::named("iter") }
API::Node iter() { result = API::builtin("iter") }
BuiltinFunctionValue next() { result = Value::named("next") }
API::Node next() { result = API::builtin("next") }
API::Node stopIteration() { result = API::builtin("StopIteration") }
predicate call_to_iter(CallNode call, EssaVariable sequence) {
sequence.getAUse() = iter().getArgumentForCall(call, 0)
call = iter().getACall().asCfgNode() and
call.getArg(0) = sequence.getAUse()
}
predicate call_to_next(CallNode call, ControlFlowNode iter) {
iter = next().getArgumentForCall(call, 0)
call = next().getACall().asCfgNode() and
call.getArg(0) = iter
}
predicate call_to_next_has_default(CallNode call) {
@@ -47,7 +52,7 @@ predicate iter_not_exhausted(EssaVariable iterator) {
predicate stop_iteration_handled(CallNode call) {
exists(Try t |
t.containsInScope(call.getNode()) and
t.getAHandler().getType().pointsTo(ClassValue::stopIteration())
t.getAHandler().getType() = stopIteration().getAValueReachableFromSource().asExpr()
)
}
@@ -61,5 +66,11 @@ where
) and
call.getNode().getScope().(Function).isGenerator() and
not exists(Comp comp | comp.contains(call.getNode())) and
not stop_iteration_handled(call)
not stop_iteration_handled(call) and
// PEP 479 removes this concern from 3.7 onwards
// see: https://peps.python.org/pep-0479/
//
// However, we do not know the minor version of the analyzed code (only of the extractor),
// so we only alert on Python 2.
major_version() = 2
select call, "Call to next() in a generator"

View File

@@ -19,5 +19,5 @@ from
ModificationOfParameterWithDefault::Configuration config, DataFlow::PathNode source,
DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is mutated.", source.getNode(),
"Default value"
select sink.getNode(), source, sink, "This expression mutates $@.", source.getNode(),
"a default value"

View File

@@ -16,4 +16,4 @@ import Lexical.CommentedOutCode
from CommentedOutCodeBlock c
where not c.maybeExampleCode()
select c, "These comments appear to contain commented-out code."
select c, "This comment appears to contain commented-out code."

View File

@@ -65,13 +65,17 @@ private class DefaultSafeExternalApi extends SafeExternalApi {
/** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCall call;
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalApiDataNode() {
exists(call.getLocation().getFile().getRelativePath()) and
callable = call.getCallable() and
exists(DataFlowPrivate::DataFlowCall call |
exists(call.getLocation().getFile().getRelativePath())
|
callable = call.getCallable() and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i)
) and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
@@ -82,8 +86,6 @@ class ExternalApiDataNode extends DataFlow::Node {
or
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
) and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i) and
// Not already modeled as a taint step
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
// for `list.append(x)`, we have an additional taint step from x -> [post] list.

View File

@@ -18,5 +18,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Extraction of tarfile from $@", source.getNode(),
select sink.getNode(), source, sink, "This file extraction depends on $@", source.getNode(),
"a potentially untrusted source"

View File

@@ -20,5 +20,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
select sink.getNode(), source, sink, "This command line depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -23,6 +23,5 @@ where
or
any(FilterConfiguration filterConfig).hasFlowPath(source, sink) and
parameterName = "filter"
select sink.getNode(), source, sink,
"$@ LDAP query parameter (" + parameterName + ") comes from $@.", sink.getNode(), "This",
source.getNode(), "a user-provided value"
select sink.getNode(), source, sink, "$@ depends on $@.", sink.getNode(),
"LDAP query parameter (" + parameterName + ")", source.getNode(), "a user-provided value"

View File

@@ -20,5 +20,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"
select sink.getNode(), source, sink, "This code execution depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -17,5 +17,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"
select sink.getNode(), source, sink, "This log entry depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -19,5 +19,6 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
"Error information"
select sink.getNode(), source, sink,
"$@ flows to this location and may be exposed to an external user.", source.getNode(),
"Stack trace information"

View File

@@ -13,10 +13,9 @@
import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
from
HTTP::Client::Request request, DataFlow::Node disablingNode, DataFlow::Node origin, string ending
Http::Client::Request request, DataFlow::Node disablingNode, DataFlow::Node origin, string ending
where
request.disablesCertificateValidation(disablingNode, origin) and
// Showing the origin is only useful when it's a different node than the one disabling

View File

@@ -14,24 +14,24 @@
import python
import semmle.python.Concepts
predicate relevantSetting(HTTP::Server::CsrfProtectionSetting s) {
predicate relevantSetting(Http::Server::CsrfProtectionSetting s) {
// rule out test code as this is a common place to turn off CSRF protection.
// We don't use normal `TestScope` to find test files, since we also want to match
// a settings file such as `.../integration-tests/settings.py`
not s.getLocation().getFile().getAbsolutePath().matches("%test%")
}
predicate vulnerableSetting(HTTP::Server::CsrfProtectionSetting s) {
predicate vulnerableSetting(Http::Server::CsrfProtectionSetting s) {
s.getVerificationSetting() = false and
not exists(HTTP::Server::CsrfLocalProtectionSetting p | p.csrfEnabled()) and
not exists(Http::Server::CsrfLocalProtectionSetting p | p.csrfEnabled()) and
relevantSetting(s)
}
from HTTP::Server::CsrfProtectionSetting setting
from Http::Server::CsrfProtectionSetting setting
where
vulnerableSetting(setting) and
// We have seen examples of dummy projects with vulnerable settings alongside a main
// project with a protecting settings file. We want to rule out this scenario, so we
// require all non-test settings to be vulnerable.
forall(HTTP::Server::CsrfProtectionSetting s | relevantSetting(s) | vulnerableSetting(s))
forall(Http::Server::CsrfProtectionSetting s | relevantSetting(s) | vulnerableSetting(s))
select setting, "Potential CSRF vulnerability due to forgery protection being disabled or weakened."

View File

@@ -18,4 +18,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
select sink.getNode(), source, sink, "Unsafe deserialization depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -18,5 +18,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
"A user-provided value"
select sink.getNode(), source, sink, "Untrusted URL redirection depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -19,5 +19,5 @@ import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"A $@ is parsed as XML without guarding against external entity expansion.", source.getNode(),
"user-provided value"
"XML parsing depends on $@ without guarding against external entity expansion.", source.getNode(),
"a user-provided value"

View File

@@ -17,4 +17,5 @@ import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"
select sink.getNode(), source, sink, "XPath expression depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -4,6 +4,7 @@
* to match may be vulnerable to denial-of-service attacks.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id py/polynomial-redos
* @tags security

View File

@@ -5,6 +5,7 @@
* attacks.
* @kind problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/redos
* @tags security

View File

@@ -5,6 +5,7 @@
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id py/regex-injection
* @tags security
@@ -23,6 +24,6 @@ from
where
config.hasFlowPath(source, sink) and
regexExecution = sink.getNode().(Sink).getRegexExecution()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", regexExecution, regexExecution.getName()
select sink.getNode(), source, sink, "$@ depends on $@ and executed by $@.", sink.getNode(),
"This regular expression", source.getNode(), "a user-provided value", regexExecution,
regexExecution.getName()

View File

@@ -19,5 +19,5 @@ import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"A $@ is parsed as XML without guarding against uncontrolled entity expansion.", source.getNode(),
"user-provided value"
"XML parsing depends on $@ without guarding against uncontrolled entity expansion.",
source.getNode(), "a user-provided value"

View File

@@ -16,7 +16,7 @@ import DataFlow::PathGraph
from
FullServerSideRequestForgeryConfiguration fullConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
DataFlow::PathNode sink, Http::Client::Request request
where
request = sink.getNode().(Sink).getRequest() and
fullConfig.hasFlowPath(source, sink) and

View File

@@ -16,7 +16,7 @@ import DataFlow::PathGraph
from
PartialServerSideRequestForgeryConfiguration partialConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
DataFlow::PathNode sink, Http::Client::Request request
where
request = sink.getNode().(Sink).getRequest() and
partialConfig.hasFlowPath(source, sink) and

View File

@@ -61,4 +61,4 @@ predicate reportable_unreachable(Stmt s) {
from Stmt s
where reportable_unreachable(s)
select s, "Unreachable statement."
select s, "This statement is unreachable."

View File

@@ -43,4 +43,4 @@ where
unused_local(unused, v) and
// If unused is part of a tuple, count it as unused if all elements of that tuple are unused.
forall(Name el | el = unused.getParentNode().(Tuple).getAnElt() | unused_local(el, _))
select unused, "The value assigned to local variable '" + v.getId() + "' is never used."
select unused, "Variable " + v.getId() + " is not used"

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The alert messages of many queries have been changed to make them consistent with other languages.

View File

@@ -0,0 +1,4 @@
---
category: queryMetadata
---
* Added the `security-severity` tag to the `py/redos`, `py/polynomial-redos`, and `py/regex-injection` queries.

View File

@@ -416,7 +416,7 @@ class CsvWriter extends DataFlow::Node {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Cookie::Range` instead.
*/
class Cookie extends HTTP::Server::CookieWrite instanceof Cookie::Range {
class Cookie extends Http::Server::CookieWrite instanceof Cookie::Range {
/**
* Holds if this cookie is secure.
*/
@@ -441,7 +441,7 @@ module Cookie {
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Cookie` instead.
*/
abstract class Range extends HTTP::Server::CookieWrite::Range {
abstract class Range extends Http::Server::CookieWrite::Range {
/**
* Holds if this cookie is secure.
*/

View File

@@ -15,18 +15,21 @@ private module ExperimentalPrivateDjango {
private module DjangoMod {
API::Node http() { result = API::moduleImport("django").getMember("http") }
module Http {
module DjangoHttp {
API::Node response() { result = http().getMember("response") }
API::Node request() { result = http().getMember("request") }
module Request {
module HttpRequest {
class DjangoGETParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGETParameter() { this = request().getMember("GET").getMember("get").getACall() }
class DjangoGetParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGetParameter() { this = request().getMember("GET").getMember("get").getACall() }
override string getSourceType() { result = "django.http.request.GET.get" }
}
/** DEPRECATED: Alias for DjangoGetParameter */
deprecated class DjangoGETParameter = DjangoGetParameter;
}
}
@@ -51,7 +54,7 @@ private module ExperimentalPrivateDjango {
*
* Use the predicate `HttpResponse::instance()` to get references to instances of `django.http.response.HttpResponse`.
*/
abstract class InstanceSource extends HTTP::Server::HttpResponse::Range, DataFlow::Node {
abstract class InstanceSource extends Http::Server::HttpResponse::Range, DataFlow::Node {
}
/** A direct instantiation of `django.http.response.HttpResponse`. */
@@ -153,7 +156,7 @@ private module ExperimentalPrivateDjango {
*/
class DjangoResponseSetCookieCall extends DataFlow::MethodCallNode, Cookie::Range {
DjangoResponseSetCookieCall() {
this.calls(PrivateDjango::DjangoImpl::Http::Response::HttpResponse::instance(),
this.calls(PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponse::instance(),
"set_cookie")
}

View File

@@ -126,9 +126,9 @@ private module Ldap {
(
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getObject().getALocalSource() = initialize and
startTLS.getMethodName() = "start_tls_s"
exists(DataFlow::MethodCallNode startTls |
startTls.getObject().getALocalSource() = initialize and
startTls.getMethodName() = "start_tls_s"
)
or
// ldap_connection.set_option(ldap.OPT_X_TLS_%s, True)
@@ -234,9 +234,9 @@ private module Ldap {
or
// ldap_connection.start_tls_s()
// see https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#ldap.LDAPObject.start_tls_s
exists(DataFlow::MethodCallNode startTLS |
startTLS.getMethodName() = "start_tls_s" and
startTLS.getObject().getALocalSource() = this
exists(DataFlow::MethodCallNode startTls |
startTls.getMethodName() = "start_tls_s" and
startTls.getObject().getALocalSource() = this
)
}

View File

@@ -31,8 +31,8 @@ module SmtpLib {
* argument. Used because of the impossibility to get local source nodes from `_subparts`'
* `(List|Tuple)` elements.
*/
private class SMTPMessageConfig extends TaintTracking2::Configuration {
SMTPMessageConfig() { this = "SMTPMessageConfig" }
private class SmtpMessageConfig extends TaintTracking2::Configuration {
SmtpMessageConfig() { this = "SMTPMessageConfig" }
override predicate isSource(DataFlow::Node source) { source = mimeText(_) }
@@ -87,7 +87,7 @@ module SmtpLib {
sink =
[sendCall.getArg(2), sendCall.getArg(2).(DataFlow::MethodCallNode).getObject()]
.getALocalSource() and
any(SMTPMessageConfig a)
any(SmtpMessageConfig a)
.hasFlow(source, sink.(DataFlow::CallCfgNode).getArgByName("_subparts"))
or
// via .attach()
@@ -117,7 +117,7 @@ module SmtpLib {
* * `sub` would be `message["Subject"]` (`Subscript`)
* * `result` would be `"multipart test"`
*/
private DataFlow::Node getSMTPSubscriptByIndex(DataFlow::CallCfgNode sendCall, string index) {
private DataFlow::Node getSmtpSubscriptByIndex(DataFlow::CallCfgNode sendCall, string index) {
exists(DefinitionNode def, Subscript sub |
sub = def.getNode() and
DataFlow::exprNode(sub.getObject()).getALocalSource() =
@@ -163,15 +163,15 @@ module SmtpLib {
override DataFlow::Node getHtmlBody() { result = getSmtpMessage(this, "html") }
override DataFlow::Node getTo() {
result in [this.getArg(1), getSMTPSubscriptByIndex(this, "To")]
result in [this.getArg(1), getSmtpSubscriptByIndex(this, "To")]
}
override DataFlow::Node getFrom() {
result in [this.getArg(0), getSMTPSubscriptByIndex(this, "From")]
result in [this.getArg(0), getSmtpSubscriptByIndex(this, "From")]
}
override DataFlow::Node getSubject() {
result in [this.getArg(2), getSMTPSubscriptByIndex(this, "Subject")]
result in [this.getArg(2), getSmtpSubscriptByIndex(this, "Subject")]
}
}
}

View File

@@ -13,7 +13,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import meta.MetaMetrics
from HTTP::Server::RequestHandler requestHandler, string title
from Http::Server::RequestHandler requestHandler, string title
where
not requestHandler.getLocation().getFile() instanceof IgnoredFile and
if requestHandler.isMethod()