Merge pull request #7735 from yoff/python/promote-log-injection

Python: promote log injection
This commit is contained in:
Rasmus Wriedt Larsen
2022-02-23 16:21:12 +01:00
committed by GitHub
19 changed files with 208 additions and 205 deletions

View File

@@ -2295,4 +2295,22 @@ module PrivateDjango {
override string getMimetypeDefault() { none() }
}
// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A standard Python logger instance from Django.
* see https://github.com/django/django/blob/stable/4.0.x/django/utils/log.py#L11
*/
private class DjangoLogger extends Stdlib::Logger::InstanceSource {
DjangoLogger() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getAnImmediateUse()
}
}
}

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.frameworks.Stdlib
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
@@ -569,4 +570,18 @@ module Flask {
result in [this.getArg(0), this.getArgByName("filename_or_fp")]
}
}
// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------
/**
* A Flask application provides a standard Python logger via the `logger` attribute.
*
* See
* - https://flask.palletsprojects.com/en/2.0.x/api/#flask.Flask.logger
* - https://flask.palletsprojects.com/en/2.0.x/logging/
*/
private class FlaskLogger extends Stdlib::Logger::InstanceSource {
FlaskLogger() { this = FlaskApp::instance().getMember("logger").getAnImmediateUse() }
}
}

View File

@@ -237,6 +237,54 @@ module Stdlib {
}
}
}
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class and subclasses.
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
private API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}
/**
* A source of instances of `logging.Logger`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Logger::instance()` to get references to instances of `logging.Logger`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `logging.Logger`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
ClassInstantiation() {
this = subclassRef().getACall()
or
this = API::moduleImport("logging").getMember("root").getAnImmediateUse()
or
this = API::moduleImport("logging").getMember("getLogger").getACall()
}
}
/** Gets a reference to an instance of `logging.Logger`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `logging.Logger`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
}
/**
@@ -2642,27 +2690,6 @@ private module StdlibPrivate {
// ---------------------------------------------------------------------------
// logging
// ---------------------------------------------------------------------------
/**
* Provides models for the `logging.Logger` class and subclasses.
*
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
/** Gets a reference to the `logging.Logger` class or any subclass. */
API::Node subclassRef() {
result = API::moduleImport("logging").getMember("Logger").getASubclass*()
}
/** Gets a reference to an instance of `logging.Logger` or any subclass. */
API::Node instance() {
result = subclassRef().getReturn()
or
result = API::moduleImport("logging").getMember("root")
or
result = API::moduleImport("logging").getMember("getLogger").getReturn()
}
}
/**
* A call to one of the logging methods from `logging` or on a `logging.Logger`
* subclass.
@@ -2683,14 +2710,14 @@ private module StdlibPrivate {
method = "log" and
msgIndex = 1
|
this = Logger::instance().getMember(method).getACall()
this.(DataFlow::MethodCallNode).calls(Stdlib::Logger::instance(), method)
or
this = API::moduleImport("logging").getMember(method).getACall()
)
}
override DataFlow::Node getAnInput() {
result = this.getArgByName("msg")
result = this.getArgByName(["msg", "extra"])
or
result = this.getArg(any(int i | i >= msgIndex))
}

View File

@@ -0,0 +1,35 @@
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*
* Note, for performance reasons: only import this file if
* `LogInjection::Configuration` is needed, otherwise
* `LogInjectionCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for tracking untrusted user input used in log entries.
*/
module LogInjection {
import LogInjectionCustomizations::LogInjection
/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "LogInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,73 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "log injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module LogInjection {
/**
* A data flow source for "log injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "log injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "log injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "log injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A logging operation, considered as a flow sink.
*/
class LoggingAsSink extends Sink {
LoggingAsSink() { this = any(Logging write).getAnInput() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* A call to replace line breaks, considered as a sanitizer.
*/
class ReplaceLineBreaksSanitizer extends Sanitizer, DataFlow::CallCfgNode {
// Note: This sanitizer is not 100% accurate, since:
// - we do not check that all kinds of line breaks are replaced
// - we do not check that one kind of line breaks is not replaced by another
//
// However, we lack a simple way to do better, and the query would likely
// be too noisy without this.
//
// TODO: Consider rewriting using flow states.
ReplaceLineBreaksSanitizer() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "replace" and
this.getArg(0).asExpr().(StrConst).getText() in ["\r\n", "\n"]
}
}
}

View File

@@ -8,8 +8,11 @@
<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>
<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
<p>Forgery can occur if a user provides some input with characters that are interpreted
when the log output is displayed. If the log is displayed as a plain text file, then new
line characters can be used by a malicious user to create the appearance of multiple log
entries. If the log is displayed as HTML, then arbitrary HTML may be included to spoof
log entries.</p>
</overview>
<recommendation>
@@ -29,14 +32,14 @@ other forms of HTML injection.
<example>
<p>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be divided into two lines, the first line is <code>User name: Guest</code> code>, the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />
<p>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.
</p>

View File

@@ -4,17 +4,18 @@
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @security-severity 7.8
* @precision medium
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/
import python
import experimental.semmle.python.security.injection.LogInjection
import semmle.python.security.dataflow.LogInjection
import DataFlow::PathGraph
from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).

View File

@@ -14,36 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LogOutput::Range` instead.
*/
class LogOutput extends DataFlow::Node {
LogOutput::Range range;
LogOutput() { this = range }
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**

View File

@@ -8,7 +8,6 @@ private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Python_JWT

View File

@@ -1,118 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of the log libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.frameworks.Flask
private import semmle.python.ApiGraphs
/**
* Provides models for Python's log-related libraries.
*/
private module log {
/**
* Log output method list.
*
* See https://docs.python.org/3/library/logging.html#logger-objects
*/
private class LogOutputMethods extends string {
LogOutputMethods() {
this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"]
}
}
/**
* The class used to find the log output method of the `logging` module.
*
* See `LogOutputMethods`
*/
private class LoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggingCall() {
this = API::moduleImport("logging").getMember(any(LogOutputMethods m)).getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find log output methods related to the `logging.getLogger` instance.
*
* See `LogOutputMethods`
*/
private class LoggerCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggerCall() {
this =
API::moduleImport("logging")
.getMember("getLogger")
.getReturn()
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application).
*
* See `LogOutputMethods`
*/
private class FlaskLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
FlaskLoggingCall() {
this =
Flask::FlaskApp::instance()
.getMember("logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application).
*
* See `LogOutputMethods`
*/
private class DjangoLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
DjangoLoggingCall() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
}

View File

@@ -1,24 +0,0 @@
import python
import semmle.python.Concepts
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class LogInjectionFlowConfig extends TaintTracking::Configuration {
LogInjectionFlowConfig() { this = "LogInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(LogOutput logoutput).getAnInput() }
override predicate isSanitizer(DataFlow::Node node) {
exists(CallNode call |
node.asCfgNode() = call.getFunction().(AttrNode).getObject("replace") and
call.getArg(0).getNode().(StrConst).getText() in ["\r\n", "\n"]
)
}
}

View File

@@ -1 +0,0 @@
experimental/Security/CWE-117/LogInjection.ql

View File

@@ -0,0 +1 @@
Security/CWE-117/LogInjection.ql