Merge branch 'main' into jty/python/emailInjection

This commit is contained in:
${sleep,7}
2022-04-20 09:50:08 -04:00
committed by GitHub
2207 changed files with 142269 additions and 105970 deletions

View File

@@ -1,3 +1,20 @@
## 0.0.13
## 0.0.12
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).
## 0.0.10
### New Queries
* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
## 0.0.9
### Bug Fixes

View File

@@ -1,7 +1,7 @@
import python
private import semmle.python.pointsto.PointsTo
/** Helper class for UndefinedClassAttribute.ql and MaybeUndefinedClassAttribute.ql */
/** A helper class for UndefinedClassAttribute.ql and MaybeUndefinedClassAttribute.ql */
class CheckClass extends ClassObject {
private predicate ofInterest() {
not this.unknowableAttributes() and

View File

@@ -32,7 +32,11 @@ predicate guarded_not_empty_sequence(EssaVariable sequence) {
sequence.getDefinition() instanceof EssaEdgeRefinement
}
/** The pattern `next(iter(x))` is often used where `x` is known not be empty. Check for that. */
/**
* Holds if `iterator` is not exhausted.
*
* The pattern `next(iter(x))` is often used where `x` is known not to be empty. Check for that.
*/
predicate iter_not_exhausted(EssaVariable iterator) {
exists(EssaVariable sequence |
call_to_iter(iterator.getDefinition().(AssignmentDefinition).getValue(), sequence) and

View File

@@ -1,7 +1,7 @@
import python
/** A string constant that looks like it may be used in string formatting operations. */
library class PossibleAdvancedFormatString extends StrConst {
class PossibleAdvancedFormatString extends StrConst {
PossibleAdvancedFormatString() { this.getText().matches("%{%}%") }
private predicate field(int start, int end) {

View File

@@ -13,7 +13,7 @@
import python
/**
* The module `name` was deprecated in Python version `major`.`minor`,
* Holds if the module `name` was deprecated in Python version `major`.`minor`,
* and module `instead` should be used instead (or `instead = "no replacement"`)
*/
predicate deprecated_module(string name, string instead, int major, int minor) {

View File

@@ -40,16 +40,6 @@ private predicate class_statement(Comment c) {
private predicate triple_quote(Comment c) { c.getText().regexpMatch("#.*(\"\"\"|''').*") }
private predicate triple_quoted_string_part(Comment start, Comment end) {
triple_quote(start) and end = start
or
exists(Comment mid |
triple_quoted_string_part(start, mid) and
end = non_empty_following(mid) and
not triple_quote(end)
)
}
private predicate maybe_code(Comment c) {
not non_code(c) and not filler(c) and not endline_comment(c) and not file_or_url(c)
or
@@ -158,11 +148,11 @@ private predicate commented_out_code_block(Comment start, Comment end) {
not commented_out_code(non_empty_following(end))
}
/* A single line comment that appears to be commented out code */
/** A single line comment that appears to be commented out code */
class CommentedOutCodeLine extends Comment {
CommentedOutCodeLine() { exists(CommentedOutCodeBlock b | b.contains(this)) }
/* Whether this commented-out code line is likely to be example code embedded in a larger comment. */
/** Holds if this commented-out code line is likely to be example code embedded in a larger comment. */
predicate maybeExampleCode() {
exists(CommentedOutCodeBlock block |
block.contains(this) and
@@ -178,7 +168,7 @@ class CommentedOutCodeBlock extends @py_comment {
/** Gets a textual representation of this element. */
string toString() { result = "Commented out code" }
/** Whether this commented-out code block contains the comment c */
/** Holds if this commented-out code block contains the comment c */
predicate contains(Comment c) {
this = c
or
@@ -189,7 +179,7 @@ class CommentedOutCodeBlock extends @py_comment {
)
}
/** The length of this comment block (in comments) */
/** Gets the length of this comment block (in comments) */
int length() { result = count(Comment c | this.contains(c)) }
/**

View File

@@ -66,7 +66,7 @@ predicate passes_open_files(Variable v, ControlFlowNode test, boolean sense) {
)
}
/* Helper for `def_is_open` to give better join order */
// Helper for `def_is_open` to give better join order
private predicate passes_open_files(PyEdgeRefinement refinement) {
passes_open_files(refinement.getSourceVariable(), refinement.getPredecessor().getLastNode(),
refinement.getSense())

View File

@@ -40,13 +40,16 @@ private import semmle.python.objects.ObjectInternal
/**
* A callable that is considered a "safe" external API from a security perspective.
*/
class SafeExternalAPI extends Unit {
class SafeExternalApi extends Unit {
/** Gets a callable that is considered a "safe" external API from a security perspective. */
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
}
/** DEPRECATED: Alias for SafeExternalApi */
deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalAPI extends SafeExternalAPI {
private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
exists(CallableValue cv | cv = result.getCallableValue() |
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
@@ -61,15 +64,15 @@ private class DefaultSafeExternalAPI extends SafeExternalAPI {
}
/** A node representing data being passed to an external API through a call. */
class ExternalAPIDataNode extends DataFlow::Node {
class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCall call;
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalAPIDataNode() {
ExternalApiDataNode() {
exists(call.getLocation().getFile().getRelativePath()) and
callable = call.getCallable() and
not any(SafeExternalAPI safe).getSafeCallable() = callable and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
@@ -98,38 +101,47 @@ class ExternalAPIDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalAPIDataNode`s. */
class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
/** DEPRECATED: Alias for ExternalApiDataNode */
deprecated class ExternalAPIDataNode = ExternalApiDataNode;
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
class UntrustedDataToExternalApiConfig extends TaintTracking::Configuration {
UntrustedDataToExternalApiConfig() { this = "UntrustedDataToExternalAPIConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalApiDataNode }
}
/** DEPRECATED: Alias for UntrustedDataToExternalApiConfig */
deprecated class UntrustedDataToExternalAPIConfig = UntrustedDataToExternalApiConfig;
/** A node representing untrusted data being passed to an external API. */
class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
class UntrustedExternalApiDataNode extends ExternalApiDataNode {
UntrustedExternalApiDataNode() { any(UntrustedDataToExternalApiConfig c).hasFlow(_, this) }
/** Gets a source of untrusted data which is passed to this external API data node. */
DataFlow::Node getAnUntrustedSource() {
any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
any(UntrustedDataToExternalApiConfig c).hasFlow(result, this)
}
}
private newtype TExternalAPI =
TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalAPIDataNode n |
/** DEPRECATED: Alias for UntrustedExternalApiDataNode */
deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
private newtype TExternalApi =
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalApiDataNode n |
callable = n.getCallable() and
index = n.getIndex()
)
}
/** An external API which is used with untrusted data. */
class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
class ExternalApiUsedWithUntrustedData extends TExternalApi {
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalAPIDataNode getUntrustedDataNode() {
this = TExternalAPIParameter(result.getCallable(), result.getIndex())
UntrustedExternalApiDataNode getUntrustedDataNode() {
this = TExternalApiParameter(result.getCallable(), result.getIndex())
}
/** Gets the number of untrusted sources used with this external API. */
@@ -143,7 +155,7 @@ class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalAPIParameter(callable, index) and
this = TExternalApiParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
@@ -167,6 +179,9 @@ class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
}
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |

View File

@@ -11,7 +11,7 @@
import python
import ExternalAPIs
from ExternalAPIUsedWithUntrustedData externalAPI
select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
from ExternalApiUsedWithUntrustedData externalApi
select externalApi, count(externalApi.getUntrustedDataNode()) as numberOfUses,
externalApi.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
numberOfUntrustedSources desc

View File

@@ -14,11 +14,11 @@ import ExternalAPIs
import DataFlow::PathGraph
from
UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
ExternalAPIUsedWithUntrustedData externalAPI
UntrustedDataToExternalApiConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
ExternalApiUsedWithUntrustedData externalApi
where
sink.getNode() = externalAPI.getUntrustedDataNode() and
sink.getNode() = externalApi.getUntrustedDataNode() and
config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Call to " + externalAPI.toString() + " with untrusted data from $@.", source.getNode(),
"Call to " + externalApi.toString() + " with untrusted data from $@.", source.getNode(),
source.toString()

View File

@@ -0,0 +1,202 @@
/**
* Provides predicates for reasoning about regular expressions
* that match URLs and hostname patterns.
*/
private import HostnameRegexpSpecific
/**
 * Holds if the value of the constant `term` contains something that is
 * unlikely to appear in the origin part of a URL.
 */
predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
  exists(string value | value = term.getValue() |
    // The value is flagged when it contains any of the following:
    // - a character outside `[a-zA-Z0-9.:/-]`
    // - two consecutive dashes
    // - a colon followed by a character other than a digit or a slash
    // - a slash that is not preceded by a slash, a colon, or the start of the value
    value.regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
  )
}
/**
 * Holds if `term` matches exactly a literal dot: either a character escape
 * whose value is `.` (as in `\.`), or a non-inverted character class whose
 * single child is the constant `.` (as in `[.]`).
 */
predicate isDotConstant(RegExpTerm term) {
  exists(RegExpCharEscape escape | escape = term and escape.getValue() = ".")
  or
  term =
    any(RegExpCharacterClass cls |
      cls.getNumChild() = 1 and
      not cls.isInverted() and
      cls.getAChild().(RegExpConstant).getValue() = "."
    )
}
/** Holds if `term` is either a dot constant (see `isDotConstant`) or the `.` wildcard. */
predicate isDotLike(RegExpTerm term) {
  isDotConstant(term)
  or
  term instanceof RegExpDot
}
/**
 * Holds if `term` can only be matched at the beginning of the input.
 *
 * This is the case for a root term, and for a term whose parent satisfies
 * this predicate when the term is an alternative or group member of that
 * parent, or comes first in a parent sequence (optionally after a leading `^`).
 */
predicate matchesBeginningOfString(RegExpTerm term) {
  term.isRootTerm()
  or
  exists(RegExpTerm parent | matchesBeginningOfString(parent) |
    term = parent.(RegExpGroup).getAChild()
    or
    term = parent.(RegExpAlt).getAChild()
    or
    exists(RegExpSequence parentSeq | parentSeq = parent |
      // first element of the sequence
      term = parentSeq.getChild(0)
      or
      // second element, when the first one is a `^` anchor
      parentSeq.getChild(0) instanceof RegExpCaret and
      term = parentSeq.getChild(1)
    )
  )
}
/**
 * Holds if child `i` of `seq` is a constant that starts with a common
 * top-level domain (such as `com`), optionally followed by a port and a
 * path/query/fragment suffix, and child `i - 1` is dot-like.
 *
 * The case where the dot and the top-level domain are the first two children
 * of a sequence matched at the very beginning of the regexp is excluded.
 *
 * `i` is bound to the index of the child holding the top-level domain.
 */
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
  exists(RegExpConstant tld, string tldPattern |
    tld = seq.getChild(i) and
    tldPattern = "(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?" and
    tld.getValue().regexpMatch(tldPattern)
  ) and
  isDotLike(seq.getChild(i - 1)) and
  not (matchesBeginningOfString(seq) and i = 1)
}
/**
 * Holds if `seq` contains, at some position, a top-level domain preceded by
 * a dot, such as `.com`.
 */
predicate hasTopLevelDomainEnding(RegExpSequence seq) {
  exists(int i | hasTopLevelDomainEnding(seq, i))
}
/**
 * Holds if `term` always matches a hostname: every alternative contains a
 * hostname pattern, and that pattern is not nested inside a quantifier.
 */
predicate alwaysMatchesHostname(RegExpTerm term) {
  // an alternation in which every branch matches a hostname
  alwaysMatchesHostnameAlt(term)
  or
  // any other non-quantifier term with a child that matches a hostname
  not (term instanceof RegExpAlt or term instanceof RegExpQuantifier) and
  exists(RegExpTerm child | child = term.getAChild() | alwaysMatchesHostname(child))
  or
  // `localhost` is considered a hostname pattern, but has no TLD
  term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
  or
  // a sequence ending in a dot followed by a top-level domain
  hasTopLevelDomainEnding(term, _)
}
/** Holds if each alternative of `alt` always matches a hostname. */
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
  exists(int lastIndex | lastIndex = alt.getNumChild() - 1 |
    alwaysMatchesHostnameAlt(alt, lastIndex)
  )
}
/**
 * Holds if each of the children of `alt` at indices `0` through `i`
 * (inclusive) always matches a hostname.
 *
 * This is used instead of `forall` to avoid materializing the set of alternatives
 * that don't contain hostnames, which is much larger.
 */
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
// Base case: the first alternative on its own.
alwaysMatchesHostname(alt.getChild(0)) and i = 0
or
// Recursive case: alternatives `0 .. i - 1` already hold, and so does alternative `i`.
alwaysMatchesHostnameAlt(alt, i - 1) and
alwaysMatchesHostname(alt.getChild(i))
}
/**
 * Holds if some ancestor of `term` is an alternation, a quantifier, or a
 * `RegExpSubPattern`.
 *
 * A term nested in an alternation or quantifier can not be expected to
 * correspond to a unique match, and sub-patterns (lookaround assertions)
 * are rarely used for capture groups.
 */
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
  exists(RegExpParent parent | parent = term.getParent() |
    // the direct parent is one of the relevant constructs ...
    (
      parent instanceof RegExpSubPattern or
      parent instanceof RegExpQuantifier or
      parent instanceof RegExpAlt
    )
    or
    // ... or the parent itself is nested inside one
    isInsideChoiceOrSubPattern(parent)
  )
}
/**
 * Holds if `group` is a capturing group that is not nested inside an
 * alternation, quantifier, or sub-pattern, and is therefore likely to be
 * used as a capture group.
 */
predicate isLikelyCaptureGroup(RegExpGroup group) {
  not isInsideChoiceOrSubPattern(group) and
  group.isCapture()
}
/**
 * Holds if two adjacent children of `seq` are both dot-like, that is, `seq`
 * contains `..` or the equivalent with escaped dots.
 *
 * At least one of these dots is not intended to be a subdomain separator,
 * so we avoid flagging the pattern in this case.
 */
predicate hasConsecutiveDots(RegExpSequence seq) {
  exists(int first, int second | second = first + 1 |
    isDotLike(seq.getChild(first)) and
    isDotLike(seq.getChild(second))
  )
}
/**
 * Holds if `seq` is `regexp` itself or a term nested inside it, and `seq` looks like
 * a hostname pattern that matches more hosts than intended. `msg` is the tail of an
 * alert message describing the problem.
 *
 * `seq` must have a top-level domain ending at some child index `i`, and a `.` wildcard
 * must occur in (or below) one of the children before `i`, other than the child
 * directly in front of the top-level domain. Sequences are not flagged when a child
 * before `i` contains a constant that is invalid inside an origin, when a child from
 * `i` onwards contains a likely capture group, or when the wildcard's parent has
 * consecutive dots.
 */
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
// The hostname shown in the message is the raw text of the three children ending at the TLD.
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
// A quantified wildcard (such as `.*`) is only reported under the extra conditions below;
// any other wildcard gets the generic "unescaped '.'" message.
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
/**
 * Holds if `hostSequence` is part of the regular expression term of a pattern source
 * and is flagged by `isIncompleteHostNameRegExpPattern`.
 *
 * `message` is the full alert text. `aux` is a node where the pattern is parsed when the
 * pattern source has a parse node other than itself, and the pattern source itself
 * otherwise. `label` is always `"here"`.
 */
predicate incompleteHostnameRegExp(
  RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
) {
  exists(RegExpPatternSource patternSource, string problem, string subject |
    isIncompleteHostNameRegExpPattern(patternSource.getRegExpTerm(), hostSequence, problem) and
    (
      // The source has a parse node that differs from the source itself.
      patternSource.getAParse() != patternSource and
      aux = patternSource.getAParse() and
      subject = "string, which is used as a regular expression $@,"
      or
      // The source has no parse node other than itself.
      not patternSource.getAParse() != patternSource and
      aux = patternSource and
      subject = "regular expression"
    ) and
    message = "This " + subject + " " + problem and
    label = "here"
  )
}

View File

@@ -0,0 +1,3 @@
import semmle.python.RegexTreeView
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.Regexp

View File

@@ -30,7 +30,7 @@
<p>
Escape all meta-characters appropriately when constructing
regular expressions for security checks, pay special attention to the
regular expressions for security checks, and pay special attention to the
<code>.</code> meta-character.
</p>

View File

@@ -8,35 +8,9 @@
* @id py/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
* external/cwe/cwe-020
*/
import python
import semmle.python.regex
import HostnameRegexpShared
private string commonTopLevelDomainRegex() { result = "com|org|edu|gov|uk|net|io" }
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
*/
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
hostPart =
pattern
.regexpCapture("(?i).*" +
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + commonTopLevelDomainRegex() + "))" + ".*", 1)
}
from Regex r, string pattern, string hostPart
where
r.getText() = pattern and
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + commonTopLevelDomainRegex() + ").*[(][?]:.*[)].*")
select r,
"This regular expression has an unescaped '.' before '" + hostPart +
"', so it might match more hosts than expected."
query predicate problems = incompleteHostnameRegExp/4;

View File

@@ -17,10 +17,10 @@
*/
import python
import semmle.python.security.dataflow.PathInjection
import semmle.python.security.dataflow.PathInjectionQuery
import DataFlow::PathGraph
from PathInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This path depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -122,7 +122,7 @@ class ExtractMembersSink extends TaintSink {
class TarFileInfoSanitizer extends Sanitizer {
TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
/** The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
/* The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
taint instanceof TarFileInfo and
clears_taint_on_false_edge(test.getTest(), test.getSense())

View File

@@ -15,10 +15,10 @@
*/
import python
import semmle.python.security.dataflow.CommandInjection
import semmle.python.security.dataflow.CommandInjectionQuery
import DataFlow::PathGraph
from CommandInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -14,10 +14,10 @@
*/
import python
import semmle.python.security.dataflow.ReflectedXSS
import semmle.python.security.dataflow.ReflectedXssQuery
import DataFlow::PathGraph
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -12,10 +12,10 @@
*/
import python
import semmle.python.security.dataflow.SqlInjection
import semmle.python.security.dataflow.SqlInjectionQuery
import DataFlow::PathGraph
from SqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -13,15 +13,15 @@
// Determine precision above
import python
import semmle.python.security.dataflow.LdapInjection
import semmle.python.security.dataflow.LdapInjectionQuery
import DataFlow::PathGraph
from DataFlow::PathNode source, DataFlow::PathNode sink, string parameterName
where
any(LdapInjection::DnConfiguration dnConfig).hasFlowPath(source, sink) and
any(DnConfiguration dnConfig).hasFlowPath(source, sink) and
parameterName = "DN"
or
any(LdapInjection::FilterConfiguration filterConfig).hasFlowPath(source, sink) and
any(FilterConfiguration filterConfig).hasFlowPath(source, sink) and
parameterName = "filter"
select sink.getNode(), source, sink,
"$@ LDAP query parameter (" + parameterName + ") comes from $@.", sink.getNode(), "This",

View File

@@ -15,10 +15,10 @@
*/
import python
import semmle.python.security.dataflow.CodeInjection
import semmle.python.security.dataflow.CodeInjectionQuery
import DataFlow::PathGraph
from CodeInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -16,6 +16,6 @@
import semmle.python.security.BadTagFilterQuery
from HTMLMatchingRegExp regexp, string msg
from HtmlMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
select regexp, msg

View File

@@ -12,10 +12,10 @@
*/
import python
import semmle.python.security.dataflow.LogInjection
import semmle.python.security.dataflow.LogInjectionQuery
import DataFlow::PathGraph
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"

View File

@@ -14,10 +14,10 @@
*/
import python
import semmle.python.security.dataflow.StackTraceExposure
import semmle.python.security.dataflow.StackTraceExposureQuery
import DataFlow::PathGraph
from StackTraceExposure::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
"Error information"

View File

@@ -15,41 +15,11 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Gets a call to a method that makes an outgoing request using the `requests` module,
* such as `requests.get` or `requests.put`, with the specified HTTP verb `verb`
*/
DataFlow::CallCfgNode outgoingRequestCall(string verb) {
verb = HTTP::httpVerbLower() and
result = API::moduleImport("requests").getMember(verb).getACall()
}
/** Gets the "verfiy" argument to a outgoingRequestCall. */
DataFlow::Node verifyArg(DataFlow::CallCfgNode call) {
call = outgoingRequestCall(_) and
result = call.getArgByName("verify")
}
/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = verifyArg(_) and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = verifyArgBacktracker(t2, arg).backtrack(t2, t))
}
/** Gets a back-reference to the verify argument `arg`. */
DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
result = verifyArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
from DataFlow::CallCfgNode call, DataFlow::Node falseyOrigin, string verb
from API::CallNode call, DataFlow::Node falseyOrigin, string verb
where
call = outgoingRequestCall(verb) and
falseyOrigin = verifyArgBacktracker(verifyArg(call)) and
verb = HTTP::httpVerbLower() and
call = API::moduleImport("requests").getMember(verb).getACall() and
falseyOrigin = call.getKeywordParameter("verify").getAValueReachingRhs() and
// requests treats `None` as the default and all other "falsey" values as `False`.
falseyOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and
not falseyOrigin.asExpr() instanceof None

View File

@@ -16,7 +16,7 @@
import python
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextLogging::CleartextLogging
import semmle.python.security.dataflow.CleartextLoggingQuery
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where

View File

@@ -16,7 +16,7 @@
import python
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextStorage::CleartextStorage
import semmle.python.security.dataflow.CleartextStorageQuery
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where

View File

@@ -85,7 +85,7 @@ abstract class TlsLibrary extends string {
bindingset[this]
TlsLibrary() { any() }
/** The name of a specific protocol version. */
/** Gets the name of a specific protocol version. */
abstract string specific_version_name(ProtocolVersion version);
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */

View File

@@ -13,7 +13,7 @@
*/
import python
import semmle.python.security.dataflow.WeakSensitiveDataHashing
import semmle.python.security.dataflow.WeakSensitiveDataHashingQuery
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph

View File

@@ -13,9 +13,9 @@
*/
import python
import semmle.python.security.dataflow.UnsafeDeserialization
import semmle.python.security.dataflow.UnsafeDeserializationQuery
import DataFlow::PathGraph
from UnsafeDeserialization::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -13,10 +13,10 @@
*/
import python
import semmle.python.security.dataflow.UrlRedirect
import semmle.python.security.dataflow.UrlRedirectQuery
import DataFlow::PathGraph
from UrlRedirect::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
"A user-provided value"

View File

@@ -0,0 +1,24 @@
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
<qhelp>
<overview>
<p>
If an XPath expression is built using string concatenation, and the components of the concatenation
include user input, it makes it very easy for a user to create a malicious XPath expression.
</p>
</overview>
<recommendation>
<p>
If user input must be included in an XPath expression, either sanitize the data or use variable
references to safely embed it without altering the structure of the expression.
</p>
</recommendation>
<example>
<p>In the example below, the xpath query is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xpathBad.py" />
<p> This can be fixed by using a parameterized query as shown below.</p>
<sample src="xpathGood.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/XPATH_Injection">XPATH Injection</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,20 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 9.8
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.dataflow.XpathInjectionQuery
import DataFlow::PathGraph
// Report each flow path found by `Configuration`. The underlying data-flow nodes
// (rather than the path nodes) are used as the alert location and as the `$@` link
// target, matching the select clauses of the other path-problem queries in this pack.
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This Xpath query depends on $@.", source.getNode(),
  "a user-provided value"

View File

@@ -14,12 +14,12 @@
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoS
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph
from
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode,
PolynomialBackTrackingTerm regexp
where
config.hasFlowPath(source, sink) and
sinkNode = sink.getNode() and

View File

@@ -14,15 +14,15 @@
import python
private import semmle.python.Concepts
import semmle.python.security.dataflow.RegexInjection
import semmle.python.security.dataflow.RegexInjectionQuery
import DataFlow::PathGraph
from
RegexInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexExecution regexExecution
where
config.hasFlowPath(source, sink) and
regexExecution = sink.getNode().(RegexInjection::Sink).getRegexExecution()
regexExecution = sink.getNode().(Sink).getRegexExecution()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", regexExecution, regexExecution.getName()

View File

@@ -11,14 +11,14 @@
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import semmle.python.security.dataflow.ServerSideRequestForgeryQuery
import DataFlow::PathGraph
from
FullServerSideRequestForgery::Configuration fullConfig, DataFlow::PathNode source,
FullServerSideRequestForgeryConfiguration fullConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(FullServerSideRequestForgery::Sink).getRequest() and
request = sink.getNode().(Sink).getRequest() and
fullConfig.hasFlowPath(source, sink) and
fullyControlledRequest(request)
select request, source, sink, "The full URL of this request depends on $@.", source.getNode(),

View File

@@ -11,14 +11,14 @@
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import semmle.python.security.dataflow.ServerSideRequestForgeryQuery
import DataFlow::PathGraph
from
PartialServerSideRequestForgery::Configuration partialConfig, DataFlow::PathNode source,
PartialServerSideRequestForgeryConfiguration partialConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(PartialServerSideRequestForgery::Sink).getRequest() and
request = sink.getNode().(Sink).getRequest() and
partialConfig.hasFlowPath(source, sink) and
not fullyControlledRequest(request)
select request, source, sink, "Part of the URL of this request depends on $@.", source.getNode(),

View File

@@ -14,6 +14,14 @@
import python
/**
 * Holds if `node` has a `While` or `For` as its parent node, or is a child node
 * of some node for which this predicate already holds.
 */
predicate isInsideLoop(AstNode node) {
  exists(AstNode parent | parent = node.getParentNode() |
    parent instanceof For
    or
    parent instanceof While
  )
  or
  exists(AstNode enclosing | node = enclosing.getAChildNode() and isInsideLoop(enclosing))
}
from Delete del, Expr e, Function f
where
f.getLastStatement() = del and
@@ -21,7 +29,7 @@ where
f.containsInScope(e) and
not e instanceof Subscript and
not e instanceof Attribute and
not exists(Stmt s | s.(While).contains(del) or s.(For).contains(del)) and
not isInsideLoop(del) and
// False positive: calling `sys.exc_info` within a function results in a
// reference cycle, and an explicit call to `del` helps break this cycle.
not exists(FunctionValue ex |

View File

@@ -5,12 +5,12 @@ import python
*/
class Definition extends NameNode, DefinitionNode {
/**
* The variable defined by this control-flow node.
* Gets the variable defined by this control-flow node.
*/
Variable getVariable() { this.defines(result) }
/**
* The SSA variable corresponding to the current definition. Since SSA variables
* Gets the SSA variable corresponding to the current definition. Since SSA variables
* are only generated for definitions with at least one use, not all definitions
* will have an SSA variable.
*/
@@ -67,7 +67,7 @@ class Definition extends NameNode, DefinitionNode {
}
/**
* An immediate re-definition of this definition's variable.
* Gets an immediate re-definition of this definition's variable.
*/
Definition getARedef() {
result != this and

View File

@@ -47,6 +47,8 @@ predicate simple_literal(Expr e) {
}
/**
* Holds if the redefinition is uninteresting.
*
* A multiple definition is 'uninteresting' if it sets a variable to a
* simple literal before reassigning it.
* x = None

View File

@@ -17,30 +17,13 @@ import Shadowing
import semmle.python.types.Builtins
predicate allow_list(string name) {
/* These are rarely used and thus unlikely to be confusing */
name = "iter" or
name = "next" or
name = "input" or
name = "file" or
name = "apply" or
name = "slice" or
name = "buffer" or
name = "coerce" or
name = "intern" or
name = "exit" or
name = "quit" or
name = "license" or
/* These are short and/or hard to avoid */
name = "dir" or
name = "id" or
name = "max" or
name = "min" or
name = "sum" or
name = "cmp" or
name = "chr" or
name = "ord" or
name = "bytes" or
name = "_"
name in [
/* These are rarely used and thus unlikely to be confusing */
"iter", "next", "input", "file", "apply", "slice", "buffer", "coerce", "intern", "exit",
"quit", "license",
/* These are short and/or hard to avoid */
"dir", "id", "max", "min", "sum", "cmp", "chr", "ord", "bytes", "_",
]
}
predicate shadows(Name d, string name, Function scope, int line) {

View File

@@ -2,7 +2,7 @@ import python
import Loop
import semmle.python.dataflow.TaintTracking
/** Marker for "uninitialized". */
/** A marker for "uninitialized". */
class Uninitialized extends TaintKind {
Uninitialized() { this = "undefined" }
}

View File

@@ -259,7 +259,7 @@ predicate file_consistency(string clsname, string problem, string what) {
exists(Container f |
clsname = f.getAQlClass() and
uniqueness_error(count(f.toString()), "toString", problem) and
what = "file " + f.getName()
what = "file " + f.getAbsolutePath()
)
}

View File

@@ -467,10 +467,10 @@ Definition getUniqueDefinition(Expr use) {
not result = TLocalDefinition(use)
}
/** Helper class to get suitable locations for attributes */
class NiceLocationExpr extends @py_expr {
/** A helper class to get suitable locations for attributes */
class NiceLocationExpr extends Expr {
/** Gets a textual representation of this element. */
string toString() { result = this.(Expr).toString() }
override string toString() { result = this.(Expr).toString() }
/**
* Holds if this element is at the specified location.

View File

@@ -58,7 +58,7 @@ predicate ok_to_fail(ImportExpr ie) {
os_specific_import(ie) != get_os()
}
class VersionTest extends @py_flow_node {
class VersionTest extends ControlFlowNode {
VersionTest() {
exists(string name |
name.matches("%version%") and
@@ -66,7 +66,7 @@ class VersionTest extends @py_flow_node {
)
}
string toString() { result = "VersionTest" }
override string toString() { result = "VersionTest" }
}
/** A guard on the version of the Python interpreter */

View File

@@ -1,4 +0,0 @@
---
category: newQuery
---
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).

View File

@@ -1,4 +0,0 @@
---
category: newQuery
---
* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).

View File

@@ -0,0 +1,6 @@
## 0.0.10
### New Queries
* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).

View File

@@ -0,0 +1,5 @@
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).

View File

@@ -0,0 +1 @@
## 0.0.12

View File

@@ -0,0 +1 @@
## 0.0.13

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.9
lastReleaseVersion: 0.0.13

View File

@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious zip archive without validating that the destination file path
is within the destination directory can cause files outside the destination directory to be
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
archive paths.</p>
<p>Zip archives contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a Zip archive contains a file entry <code>..\sneaky-file</code>, and the Zip archive
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
written to <code>c:\sneaky-file</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from Zip archive entries are validated
to prevent writing files to unexpected locations.</p>
<p>The recommended way of writing an output file from a Zip archive entry is to call <code>extract()</code> or <code>extractall()</code>.
</p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
</p>
<sample src="zipslip_bad.py" />
<p>To fix this vulnerability, we need to call the function <code>extractall()</code>.
</p>
<sample src="zipslip_good.py" />
</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name Arbitrary file write during archive extraction ("Zip Slip")
* @description Extracting files from a malicious archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/zipslip
* @problem.severity error
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-022
*/
import python
import experimental.semmle.python.security.ZipSlip
import DataFlow::PathGraph
// Report every path `config` finds from `source` to `sink`. The alert is placed on the
// sink node and the `$@` placeholder in the message links to the source node.
from ZipSlipConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Extraction of zipfile from $@", source.getNode(),
"a potentially untrusted source"

View File

@@ -0,0 +1,16 @@
import shutil
import tarfile  # required by unzip(); the sample previously used `tarfile` without importing it
import zipfile


def unzip(filename):
    """Deliberately unsafe example: copy every archive entry without validating its path."""
    with tarfile.open(filename) as zipf:
        #BAD : This could write any file on the filesystem.
        for entry in zipf:
            shutil.copyfile(entry, "/tmp/unpack/")


def unzip4(filename):
    """Deliberately unsafe example: copy the contents of every zip member to `dstfile`."""
    zf = zipfile.ZipFile(filename)
    filelist = zf.namelist()
    for x in filelist:
        with zf.open(x) as srcf:
            # NOTE(review): `dstfile` is not defined anywhere in this sample; presumably it
            # stands for a destination file object derived from the entry name. Confirm.
            shutil.copyfileobj(srcf, dstfile)

View File

@@ -0,0 +1,10 @@
import zipfile
def unzip(filename, dir):
    """Extract every member of the zip archive `filename` into the directory `dir`.

    The archive is opened in a `with` block so its file handle is released even
    when extraction raises.
    """
    with zipfile.ZipFile(filename) as zf:
        zf.extractall(dir)
def unzip1(filename, dir):
    """Extract the members of the zip archive `filename` one at a time into `dir`.

    `ZipFile.extract(member, path)` takes the archive member first and the target
    directory second; the earlier `zf.extract(dir)` passed the directory as the
    member name and therefore never extracted into `dir`.
    """
    # Context manager ensures the archive handle is closed after extraction.
    with zipfile.ZipFile(filename) as zf:
        for member in zf.infolist():
            zf.extract(member, dir)

View File

@@ -13,7 +13,7 @@ import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
predicate authenticatesImproperly(LDAPBind ldapBind) {
predicate authenticatesImproperly(LdapBind ldapBind) {
(
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
not exists(ldapBind.getPassword())
@@ -25,6 +25,6 @@ predicate authenticatesImproperly(LDAPBind ldapBind) {
)
}
from LDAPBind ldapBind
from LdapBind ldapBind
where authenticatesImproperly(ldapBind)
select ldapBind, "The following LDAP bind operation is executed without authentication"

View File

@@ -12,7 +12,7 @@ import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.JWT
from JWTEncoding jwtEncoding, string affectedComponent
from JwtEncoding jwtEncoding, string affectedComponent
where
affectedComponent = "algorithm" and
isEmptyOrNone(jwtEncoding.getAlgorithm())

View File

@@ -12,6 +12,6 @@
import python
import experimental.semmle.python.Concepts
from JWTDecoding jwtDecoding
from JwtDecoding jwtDecoding
where not jwtDecoding.verifiesSignature()
select jwtDecoding.getPayload(), "is not verified with a cryptographic secret or public key."

View File

@@ -18,7 +18,7 @@ import ClientSuppliedIpUsedInSecurityCheckLib
import DataFlow::PathGraph
/**
* Taint-tracking configuration tracing flow from obtaining a client ip from an HTTP header to a sensitive use.
* A taint-tracking configuration tracing flow from obtaining a client ip from an HTTP header to a sensitive use.
*/
class ClientSuppliedIpUsedInSecurityCheckConfig extends TaintTracking::Configuration {
ClientSuppliedIpUsedInSecurityCheckConfig() { this = "ClientSuppliedIpUsedInSecurityCheckConfig" }

View File

@@ -14,7 +14,7 @@ import python
import DataFlow::PathGraph
import experimental.semmle.python.security.LDAPInsecureAuth
from LDAPInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LdapInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ is authenticated insecurely.", sink.getNode(),
"This LDAP host"

View File

@@ -0,0 +1,25 @@
/**
* @name SimpleXMLRPCServer DoS vulnerability
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
* @kind problem
* @problem.severity warning
* @precision high
* @id py/simple-xml-rpc-server-dos
* @tags security
* external/cwe/cwe-776
*/
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
// Flag every call to `xmlrpc.server.SimpleXMLRPCServer`. `kinds` does not depend on the call:
// it is the ", "-separated concatenation of the Billion Laughs and Quadratic Blowup
// vulnerability kinds and is only used to build the alert message.
from DataFlow::CallCfgNode call, string kinds
where
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
kinds =
strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
kind.isBillionLaughs() or kind.isQuadraticBlowup()
|
kind, ", "
)
select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."

View File

@@ -0,0 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<test>&xxe;</test>

View File

@@ -0,0 +1,25 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
app = Flask(__name__)
# BAD
@app.route("/bad")
def bad():
    """Parse the `xml_content` request argument with a default-configured lxml parser."""
    untrusted_xml = request.args['xml_content']
    default_parser = lxml.etree.XMLParser()
    document = xml.etree.ElementTree.fromstring(untrusted_xml, parser=default_parser)
    return document.text
# GOOD
@app.route("/good")
def good():
    """Parse the `xml_content` request argument with entity resolution switched off."""
    untrusted_xml = request.args['xml_content']
    hardened_parser = lxml.etree.XMLParser(resolve_entities=False)
    document = xml.etree.ElementTree.fromstring(untrusted_xml, parser=hardened_parser)
    return document.text

View File

@@ -0,0 +1,48 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of service attacks can also be carried out
in this situation.
</p>
</overview>
<recommendation>
<p>
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package that aims
to prevent any potentially malicious operation.
</p>
</recommendation>
<example>
<p>
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
</p>
<sample src="XmlEntityInjection.py"/>
<p>
If an input (<code>xml_content</code>) like the following XML content is sent to <code>/bad</code>, the response will contain the contents of
<code>/etc/passwd</code>.
</p>
<sample src="XXE.xml"/>
</example>
<references>
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
<li>Paper by Timothy Morgan: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a>.</li>
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name XML Entity injection
* @description User input should not be parsed allowing the injection of entities.
* @kind path-problem
* @problem.severity error
* @id py/xml-entity-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
// For each flow path, `kinds` is the ", "-separated concatenation of every vulnerability kind
// that the sink node reports through `getVulnerableKind()`. `strictconcat` has no result when
// there is nothing to concatenate, so a sink that reports no kind produces no alert.
from
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
DataFlow::PathNode sink, string kinds
where
config.hasFlowPath(source, sink) and
kinds =
strictconcat(string kind |
kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
|
kind, ", "
)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -1,30 +0,0 @@
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
<qhelp>
<overview>
<p>
Using user-supplied information to construct an XPath query for XML data can
result in an XPath injection flaw. By sending intentionally malformed information,
an attacker can access data that they may not normally have access to.
They may even be able to elevate their privileges on the web site if the XML data
is being used for authentication (such as an XML based user file).
</p>
</overview>
<recommendation>
<p>
XPath injection can be prevented using parameterized XPath interface or escaping the user input to make it safe to include in a dynamically constructed query.
If you are using quotes to terminate untrusted input in a dynamically constructed XPath query, then you need to escape that quote in the untrusted input to ensure the untrusted data can't try to break out of that quoted context.
</p>
<p>
Another, better mitigation option is to use a precompiled XPath query. Precompiled XPath queries are already preset before the program executes, rather than created on the fly after the user's input has been added to the string. This is a better route because you don't have to worry about missing a character that should have been escaped.
</p>
</recommendation>
<example>
<p>In the example below, the xpath query is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xpathBad.py" />
<p> This can be fixed by using a parameterized query as shown below.</p>
<sample src="xpathGood.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/XPATH_Injection">XPath Injection</a>.</li>
</references>
</qhelp>

View File

@@ -1,33 +0,0 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
import XpathInjection::XpathInjection
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for detecting "XPath injection" vulnerabilities.
 */
class XpathInjectionConfiguration extends TaintTracking::Configuration {
  // The string is the configuration's identity, so it has to be unique to this class.
  // The previous value, "PathNotNormalizedConfiguration", does not describe this class
  // (presumably copied from another query) and could coincide with another configuration.
  XpathInjectionConfiguration() { this = "XpathInjectionConfiguration" }

  /** Holds if `source` is a `Source`. */
  override predicate isSource(DataFlow::Node source) { source instanceof Source }

  /** Holds if `sink` is a `Sink`. */
  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
}
// Report every data-flow path that `config` finds from `source` to `sink`.
// The alert is placed on the sink; the `$@` placeholder in the message links to the source.
from XpathInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"

View File

@@ -1,35 +0,0 @@
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `XpathInjection::Configuration` is needed, otherwise
* `XpathInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "Xpath Injection" vulnerabilities.
*/
module XpathInjection {
// Brings the members of `XpathInjectionCustomizations::XpathInjection` into scope; the
// `Source`, `Sink`, `Sanitizer` and `SanitizerGuard` names used below presumably come from there.
import XpathInjectionCustomizations::XpathInjection
/**
* A taint-tracking configuration for detecting "XPath injection" vulnerabilities.
*
* Sources, sinks, sanitizers and sanitizer guards are exactly the nodes (or guards)
* that are instances of `Source`, `Sink`, `Sanitizer` and `SanitizerGuard` respectively.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xpath Injection" }
/** Holds if `source` is a `Source`. */
override predicate isSource(DataFlow::Node source) { source instanceof Source }
/** Holds if `sink` is a `Sink`. */
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
/** Holds if `node` is a `Sanitizer`. */
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
/** Holds if `guard` is a `SanitizerGuard`. */
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -1,105 +0,0 @@
/**
* Provides class and predicates to track external data that
* may represent malicious xpath query objects.
*
* This module is intended to be imported into a taint-tracking query.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/** Provides sources, sinks and sanitizers for reasoning about XPath injection. */
module XpathInjection {
/**
* A data flow source for "XPath injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "XPath injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "XPath injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "XPath injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** Gets an API node referring to `lxml.etree`. */
API::Node etree() { result = API::moduleImport("lxml").getMember("etree") }
/** Gets an API node referring to `lxml.etree.fromstring`. */
API::Node etreeFromString() { result = etree().getMember("fromstring") }
/** Gets an API node referring to `lxml.etree.parse`. */
API::Node etreeParse() { result = etree().getMember("parse") }
/** Gets an API node referring to `libxml2.parseFile`. */
API::Node libxml2parseFile() { result = API::moduleImport("libxml2").getMember("parseFile") }
/**
* A sink representing the first argument to an `etree.XPath` or `etree.ETXPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
* find_text = etree.ETXPath("`sink`")
*/
private class EtreeXpathArgument extends Sink {
EtreeXpathArgument() { this = etree().getMember(["XPath", "ETXPath"]).getACall().getArg(0) }
}
/**
* A sink representing the first argument to an `xpath` call on the value
* returned by `etree.fromstring`.
*
* from lxml import etree
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
* find_text = root.xpath("`sink`")
*/
private class EtreeFromstringXpathArgument extends Sink {
EtreeFromstringXpathArgument() {
this = etreeFromString().getReturn().getMember("xpath").getACall().getArg(0)
}
}
/**
* A sink representing the first argument to an `xpath` call on the value
* returned by `etree.parse`.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends Sink {
ParseXpathArgument() { this = etreeParse().getReturn().getMember("xpath").getACall().getArg(0) }
}
/**
* A sink representing the first argument to an `xpathEval` call on the value
* returned by `libxml2.parseFile`.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends Sink {
ParseFileXpathEvalArgument() {
this = libxml2parseFile().getReturn().getMember("xpathEval").getACall().getArg(0)
}
}
}

View File

@@ -13,7 +13,7 @@ import python
import experimental.semmle.python.security.injection.NoSQLInjection
import DataFlow::PathGraph
from NoSQLInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from NoSqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
"user-provided value"

View File

@@ -14,8 +14,143 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling file-copying APIs. */
module CopyFile {
/**
* A data-flow node that copies a file.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CopyFile` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the path.
*/
abstract DataFlow::Node getAPathArgument();
/**
* Gets the `fsrc` argument.
*
* NOTE(review): presumably the source file object, as in
* `shutil.copyfileobj(fsrc, fdst)`; confirm against the implementing models.
*/
abstract DataFlow::Node getfsrcArgument();
}
}
/**
* A data-flow node that copies a file.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CopyFile::Range` instead.
*/
class CopyFile extends DataFlow::Node {
// The `CopyFile::Range` instance this node is equal to; both member predicates delegate to it.
CopyFile::Range range;
CopyFile() { this = range }
/** Gets the argument containing the path, as reported by the underlying `CopyFile::Range`. */
DataFlow::Node getAPathArgument() { result = range.getAPathArgument() }
/** Gets the `fsrc` argument, as reported by the underlying `CopyFile::Range`. */
DataFlow::Node getfsrcArgument() { result = range.getfsrcArgument() }
}
/** Provides classes for modeling log-related APIs. */
module LogOutput {
/**
* A data-flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets a parameter value of the log output function.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data-flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LogOutput::Range` instead.
*/
class LogOutput extends DataFlow::Node {
// The `LogOutput::Range` instance this node is equal to; `getAnInput` delegates to it.
LogOutput::Range range;
LogOutput() { this = range }
/** Gets a parameter value of the log output function, as reported by the underlying `LogOutput::Range`. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/**
* Provides classes for modeling XML parsing and the vulnerabilities it may be exposed to.
*
* Because both the normal and the experimental Concepts contain a module for XML,
* the experimental module has to be given this distinct name.
*/
module ExperimentalXML {
/**
* A kind of XML vulnerability.
*
* See https://pypi.org/project/defusedxml/#python-xml-libraries
*/
class XMLVulnerabilityKind extends string {
XMLVulnerabilityKind() {
this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
}
/** Holds if this is the Billion Laughs vulnerability kind. */
predicate isBillionLaughs() { this = "Billion Laughs" }
/** Holds if this is the Quadratic Blowup vulnerability kind. */
predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
/** Holds if this is the XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds if this is the DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XMLParsing::Range` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the content to parse.
*/
abstract DataFlow::Node getAnInput();
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
}
}
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
module LdapQuery {
/**
* A data-flow node that collects methods executing a LDAP query.
*
@@ -30,16 +165,19 @@ module LDAPQuery {
}
}
/** DEPRECATED: Alias for LdapQuery */
deprecated module LDAPQuery = LdapQuery;
/**
* A data-flow node that collect methods executing a LDAP query.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPQuery::Range` instead.
*/
class LDAPQuery extends DataFlow::Node {
LDAPQuery::Range range;
class LdapQuery extends DataFlow::Node {
LdapQuery::Range range;
LDAPQuery() { this = range }
LdapQuery() { this = range }
/**
* Gets the argument containing the executed expression.
@@ -47,8 +185,11 @@ class LDAPQuery extends DataFlow::Node {
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** DEPRECATED: Alias for LdapQuery */
deprecated class LDAPQuery = LdapQuery;
/** Provides classes for modeling LDAP components escape-related APIs. */
module LDAPEscape {
module LdapEscape {
/**
* A data-flow node that collects functions escaping LDAP components.
*
@@ -63,16 +204,19 @@ module LDAPEscape {
}
}
/** DEPRECATED: Alias for LdapEscape */
deprecated module LDAPEscape = LdapEscape;
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPEscape::Range` instead.
*/
class LDAPEscape extends DataFlow::Node {
LDAPEscape::Range range;
class LdapEscape extends DataFlow::Node {
LdapEscape::Range range;
LDAPEscape() { this = range }
LdapEscape() { this = range }
/**
* Gets the argument containing the escaped expression.
@@ -80,8 +224,11 @@ class LDAPEscape extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** DEPRECATED: Alias for LdapEscape */
deprecated class LDAPEscape = LdapEscape;
/** Provides classes for modeling LDAP bind-related APIs. */
module LDAPBind {
module LdapBind {
/**
* A data-flow node that collects methods binding a LDAP connection.
*
@@ -106,16 +253,19 @@ module LDAPBind {
}
}
/** DEPRECATED: Alias for LdapBind */
deprecated module LDAPBind = LdapBind;
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LdapBind::Range` instead.
*/
class LDAPBind extends DataFlow::Node {
LDAPBind::Range range;
class LdapBind extends DataFlow::Node {
LdapBind::Range range;
LDAPBind() { this = range }
LdapBind() { this = range }
/**
* Gets the argument containing the binding host.
@@ -133,8 +283,11 @@ class LDAPBind extends DataFlow::Node {
predicate useSSL() { range.useSSL() }
}
/** DEPRECATED: Alias for LdapBind */
deprecated class LDAPBind = LdapBind;
/** Provides classes for modeling SQL sanitization libraries. */
module SQLEscape {
module SqlEscape {
/**
* A data-flow node that collects functions that escape SQL statements.
*
@@ -149,16 +302,19 @@ module SQLEscape {
}
}
/** DEPRECATED: Alias for SqlEscape */
deprecated module SQLEscape = SqlEscape;
/**
* A data-flow node that collects functions escaping SQL statements.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlEscape::Range` instead.
*/
class SQLEscape extends DataFlow::Node {
SQLEscape::Range range;
class SqlEscape extends DataFlow::Node {
SqlEscape::Range range;
SQLEscape() { this = range }
SqlEscape() { this = range }
/**
* Gets the argument containing the raw SQL statement.
@@ -166,8 +322,11 @@ class SQLEscape extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSQLQuery {
/** DEPRECATED: Alias for SqlEscape */
deprecated class SQLEscape = SqlEscape;
/** Provides a class for modeling NoSql execution APIs. */
module NoSqlQuery {
/**
* A data-flow node that executes NoSQL queries.
*
@@ -175,28 +334,34 @@ module NoSQLQuery {
* extend `NoSqlQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
/** Gets the argument that specifies the NoSql query to be executed. */
abstract DataFlow::Node getQuery();
}
}
/** DEPRECATED: Alias for NoSqlQuery */
deprecated module NoSQLQuery = NoSqlQuery;
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSqlQuery::Range` instead.
*/
class NoSQLQuery extends DataFlow::Node {
NoSQLQuery::Range range;
class NoSqlQuery extends DataFlow::Node {
NoSqlQuery::Range range;
NoSQLQuery() { this = range }
NoSqlQuery() { this = range }
/** Gets the argument that specifies the NoSQL query to be executed. */
/** Gets the argument that specifies the NoSql query to be executed. */
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling NoSQL sanitization-related APIs. */
module NoSQLSanitizer {
/** DEPRECATED: Alias for NoSqlQuery */
deprecated class NoSQLQuery = NoSqlQuery;
/** Provides classes for modeling NoSql sanitization-related APIs. */
module NoSqlSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
@@ -204,26 +369,32 @@ module NoSQLSanitizer {
* extend `NoSqlSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be sanitized. */
/** Gets the argument that specifies the NoSql query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/** DEPRECATED: Alias for NoSqlSanitizer */
deprecated module NoSQLSanitizer = NoSqlSanitizer;
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSqlSanitizer::Range` instead.
*/
class NoSQLSanitizer extends DataFlow::Node {
NoSQLSanitizer::Range range;
class NoSqlSanitizer extends DataFlow::Node {
NoSqlSanitizer::Range range;
NoSQLSanitizer() { this = range }
NoSqlSanitizer() { this = range }
/** Gets the argument that specifies the NoSQL query to be sanitized. */
/** Gets the argument that specifies the NoSql query to be sanitized. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** DEPRECATED: Alias for NoSqlSanitizer */
deprecated class NoSQLSanitizer = NoSqlSanitizer;
/** Provides classes for modeling HTTP Header APIs. */
module HeaderDeclaration {
/**
@@ -268,7 +439,7 @@ class HeaderDeclaration extends DataFlow::Node {
}
/** Provides classes for modeling JWT encoding-related APIs. */
module JWTEncoding {
module JwtEncoding {
/**
* A data-flow node that collects methods encoding a JWT token.
*
@@ -298,13 +469,16 @@ module JWTEncoding {
}
}
/** DEPRECATED: Alias for JwtEncoding */
deprecated module JWTEncoding = JwtEncoding;
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JwtEncoding::Range` instead.
*/
class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
class JwtEncoding extends DataFlow::Node instanceof JwtEncoding::Range {
/**
* Gets the argument containing the payload.
*/
@@ -326,8 +500,11 @@ class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
string getAlgorithmString() { result = super.getAlgorithmString() }
}
/** DEPRECATED: Alias for JwtEncoding */
deprecated class JWTEncoding = JwtEncoding;
/** Provides classes for modeling JWT decoding-related APIs. */
module JWTDecoding {
module JwtDecoding {
/**
* A data-flow node that collects methods decoding a JWT token.
*
@@ -367,13 +544,16 @@ module JWTDecoding {
}
}
/** DEPRECATED: Alias for JwtDecoding */
deprecated module JWTDecoding = JwtDecoding;
/**
* A data-flow node that collects methods decoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JwtDecoding::Range` instead.
*/
class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
class JwtDecoding extends DataFlow::Node instanceof JwtDecoding::Range {
/**
* Gets the argument containing the payload.
*/
@@ -405,6 +585,9 @@ class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
predicate verifiesSignature() { super.verifiesSignature() }
}
/** DEPRECATED: Alias for JwtDecoding */
deprecated class JWTDecoding = JwtDecoding;
/** Provides classes for modeling Email APIs. */
module EmailSender {
/**

View File

@@ -3,6 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
@@ -13,6 +14,7 @@ private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Python_JWT
private import experimental.semmle.python.libraries.Authlib
private import experimental.semmle.python.libraries.PythonJose
private import experimental.semmle.python.frameworks.CopyFile
private import experimental.semmle.python.frameworks.Sendgrid
private import experimental.semmle.python.libraries.FlaskMail
private import experimental.semmle.python.libraries.SmtpLib

View File

@@ -0,0 +1,42 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
private module CopyFileImpl {
  /**
   * A call to one of the `shutil` functions `copyfile`, `copy`, `copy2`, `copytree` or `move`,
   * which copy or move files.
   *
   * See:
   * - https://docs.python.org/3/library/shutil.html#shutil.copyfile
   * - https://docs.python.org/3/library/shutil.html#shutil.copy
   * - https://docs.python.org/3/library/shutil.html#shutil.copy2
   * - https://docs.python.org/3/library/shutil.html#shutil.copytree
   * - https://docs.python.org/3/library/shutil.html#shutil.move
   */
  private class CopyFiles extends DataFlow::CallCfgNode, CopyFile::Range {
    CopyFiles() {
      exists(API::Node shutilFunction |
        shutilFunction =
          API::moduleImport("shutil").getMember(["copyfile", "copy", "copy2", "copytree", "move"])
      |
        this = shutilFunction.getACall()
      )
    }

    /** No file-object source argument is modeled for these calls. */
    override DataFlow::Node getfsrcArgument() { none() }

    /** Gets the source path: the first positional argument or the keyword argument `src`. */
    override DataFlow::Node getAPathArgument() {
      result = this.getArg(0)
      or
      result = this.getArgByName("src")
    }
  }

  // TODO: once we have flow summaries, model `shutil.copyfileobj` which copies the content between its file-like arguments.
  // See https://docs.python.org/3/library/shutil.html#shutil.copyfileobj
  /** A call to `shutil.copyfileobj`. */
  private class CopyFileobj extends DataFlow::CallCfgNode, CopyFile::Range {
    CopyFileobj() {
      exists(API::Node copyfileobj |
        copyfileobj = API::moduleImport("shutil").getMember("copyfileobj")
      |
        this = copyfileobj.getACall()
      )
    }

    /** No path argument is modeled for this call. */
    override DataFlow::Node getAPathArgument() { none() }

    /** Gets the source file object: the first positional argument or the keyword argument `fsrc`. */
    override DataFlow::Node getfsrcArgument() {
      result = this.getArg(0)
      or
      result = this.getArgByName("fsrc")
    }
  }
}

View File

@@ -11,15 +11,15 @@ private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.RemoteFlowSources
private module ExperimentalPrivateDjango {
private module django {
private module DjangoMod {
API::Node http() { result = API::moduleImport("django").getMember("http") }
module http {
module Http {
API::Node response() { result = http().getMember("response") }
API::Node request() { result = http().getMember("request") }
module request {
module Request {
module HttpRequest {
class DjangoGETParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGETParameter() { this = request().getMember("GET").getMember("get").getACall() }
@@ -29,7 +29,7 @@ private module ExperimentalPrivateDjango {
}
}
module response {
module Response {
module HttpResponse {
API::Node baseClassRef() {
result = response().getMember("HttpResponse").getReturn()

View File

@@ -60,7 +60,7 @@ private module LDAP {
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
private class LDAP2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP2Query() { this.getFunction() = ldapQuery() }
override DataFlow::Node getQuery() {
@@ -98,7 +98,7 @@ private module LDAP {
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
private class LDAP2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
@@ -149,7 +149,7 @@ private module LDAP {
*
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
@@ -160,7 +160,7 @@ private module LDAP {
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeFilterCall() {
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
@@ -190,7 +190,7 @@ private module LDAP {
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
private class LDAP3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP3Query() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
@@ -203,7 +203,7 @@ private module LDAP {
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
class LDAP3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
@@ -241,7 +241,7 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
@@ -252,7 +252,7 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeFilterCall() {
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}

View File

@@ -10,7 +10,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module NoSQL {
private module NoSql {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
@@ -153,7 +153,7 @@ private module NoSQL {
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
override DataFlow::Node getQuery() { result = this.getArg(0) }
@@ -174,7 +174,7 @@ private module NoSQL {
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
@@ -188,7 +188,7 @@ private module NoSQL {
}
/** A call to `mongosanitizer.sanitizer.sanitize`. */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
@@ -202,7 +202,7 @@ private module NoSQL {
* If at any time ObjectId can't parse its input (like when a tainted dict is passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
BsonObjectIdCall() {
this =
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])

View File

@@ -12,7 +12,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Werkzeug {
module datastructures {
module Datastructures {
module Headers {
class WerkzeugHeaderAddCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
WerkzeugHeaderAddCall() {

View File

@@ -0,0 +1,466 @@
/**
* Provides classes and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
module XML = ExperimentalXML;
private module XmlEtree {
/**
 * Provides models for `xml.etree` parsers.
 *
 * See
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
 * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
 */
module XMLParser {
  /**
   * A source of instances of `xml.etree` parsers; extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /**
   * A direct instantiation of `xml.etree.ElementTree.XMLParser` or
   * `xml.etree.ElementTree.XMLPullParser`.
   */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() {
      this =
        API::moduleImport("xml")
            .getMember("etree")
            .getMember("ElementTree")
            .getMember(["XMLParser", "XMLPullParser"])
            .getACall()
    }
  }

  /** Gets a reference to an `xml.etree` parser instance, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an `xml.etree` parser instance. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * A call to the `feed` method of an `xml.etree` parser.
   */
  private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
    XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      kind.isQuadraticBlowup() or kind.isBillionLaughs()
    }

    /** Gets the data fed to the parser: the first positional argument or the keyword `data`. */
    override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("data")] }
  }
}
/**
 * A call to one of the module-level parsing functions of `xml.etree.ElementTree`:
 * - `xml.etree.ElementTree.fromstring`
 * - `xml.etree.ElementTree.fromstringlist`
 * - `xml.etree.ElementTree.XML`
 * - `xml.etree.ElementTree.XMLID`
 * - `xml.etree.ElementTree.parse`
 * - `xml.etree.ElementTree.iterparse`
 */
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLEtreeParsing() {
    exists(API::Node elementTree |
      elementTree = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
    |
      this =
        elementTree
            .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
            .getACall()
    )
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // note: it does not matter what `xml.etree` parser you are using, you cannot
    // change the security features anyway :|
    kind.isQuadraticBlowup() or kind.isBillionLaughs()
  }

  /**
   * Gets the XML input: the first positional argument, or the keyword argument
   * `text` (fromstring / XML / XMLID), `sequence` (fromstringlist) or
   * `source` (parse / iterparse).
   */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName(["text", "sequence", "source"])
  }
}
}
private module SaxBasedParsing {
/**
 * A call to the `setFeature` method on an XML sax parser obtained from `xml.sax.make_parser`.
 *
 * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
 */
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
  SaxParserSetFeatureCall() {
    exists(API::Node parser |
      parser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = parser.getMember("setFeature").getACall()
    )
  }

  // The keyword argument names used below do not match the documentation. They were
  // checked against Python 3.9.5, where these names are the ones that actually work.
  /** Gets the argument that names the feature: first positional argument or keyword `name`. */
  DataFlow::Node getFeatureArg() { result = [this.getArg(0), this.getArgByName("name")] }

  /** Gets the argument giving the new state: second positional argument or keyword `state`. */
  DataFlow::Node getStateArg() { result = [this.getArg(1), this.getArgByName("state")] }
}
/**
 * Gets a back-reference to the `setFeature` state argument `arg`, tracked backwards
 * with type back-tracker `t`.
 */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  exists(DataFlow::TypeBackTracker later |
    result = saxParserSetFeatureStateArgBacktracker(later, arg).backtrack(later, t)
  )
  or
  exists(SaxParserSetFeatureCall setFeature | arg = setFeature.getStateArg()) and
  result = arg.getALocalSource() and
  t.start()
}

/** Gets a local source node that flows to the `setFeature` state argument `arg`. */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
  exists(DataFlow::TypeBackTracker done | done = DataFlow::TypeBackTracker::end() |
    result = saxParserSetFeatureStateArgBacktracker(done, arg)
  )
}
/**
* Gets a reference to an XML sax parser that has `feature_external_ges` turned on,
* tracked with type tracker `t`.
*
* Tracking starts at the receiver of a `setFeature` call whose feature argument is
* `xml.sax.handler.feature_external_ges` and whose state argument traces back to the
* boolean literal `True`. From there it follows small steps, dropping any node on which
* `setFeature` is called for the same feature with a state tracing back to `False`.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
// base case: the object `setFeature(feature_external_ges, True)` is called on
t.start() and
exists(SaxParserSetFeatureCall call |
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = true and
result = call.getObject()
)
or
// recursive case: one small step onwards from an already tracked parser reference
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
) and
// Take into account that the feature can be set to False, which makes the parser safe again.
// `and` binds tighter than `or`, so this exclusion only restricts the recursive case above,
// not the base case.
not exists(SaxParserSetFeatureCall call |
call.getObject() = result and
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = false
)
}
/**
* Gets a reference to an XML sax parser that has `feature_external_ges` turned on.
*
* This is the tracked predicate above, evaluated at the end of type tracking.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
 * A call to the `parse` method on a SAX XML parser obtained from `xml.sax.make_parser`.
 */
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  XMLSaxInstanceParsing() {
    exists(API::Node parser |
      parser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = parser.getMember("parse").getACall()
    )
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // external entities / DTD retrieval only matter when the feature has been turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getObject() = saxParserWithFeatureExternalGesTurnedOn()
    or
    // always vulnerable to these
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
  }

  /** Gets the parsed source: the first positional argument or the keyword `source`. */
  override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("source")] }
}
/**
 * A call to either `parse` or `parseString` from the `xml.sax` module.
 *
 * These are module-level functions, not methods on a parser object, so this class is a
 * plain `DataFlow::CallCfgNode`. That way calls made through a directly imported name
 * (`from xml.sax import parse; parse(...)`) are modeled as well as `xml.sax.parse(...)`.
 *
 * See:
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
 */
private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLSaxParsing() {
    this =
      API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
  }

  /**
   * Gets the XML input: the first positional argument, or the keyword argument
   * `string` (parseString) or `source` (parse).
   */
  override DataFlow::Node getAnInput() {
    result in [
        this.getArg(0),
        // parseString
        this.getArgByName("string"),
        // parse
        this.getArgByName("source"),
      ]
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // always vuln to these
    //
    // There is no caller-supplied parser object for these functions, so unlike the
    // `parse` method on a `make_parser()` result there is no receiver on which
    // `feature_external_ges` could have been turned on. XXE / DTD retrieval are
    // therefore not reported here.
    kind.isBillionLaughs() or kind.isQuadraticBlowup()
  }
}
/**
 * A call to the `parse` or `parseString` functions from `xml.dom.minidom` or `xml.dom.pulldom`.
 *
 * Both of these modules are based on SAX parsers.
 */
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLDomParsing() {
    exists(API::Node domModule |
      domModule = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"])
    |
      this = domModule.getMember(["parse", "parseString"]).getACall()
    )
  }

  /** Gets the optional parser argument: second positional argument or keyword `parser`. */
  DataFlow::Node getParserArg() { result = [this.getArg(1), this.getArgByName("parser")] }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // always vulnerable to these
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // only when the supplied parser has had `feature_external_ges` turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn()
  }

  /**
   * Gets the XML input: the first positional argument, or the keyword argument
   * `string` (parseString), `file` (minidom.parse) or `stream_or_string` (pulldom.parse).
   */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName(["string", "file", "stream_or_string"])
  }
}
}
private module Lxml {
/**
 * Provides models for `lxml.etree` parsers.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
 */
module XMLParser {
  /**
   * A source of instances of `lxml.etree` parsers; extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode {
    /** Holds if this instance is vulnerable to `kind`. */
    abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
  }

  /**
   * A call to `lxml.etree.XMLParser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
   */
  private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
    LXMLParser() {
      exists(API::Node etree | etree = API::moduleImport("lxml").getMember("etree") |
        this = etree.getMember("XMLParser").getACall()
      )
    }

    // NOTE: it's not possible to change settings of a parser after constructing it
    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      kind.isDtdRetrieval() and
      this.getArgByName("load_dtd").getALocalSource().asExpr() instanceof True and
      this.getArgByName("no_network").getALocalSource().asExpr() instanceof False
      or
      (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
      this.getArgByName("huge_tree").getALocalSource().asExpr() instanceof True and
      not this.getArgByName("resolve_entities").getALocalSource().asExpr() instanceof False
      or
      kind.isXxe() and
      (
        this.getArgByName("resolve_entities").getALocalSource().asExpr() instanceof True
        or
        // resolve_entities has default True
        not exists(this.getArgByName("resolve_entities"))
      )
    }
  }

  /**
   * A call to `lxml.etree.get_default_parser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
   */
  private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
    LXMLDefaultParser() {
      exists(API::Node etree | etree = API::moduleImport("lxml").getMember("etree") |
        this = etree.getMember("get_default_parser").getACall()
      )
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      // As highlighted by
      // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
      // XXE is allowed by default, so as long as the default parser has not been
      // overridden, the result is also vulnerable to XXE.
      kind.isXxe()
      // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
    }
  }

  /**
   * Gets a reference to an `lxml.etree` parser instance with origin in `origin`,
   * tracked with type tracker `t`.
   */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
    exists(DataFlow::TypeTracker prev | result = instance(prev, origin).track(prev, t))
    or
    result = origin and
    t.start()
  }

  /** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
  DataFlow::Node instance(InstanceSource origin) {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end(), origin)
    |
      tracked.flowsTo(result)
    )
  }

  /** Gets a reference to an `lxml.etree` parser instance that is vulnerable to `kind`. */
  DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
    exists(InstanceSource origin | origin.vulnerableTo(kind) | result = instance(origin))
  }

  /**
   * A call to the `feed` method of an `lxml` parser.
   */
  private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
    LXMLParserFeedCall() { this.calls(instance(_), "feed") }

    /** Holds if the parser that `feed` is called on is vulnerable to `kind`. */
    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      this.calls(instanceVulnerableTo(kind), "feed")
    }

    /** Gets the data fed to the parser: the first positional argument or the keyword `data`. */
    override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("data")] }
  }
}
/**
 * A call to one of the module-level parsing functions of `lxml.etree`:
 * - `lxml.etree.fromstring`
 * - `lxml.etree.fromstringlist`
 * - `lxml.etree.XML`
 * - `lxml.etree.parse`
 * - `lxml.etree.parseid`
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
 */
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  LXMLParsing() {
    exists(API::Node etree | etree = API::moduleImport("lxml").getMember("etree") |
      this = etree.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]).getACall()
    )
  }

  /** Gets the optional parser argument: second positional argument or keyword `parser`. */
  DataFlow::Node getParserArg() { result = [this.getArg(1), this.getArgByName("parser")] }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // no parser argument given: reported as vulnerable to XXE only
    not exists(this.getParserArg()) and
    kind.isXxe()
    or
    // explicit parser: vulnerable to whatever that parser instance is vulnerable to
    this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
  }

  /**
   * Gets the XML input: the first positional argument, or the keyword argument
   * `text` (fromstring / XML), `strings` (fromstringlist) or `source` (parse / parseid).
   */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName(["text", "strings", "source"])
  }
}
}
private module Xmltodict {
  /**
   * A call to `xmltodict.parse`.
   */
  private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
    XMLtoDictParsing() {
      exists(API::Node parseFunction |
        parseFunction = API::moduleImport("xmltodict").getMember("parse")
      |
        this = parseFunction.getACall()
      )
    }

    /** Holds for the entity-expansion kinds when `disable_entities` traces back to `False`. */
    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      this.getArgByName("disable_entities").getALocalSource().asExpr() instanceof False and
      (kind.isQuadraticBlowup() or kind.isBillionLaughs())
    }

    /** Gets the XML input: the first positional argument or the keyword `xml_input`. */
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("xml_input")
    }
  }
}

View File

@@ -5,7 +5,7 @@ private import experimental.semmle.python.frameworks.JWT
private module Authlib {
/** Gets a reference to `authlib.jose.(jwt|JsonWebToken)` */
private API::Node authlibJWT() {
private API::Node authlibJwt() {
result in [
API::moduleImport("authlib").getMember("jose").getMember("jwt"),
API::moduleImport("authlib").getMember("jose").getMember("JsonWebToken").getReturn()
@@ -13,10 +13,10 @@ private module Authlib {
}
/** Gets a reference to `jwt.encode` */
private API::Node authlibJWTEncode() { result = authlibJWT().getMember("encode") }
private API::Node authlibJwtEncode() { result = authlibJwt().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node authlibJWTDecode() { result = authlibJWT().getMember("decode") }
private API::Node authlibJwtDecode() { result = authlibJwt().getMember("decode") }
/**
* Gets a call to `authlib.jose.(jwt|JsonWebToken).encode`.
@@ -33,8 +33,8 @@ private module Authlib {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmString()`'s result would be `HS256`.
*/
private class AuthlibJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
AuthlibJWTEncodeCall() { this = authlibJWTEncode().getACall() }
private class AuthlibJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
AuthlibJwtEncodeCall() { this = authlibJwtEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(1) }
@@ -69,8 +69,8 @@ private module Authlib {
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `key`.
*/
private class AuthlibJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
AuthlibJWTDecodeCall() { this = authlibJWTDecode().getACall() }
private class AuthlibJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
AuthlibJwtDecodeCall() { this = authlibJwtDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }

View File

@@ -3,7 +3,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PyJWT {
private module PyJwt {
/** Gets a reference to `jwt.encode` */
private API::Node pyjwtEncode() { result = API::moduleImport("jwt").getMember("encode") }
@@ -25,8 +25,8 @@ private module PyJWT {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmString()`'s result would be `HS256`.
*/
private class PyJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
PyJWTEncodeCall() { this = pyjwtEncode().getACall() }
private class PyJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
PyJwtEncodeCall() { this = pyjwtEncode().getACall() }
override DataFlow::Node getPayload() {
result in [this.getArg(0), this.getArgByName("payload")]
@@ -63,8 +63,8 @@ private module PyJWT {
* * `getOptions()`'s result would be `{"verify_signature": True}`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PyJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
PyJWTDecodeCall() { this = pyjwtDecode().getACall() }
private class PyJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
PyJwtDecodeCall() { this = pyjwtDecode().getACall() }
override DataFlow::Node getPayload() { result in [this.getArg(0), this.getArgByName("jwt")] }

View File

@@ -5,13 +5,13 @@ private import experimental.semmle.python.frameworks.JWT
private module PythonJose {
/** Gets a reference to `jwt` */
private API::Node joseJWT() { result = API::moduleImport("jose").getMember("jwt") }
private API::Node joseJwt() { result = API::moduleImport("jose").getMember("jwt") }
/** Gets a reference to `jwt.encode` */
private API::Node joseJWTEncode() { result = joseJWT().getMember("encode") }
private API::Node joseJwtEncode() { result = joseJwt().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node joseJWTDecode() { result = joseJWT().getMember("decode") }
private API::Node joseJwtDecode() { result = joseJwt().getMember("decode") }
/**
* Gets a call to `jwt.encode`.
@@ -28,8 +28,8 @@ private module PythonJose {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class JoseJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
JoseJWTEncodeCall() { this = joseJWTEncode().getACall() }
private class JoseJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
JoseJwtEncodeCall() { this = joseJwtEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }
@@ -64,8 +64,8 @@ private module PythonJose {
* * `getOptions()`'s result would be none.
* * `verifiesSignature()` predicate would succeed.
*/
private class JoseJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
JoseJWTDecodeCall() { this = joseJWTDecode().getACall() }
private class JoseJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
JoseJwtDecodeCall() { this = joseJwtDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }

View File

@@ -2,7 +2,7 @@ private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Python_JWT {
private module Python_Jwt {
/**
* Gets a call to `python_jwt.process_jwt`.
*
@@ -21,7 +21,7 @@ private module Python_JWT {
* * `getOptions()`'s result would be `none()`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PythonJwtProcessCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
private class PythonJwtProcessCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
PythonJwtProcessCall() {
this = API::moduleImport("python_jwt").getMember("process_jwt").getACall()
}

View File

@@ -18,8 +18,8 @@ string getPrivateHostRegex() {
}
// "ldap://somethingon.theinternet.com"
class LDAPFullHost extends StrConst {
LDAPFullHost() {
class LdapFullHost extends StrConst {
LdapFullHost() {
exists(string s |
s = this.getText() and
s.regexpMatch(getFullHostRegex()) and
@@ -29,27 +29,39 @@ class LDAPFullHost extends StrConst {
}
}
class LDAPSchema extends StrConst {
LDAPSchema() { this.getText().regexpMatch(getSchemaRegex()) }
/** DEPRECATED: Alias for LdapFullHost */
deprecated class LDAPFullHost = LdapFullHost;
class LdapSchema extends StrConst {
LdapSchema() { this.getText().regexpMatch(getSchemaRegex()) }
}
class LDAPPrivateHost extends StrConst {
LDAPPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
/** DEPRECATED: Alias for LdapSchema */
deprecated class LDAPSchema = LdapSchema;
class LdapPrivateHost extends StrConst {
LdapPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
}
predicate concatAndCompareAgainstFullHostRegex(LDAPSchema schema, StrConst host) {
not host instanceof LDAPPrivateHost and
/** DEPRECATED: Alias for LdapPrivateHost */
deprecated class LDAPPrivateHost = LdapPrivateHost;
predicate concatAndCompareAgainstFullHostRegex(LdapSchema schema, StrConst host) {
not host instanceof LdapPrivateHost and
(schema.getText() + host.getText()).regexpMatch(getFullHostRegex())
}
// "ldap://" + "somethingon.theinternet.com"
class LDAPBothStrings extends BinaryExpr {
LDAPBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
class LdapBothStrings extends BinaryExpr {
LdapBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
}
/** DEPRECATED: Alias for LdapBothStrings */
deprecated class LDAPBothStrings = LdapBothStrings;
// schema + host
class LDAPBothVar extends BinaryExpr {
LDAPBothVar() {
class LdapBothVar extends BinaryExpr {
LdapBothVar() {
exists(SsaVariable schemaVar, SsaVariable hostVar |
this.getLeft() = schemaVar.getVariable().getALoad() and // getAUse is incompatible with Expr
this.getRight() = hostVar.getVariable().getALoad() and
@@ -61,9 +73,12 @@ class LDAPBothVar extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapBothVar */
deprecated class LDAPBothVar = LdapBothVar;
// schema + "somethingon.theinternet.com"
class LDAPVarString extends BinaryExpr {
LDAPVarString() {
class LdapVarString extends BinaryExpr {
LdapVarString() {
exists(SsaVariable schemaVar |
this.getLeft() = schemaVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
@@ -74,9 +89,12 @@ class LDAPVarString extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapVarString */
deprecated class LDAPVarString = LdapVarString;
// "ldap://" + host
class LDAPStringVar extends BinaryExpr {
LDAPStringVar() {
class LdapStringVar extends BinaryExpr {
LdapStringVar() {
exists(SsaVariable hostVar |
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(this.getLeft(),
@@ -85,22 +103,28 @@ class LDAPStringVar extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapStringVar */
deprecated class LDAPStringVar = LdapStringVar;
/**
* A taint-tracking configuration for detecting LDAP insecure authentications.
*/
class LDAPInsecureAuthConfig extends TaintTracking::Configuration {
LDAPInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
class LdapInsecureAuthConfig extends TaintTracking::Configuration {
LdapInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSource or
source.asExpr() instanceof LDAPFullHost or
source.asExpr() instanceof LDAPBothStrings or
source.asExpr() instanceof LDAPBothVar or
source.asExpr() instanceof LDAPVarString or
source.asExpr() instanceof LDAPStringVar
source.asExpr() instanceof LdapFullHost or
source.asExpr() instanceof LdapBothStrings or
source.asExpr() instanceof LdapBothVar or
source.asExpr() instanceof LdapVarString or
source.asExpr() instanceof LdapStringVar
}
override predicate isSink(DataFlow::Node sink) {
exists(LDAPBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
exists(LdapBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
}
}
/** DEPRECATED: Alias for LdapInsecureAuthConfig */
deprecated class LDAPInsecureAuthConfig = LdapInsecureAuthConfig;

View File

@@ -0,0 +1,39 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
/**
 * A taint-tracking configuration whose sources are calls into Python's
 * `zipfile`, `tarfile`, `bz2`, `gzip` and `lzma` modules and whose sinks are
 * the path and `fsrc` arguments of a `CopyFile`.
 *
 * Sources and sinks located in standard-library files are excluded.
 */
class ZipSlipConfig extends TaintTracking::Configuration {
  ZipSlipConfig() { this = "ZipSlipConfig" }

  /**
   * Holds if `source` is a call to one of the modelled archive/compression
   * APIs and does not occur in a standard-library file.
   */
  override predicate isSource(DataFlow::Node source) {
    exists(API::Node archiveApi |
      // `ZipFile(...).open(...)` and `ZipFile(...).namelist()`
      archiveApi =
        API::moduleImport("zipfile")
            .getMember("ZipFile")
            .getReturn()
            .getMember(["open", "namelist"])
      or
      archiveApi = API::moduleImport("tarfile").getMember(["open", "TarFile"])
      or
      archiveApi = API::moduleImport("bz2").getMember(["open", "BZ2File"])
      or
      archiveApi = API::moduleImport("gzip").getMember(["GzipFile", "open"])
      or
      archiveApi = API::moduleImport("lzma").getMember(["open", "LZMAFile"])
    |
      source = archiveApi.getACall()
    ) and
    not source.getScope().getLocation().getFile().inStdlib()
  }

  /**
   * Holds if `sink` is a path argument or the `fsrc` argument of some
   * `CopyFile` and does not occur in a standard-library file.
   */
  override predicate isSink(DataFlow::Node sink) {
    exists(CopyFile copyOperation |
      sink = copyOperation.getAPathArgument()
      or
      sink = copyOperation.getfsrcArgument()
    ) and
    not sink.getScope().getLocation().getFile().inStdlib()
  }
}

View File

@@ -0,0 +1,28 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
 * Provides a taint-tracking configuration for XML entity injection.
 *
 * `Sink`, `SanitizerGuard`, `AdditionalTaintStep` and `RemoteFlowSourceAsSource`
 * are not declared in this module; presumably they are supplied by the imported
 * `XmlEntityInjectionCustomizations::XmlEntityInjection` module.
 */
module XmlEntityInjection {
  import XmlEntityInjectionCustomizations::XmlEntityInjection

  /**
   * A taint-tracking configuration that delegates its sources, sinks,
   * sanitizer guards and extra taint steps to the customization classes.
   */
  class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
    XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }

    override predicate isSource(DataFlow::Node source) {
      source = any(RemoteFlowSourceAsSource remoteSource)
    }

    override predicate isSink(DataFlow::Node sink) { sink = any(Sink injectionSink) }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard = any(SanitizerGuard sanitizerGuard)
    }

    /** Holds if any registered `AdditionalTaintStep` has a step from `nodeFrom` to `nodeTo`. */
    override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      exists(AdditionalTaintStep extraStep | extraStep.step(nodeFrom, nodeTo))
    }
  }
}

View File

@@ -0,0 +1,86 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module XmlEntityInjection {
/**
* A data flow source for "xml injection" vulnerabilities.
*
* Extend this class to model additional sources.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "xml injection" vulnerabilities.
*
* Extend this class to model additional sinks.
*/
abstract class Sink extends DataFlow::Node {
/** Gets the kind of XML injection that this sink is vulnerable to. */
abstract string getVulnerableKind();
}
/**
* A sanitizer guard for "xml injection" vulnerabilities.
*
* Extend this class to model additional sanitizer guards.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A unit class for adding additional taint steps.
*
* Extend this class to add additional taint steps that should apply to the
* `XmlEntityInjection` taint configuration.
*/
class AdditionalTaintStep extends Unit {
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
* step for the `XmlEntityInjection` configuration.
*/
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
}
/**
* An input to a direct XML parsing function, considered as a flow sink.
*
* See `ExperimentalXML::XMLParsing`.
*/
class XMLParsingInputAsSink extends Sink {
// The parsing operation this sink is an input of.
ExperimentalXML::XMLParsing xmlParsing;
XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }
// The vulnerable kinds are exactly those reported by the parsing operation's `vulnerableTo`.
override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
}
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* A taint step for calls to `io.StringIO` and `io.BytesIO`.
*
* Taint propagates from the first positional argument of the call to the
* result of the call.
*/
class IoAdditionalTaintStep extends AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode ioCalls |
ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
nodeFrom = ioCalls.getArg(0) and
nodeTo = ioCalls
)
}
}
}

View File

@@ -5,7 +5,7 @@ import semmle.python.dataflow.new.RemoteFlowSources
import experimental.semmle.python.Concepts
import semmle.python.Concepts
module NoSQLInjection {
module NoSqlInjection {
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NoSQLInjection" }
@@ -15,17 +15,17 @@ module NoSQLInjection {
}
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink = any(NoSQLQuery noSQLQuery).getQuery() and
sink = any(NoSqlQuery noSqlQuery).getQuery() and
state instanceof ConvertedToDict
}
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
// Block `RemoteInput` paths here, since they change state to `ConvertedToDict`
exists(Decoding decoding | decoding.getFormat() = "JSON" and node = decoding.getOutput()) and
state instanceof RemoteInput
}
override predicate isAdditionalFlowStep(
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
@@ -38,7 +38,7 @@ module NoSQLInjection {
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
sanitizer = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
}
}
@@ -52,3 +52,6 @@ module NoSQLInjection {
ConvertedToDict() { this = "ConvertedToDict" }
}
}
/** DEPRECATED: Alias for NoSqlInjection */
deprecated module NoSQLInjection = NoSqlInjection;

View File

@@ -25,7 +25,7 @@ module XSLTInjection {
ExternalXmlStringKind() { this = "etree.XML string" }
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind
etreeXml(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind
or
@@ -40,7 +40,7 @@ module XSLTInjection {
ExternalXmlKind() { this = "lxml etree xml" }
}
private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) {
private predicate etreeXml(ControlFlowNode fromnode, CallNode tonode) {
// etree.XML("<xmlContent>")
exists(CallNode call | call.getFunction().(AttrNode).getObject("XML").pointsTo(etree()) |
call.getArg(0) = fromnode and

View File

@@ -8,7 +8,7 @@ import experimental.semmle.python.templates.SSTISink
deprecated ClassValue theAirspeedTemplateClass() { result = Value::named("airspeed.Template") }
/**
* Sink representing the `airspeed.Template` class instantiation argument.
* A sink representing the `airspeed.Template` class instantiation argument.
*
* import airspeed
* temp = airspeed.Template(`"sink"`)

View File

@@ -10,7 +10,7 @@ deprecated ClassValue theBottleSimpleTemplateClass() {
}
/**
* Sink representing the `bottle.SimpleTemplate` class instantiation argument.
* A sink representing the `bottle.SimpleTemplate` class instantiation argument.
*
* from bottle import SimpleTemplate
* template = SimpleTemplate(`sink`)
@@ -29,7 +29,7 @@ deprecated class BottleSimpleTemplateSink extends SSTISink {
}
/**
* Sink representing the `bottle.template` function call argument.
* A sink representing the `bottle.template` function call argument.
*
* from bottle import template
* tmp = template(`sink`)

View File

@@ -10,7 +10,7 @@ deprecated ClassValue theChameleonPageTemplateClass() {
}
/**
* Sink representing the `chameleon.PageTemplate` class instantiation argument.
* A sink representing the `chameleon.PageTemplate` class instantiation argument.
*
* from chameleon import PageTemplate
* template = PageTemplate(`sink`)

View File

@@ -10,7 +10,7 @@ deprecated ClassValue theCheetahTemplateClass() {
}
/**
* Sink representing the instantiation argument of any class which derives from
* A sink representing the instantiation argument of any class which derives from
* the `Cheetah.Template.Template` class .
*
* from Cheetah.Template import Template

View File

@@ -8,7 +8,7 @@ import experimental.semmle.python.templates.SSTISink
deprecated Value theChevronRenderFunc() { result = Value::named("chevron.render") }
/**
* Sink representing the `chevron.render` function call argument.
* A sink representing the `chevron.render` function call argument.
*
* import chevron
* tmp = chevron.render(`sink`,{ 'key' : 'value' })

View File

@@ -7,7 +7,7 @@ import experimental.semmle.python.templates.SSTISink
deprecated ClassValue theDjangoTemplateClass() { result = Value::named("django.template.Template") }
/**
* Sink representng `django.template.Template` class instantiation argument.
* A sink representing `django.template.Template` class instantiation argument.
*
* from django.template import Template
* template = Template(`sink`)

View File

@@ -9,7 +9,7 @@ deprecated Value theFlaskRenderTemplateClass() {
}
/**
* Sink representng `flask.render_template_string` function call argument.
* A sink representing `flask.render_template_string` function call argument.
*
* from flask import render_template_string
* render_template_string(`sink`)

View File

@@ -15,7 +15,7 @@ deprecated ClassValue theGenshiMarkupTemplateClass() {
}
/**
* Sink representing the `genshi.template.TextTemplate` class instantiation argument.
* A sink representing the `genshi.template.TextTemplate` class instantiation argument.
*
* from genshi.template import TextTemplate
* tmpl = TextTemplate('sink')
@@ -34,7 +34,7 @@ deprecated class GenshiTextTemplateSink extends SSTISink {
}
/**
* Sink representing the `genshi.template.MarkupTemplate` class instantiation argument.
* A sink representing the `genshi.template.MarkupTemplate` class instantiation argument.
*
* from genshi.template import MarkupTemplate
* tmpl = MarkupTemplate('sink')

View File

@@ -11,7 +11,7 @@ deprecated ClassValue theJinja2TemplateClass() { result = Value::named("jinja2.T
deprecated Value theJinja2FromStringValue() { result = Value::named("jinja2.from_string") }
/**
* Sink representing the `jinja2.Template` class instantiation argument.
* A sink representing the `jinja2.Template` class instantiation argument.
*
* from jinja2 import Template
* template = Template(`sink`)
@@ -30,7 +30,7 @@ deprecated class Jinja2TemplateSink extends SSTISink {
}
/**
* Sink representing the `jinja2.from_string` function call argument.
* A sink representing the `jinja2.from_string` function call argument.
*
* from jinja2 import from_string
* template = from_string(`sink`)

Some files were not shown because too many files have changed in this diff Show More