Merge branch 'main' into ZipSlip

This commit is contained in:
Ahmed Farid
2022-03-23 19:27:12 -04:00
committed by GitHub
1225 changed files with 44222 additions and 12547 deletions

View File

@@ -1,3 +1,9 @@
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).
## 0.0.10
### New Queries

View File

@@ -40,16 +40,6 @@ private predicate class_statement(Comment c) {
private predicate triple_quote(Comment c) { c.getText().regexpMatch("#.*(\"\"\"|''').*") }
private predicate triple_quoted_string_part(Comment start, Comment end) {
triple_quote(start) and end = start
or
exists(Comment mid |
triple_quoted_string_part(start, mid) and
end = non_empty_following(mid) and
not triple_quote(end)
)
}
private predicate maybe_code(Comment c) {
not non_code(c) and not filler(c) and not endline_comment(c) and not file_or_url(c)
or
@@ -158,11 +148,11 @@ private predicate commented_out_code_block(Comment start, Comment end) {
not commented_out_code(non_empty_following(end))
}
/* A single line comment that appears to be commented out code */
/** A single line comment that appears to be commented out code */
class CommentedOutCodeLine extends Comment {
CommentedOutCodeLine() { exists(CommentedOutCodeBlock b | b.contains(this)) }
/* Whether this commented-out code line is likely to be example code embedded in a larger comment. */
/** Holds if this commented-out code line is likely to be example code embedded in a larger comment. */
predicate maybeExampleCode() {
exists(CommentedOutCodeBlock block |
block.contains(this) and

View File

@@ -66,7 +66,7 @@ predicate passes_open_files(Variable v, ControlFlowNode test, boolean sense) {
)
}
/* Helper for `def_is_open` to give better join order */
// Helper for `def_is_open` to give better join order
private predicate passes_open_files(PyEdgeRefinement refinement) {
passes_open_files(refinement.getSourceVariable(), refinement.getPredecessor().getLastNode(),
refinement.getSense())

View File

@@ -40,13 +40,16 @@ private import semmle.python.objects.ObjectInternal
/**
* A callable that is considered a "safe" external API from a security perspective.
*/
class SafeExternalAPI extends Unit {
class SafeExternalApi extends Unit {
/** Gets a callable that is considered a "safe" external API from a security perspective. */
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
}
/** DEPRECATED: Alias for SafeExternalApi */
deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalAPI extends SafeExternalAPI {
private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
exists(CallableValue cv | cv = result.getCallableValue() |
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
@@ -61,15 +64,15 @@ private class DefaultSafeExternalAPI extends SafeExternalAPI {
}
/** A node representing data being passed to an external API through a call. */
class ExternalAPIDataNode extends DataFlow::Node {
class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCall call;
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalAPIDataNode() {
ExternalApiDataNode() {
exists(call.getLocation().getFile().getRelativePath()) and
callable = call.getCallable() and
not any(SafeExternalAPI safe).getSafeCallable() = callable and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
@@ -98,38 +101,47 @@ class ExternalAPIDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalAPIDataNode`s. */
class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
/** DEPRECATED: Alias for ExternalApiDataNode */
deprecated class ExternalAPIDataNode = ExternalApiDataNode;
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalApiDataNode`s. */
class UntrustedDataToExternalApiConfig extends TaintTracking::Configuration {
UntrustedDataToExternalApiConfig() { this = "UntrustedDataToExternalAPIConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalApiDataNode }
}
/** DEPRECATED: Alias for UntrustedDataToExternalApiConfig */
deprecated class UntrustedDataToExternalAPIConfig = UntrustedDataToExternalApiConfig;
/** A node representing untrusted data being passed to an external API. */
class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
class UntrustedExternalApiDataNode extends ExternalApiDataNode {
UntrustedExternalApiDataNode() { any(UntrustedDataToExternalApiConfig c).hasFlow(_, this) }
/** Gets a source of untrusted data which is passed to this external API data node. */
DataFlow::Node getAnUntrustedSource() {
any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
any(UntrustedDataToExternalApiConfig c).hasFlow(result, this)
}
}
private newtype TExternalAPI =
TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalAPIDataNode n |
/** DEPRECATED: Alias for UntrustedExternalApiDataNode */
deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
private newtype TExternalApi =
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalApiDataNode n |
callable = n.getCallable() and
index = n.getIndex()
)
}
/** An external API which is used with untrusted data. */
class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
class ExternalApiUsedWithUntrustedData extends TExternalApi {
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalAPIDataNode getUntrustedDataNode() {
this = TExternalAPIParameter(result.getCallable(), result.getIndex())
UntrustedExternalApiDataNode getUntrustedDataNode() {
this = TExternalApiParameter(result.getCallable(), result.getIndex())
}
/** Gets the number of untrusted sources used with this external API. */
@@ -143,7 +155,7 @@ class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalAPIParameter(callable, index) and
this = TExternalApiParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
@@ -167,6 +179,9 @@ class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
}
}
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |

View File

@@ -11,7 +11,7 @@
import python
import ExternalAPIs
from ExternalAPIUsedWithUntrustedData externalAPI
select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
from ExternalApiUsedWithUntrustedData externalApi
select externalApi, count(externalApi.getUntrustedDataNode()) as numberOfUses,
externalApi.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
numberOfUntrustedSources desc

View File

@@ -14,11 +14,11 @@ import ExternalAPIs
import DataFlow::PathGraph
from
UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
ExternalAPIUsedWithUntrustedData externalAPI
UntrustedDataToExternalApiConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
ExternalApiUsedWithUntrustedData externalApi
where
sink.getNode() = externalAPI.getUntrustedDataNode() and
sink.getNode() = externalApi.getUntrustedDataNode() and
config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Call to " + externalAPI.toString() + " with untrusted data from $@.", source.getNode(),
"Call to " + externalApi.toString() + " with untrusted data from $@.", source.getNode(),
source.toString()

View File

@@ -0,0 +1,202 @@
/**
* Provides predicates for reasoning about regular expressions
* that match URLs and hostname patterns.
*/
private import HostnameRegexpSpecific
/**
* Holds if the given constant is unlikely to occur in the origin part of a URL.
*/
predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
// Look for any of these cases:
// - A character that can't occur in the origin
// - Two dashes in a row
// - A colon that is not part of port or scheme separator
// - A slash that is not part of scheme separator
term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
}
/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
predicate isDotConstant(RegExpTerm term) {
term.(RegExpCharEscape).getValue() = "."
or
exists(RegExpCharacterClass cls |
term = cls and
not cls.isInverted() and
cls.getNumChild() = 1 and
cls.getAChild().(RegExpConstant).getValue() = "."
)
}
/** Holds if `term` is a wildcard `.` or an actual `.` character. */
predicate isDotLike(RegExpTerm term) {
term instanceof RegExpDot
or
isDotConstant(term)
}
/** Holds if `term` will only ever be matched against the beginning of the input. */
predicate matchesBeginningOfString(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpTerm parent | matchesBeginningOfString(parent) |
term = parent.(RegExpSequence).getChild(0)
or
parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
term = parent.(RegExpSequence).getChild(1)
or
term = parent.(RegExpAlt).getAChild()
or
term = parent.(RegExpGroup).getAChild()
)
}
/**
* Holds if the given sequence contains top-level domain preceded by a dot, such as `.com`,
* excluding cases where this is at the very beginning of the regexp.
*
* `i` is bound to the index of the last child in the top-level domain part.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
seq.getChild(i)
.(RegExpConstant)
.getValue()
.regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
isDotLike(seq.getChild(i - 1)) and
not (i = 1 and matchesBeginningOfString(seq))
}
/**
* Holds if the given regular expression term contains top-level domain preceded by a dot,
* such as `.com`.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
/**
* Holds if `term` will always match a hostname, that is, all disjunctions contain
* a hostname pattern that isn't inside a quantifier.
*/
predicate alwaysMatchesHostname(RegExpTerm term) {
hasTopLevelDomainEnding(term, _)
or
// `localhost` is considered a hostname pattern, but has no TLD
term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
or
not term instanceof RegExpAlt and
not term instanceof RegExpQuantifier and
alwaysMatchesHostname(term.getAChild())
or
alwaysMatchesHostnameAlt(term)
}
/** Holds if every child of `alt` contains a hostname pattern. */
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
}
/**
* Holds if the first `i` children of `alt` contains a hostname pattern.
*
* This is used instead of `forall` to avoid materializing the set of alternatives
* that don't contains hostnames, which is much larger.
*/
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
alwaysMatchesHostname(alt.getChild(0)) and i = 0
or
alwaysMatchesHostnameAlt(alt, i - 1) and
alwaysMatchesHostname(alt.getChild(i))
}
/**
* Holds if `term` occurs inside a quantifier or alternative (and thus
* can not be expected to correspond to a unique match), or as part of
* a lookaround assertion (which are rarely used for capture groups).
*/
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
exists(RegExpParent parent | parent = term.getParent() |
parent instanceof RegExpAlt
or
parent instanceof RegExpQuantifier
or
parent instanceof RegExpSubPattern
or
isInsideChoiceOrSubPattern(parent)
)
}
/**
* Holds if `group` is likely to be used as a capture group.
*/
predicate isLikelyCaptureGroup(RegExpGroup group) {
group.isCapture() and
not isInsideChoiceOrSubPattern(group)
}
/**
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
*
* At least one of these dots is not intended to be a subdomain separator,
* so we avoid flagging the pattern in this case.
*/
predicate hasConsecutiveDots(RegExpSequence seq) {
exists(int i |
isDotLike(seq.getChild(i)) and
isDotLike(seq.getChild(i + 1))
)
}
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
) {
exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}

View File

@@ -0,0 +1,3 @@
import semmle.python.security.performance.RegExpTreeView
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.Regexp

View File

@@ -30,7 +30,7 @@
<p>
Escape all meta-characters appropriately when constructing
regular expressions for security checks, pay special attention to the
regular expressions for security checks, and pay special attention to the
<code>.</code> meta-character.
</p>

View File

@@ -8,35 +8,9 @@
* @id py/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
* external/cwe/cwe-020
*/
import python
import semmle.python.regex
import HostnameRegexpShared
private string commonTopLevelDomainRegex() { result = "com|org|edu|gov|uk|net|io" }
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
*/
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
hostPart =
pattern
.regexpCapture("(?i).*" +
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + commonTopLevelDomainRegex() + "))" + ".*", 1)
}
from Regex r, string pattern, string hostPart
where
r.getText() = pattern and
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + commonTopLevelDomainRegex() + ").*[(][?]:.*[)].*")
select r,
"This regular expression has an unescaped '.' before '" + hostPart +
"', so it might match more hosts than expected."
query predicate problems = incompleteHostnameRegExp/4;

View File

@@ -17,7 +17,7 @@ import python
import semmle.python.security.dataflow.ReflectedXSS
import DataFlow::PathGraph
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from ReflectedXss::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -16,6 +16,6 @@
import semmle.python.security.BadTagFilterQuery
from HTMLMatchingRegExp regexp, string msg
from HtmlMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
select regexp, msg

View File

@@ -259,7 +259,7 @@ predicate file_consistency(string clsname, string problem, string what) {
exists(Container f |
clsname = f.getAQlClass() and
uniqueness_error(count(f.toString()), "toString", problem) and
what = "file " + f.getName()
what = "file " + f.getAbsolutePath()
)
}

View File

@@ -1,4 +1,5 @@
---
category: newQuery
---
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.10
lastReleaseVersion: 0.0.11

View File

@@ -13,7 +13,7 @@ import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
predicate authenticatesImproperly(LDAPBind ldapBind) {
predicate authenticatesImproperly(LdapBind ldapBind) {
(
DataFlow::localFlow(DataFlow::exprNode(any(None noneName)), ldapBind.getPassword()) or
not exists(ldapBind.getPassword())
@@ -25,6 +25,6 @@ predicate authenticatesImproperly(LDAPBind ldapBind) {
)
}
from LDAPBind ldapBind
from LdapBind ldapBind
where authenticatesImproperly(ldapBind)
select ldapBind, "The following LDAP bind operation is executed without authentication"

View File

@@ -12,7 +12,7 @@ import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.JWT
from JWTEncoding jwtEncoding, string affectedComponent
from JwtEncoding jwtEncoding, string affectedComponent
where
affectedComponent = "algorithm" and
isEmptyOrNone(jwtEncoding.getAlgorithm())

View File

@@ -12,6 +12,6 @@
import python
import experimental.semmle.python.Concepts
from JWTDecoding jwtDecoding
from JwtDecoding jwtDecoding
where not jwtDecoding.verifiesSignature()
select jwtDecoding.getPayload(), "is not verified with a cryptographic secret or public key."

View File

@@ -14,7 +14,7 @@ import python
import DataFlow::PathGraph
import experimental.semmle.python.security.LDAPInsecureAuth
from LDAPInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LdapInsecureAuthConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ is authenticated insecurely.", sink.getNode(),
"This LDAP host"

View File

@@ -0,0 +1,25 @@
/**
* @name SimpleXMLRPCServer DoS vulnerability
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
* @kind problem
* @problem.severity warning
* @precision high
* @id py/simple-xml-rpc-server-dos
* @tags security
* external/cwe/cwe-776
*/
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
from DataFlow::CallCfgNode call, string kinds
where
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
kinds =
strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
kind.isBillionLaughs() or kind.isQuadraticBlowup()
|
kind, ", "
)
select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."

View File

@@ -0,0 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<test>&xxe;</test>

View File

@@ -0,0 +1,25 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
app = Flask(__name__)
# BAD
@app.route("/bad")
def bad():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
return parsed_xml.text
# GOOD
@app.route("/good")
def good():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False)
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
return parsed_xml.text

View File

@@ -0,0 +1,48 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out
in this situation.
</p>
</overview>
<recommendation>
<p>
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package aimed
to prevent any potentially malicious operation.
</p>
</recommendation>
<example>
<p>
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
</p>
<sample src="XmlEntityInjection.py"/>
<p>
Providing an input (<code>xml_content</code>) like the following XML content against /bad, the request response would contain the contents of
<code>/etc/passwd</code>.
</p>
<sample src="XXE.xml"/>
</example>
<references>
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
<li>Paper by Timothy Morgen: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a></li>
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs.</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name XML Entity injection
* @description User input should not be parsed allowing the injection of entities.
* @kind path-problem
* @problem.severity error
* @id py/xml-entity-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
from
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
DataFlow::PathNode sink, string kinds
where
config.hasFlowPath(source, sink) and
kinds =
strictconcat(string kind |
kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
|
kind, ", "
)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -13,7 +13,7 @@ import python
import experimental.semmle.python.security.injection.NoSQLInjection
import DataFlow::PathGraph
from NoSQLInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from NoSqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
"user-provided value"

View File

@@ -14,6 +14,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling copying file related APIs. */
module CopyFile {
/**
@@ -72,10 +73,77 @@ class LogOutput extends DataFlow::Node {
LogOutput() { this = range }
DataFlow::Node getAnInput() { result = range.getAnInput() }
/**
* Since there is both XML module in normal and experimental Concepts,
* we have to rename the experimental module as this.
*/
module ExperimentalXML {
/**
* A kind of XML vulnerability.
*
* See https://pypi.org/project/defusedxml/#python-xml-libraries
*/
class XMLVulnerabilityKind extends string {
XMLVulnerabilityKind() {
this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
}
/** Holds for Billion Laughs vulnerability kind. */
predicate isBillionLaughs() { this = "Billion Laughs" }
/** Holds for Quadratic Blowup vulnerability kind. */
predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
/** Holds for XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the content to parse.
*/
abstract DataFlow::Node getAnInput();
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
}
}
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
module LdapQuery {
/**
* A data-flow node that collects methods executing a LDAP query.
*
@@ -90,16 +158,19 @@ module LDAPQuery {
}
}
/** DEPRECATED: Alias for LdapQuery */
deprecated module LDAPQuery = LdapQuery;
/**
* A data-flow node that collect methods executing a LDAP query.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPQuery::Range` instead.
*/
class LDAPQuery extends DataFlow::Node {
LDAPQuery::Range range;
class LdapQuery extends DataFlow::Node {
LdapQuery::Range range;
LDAPQuery() { this = range }
LdapQuery() { this = range }
/**
* Gets the argument containing the executed expression.
@@ -107,8 +178,11 @@ class LDAPQuery extends DataFlow::Node {
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** DEPRECATED: Alias for LdapQuery */
deprecated class LDAPQuery = LdapQuery;
/** Provides classes for modeling LDAP components escape-related APIs. */
module LDAPEscape {
module LdapEscape {
/**
* A data-flow node that collects functions escaping LDAP components.
*
@@ -123,16 +197,19 @@ module LDAPEscape {
}
}
/** DEPRECATED: Alias for LdapEscape */
deprecated module LDAPEscape = LdapEscape;
/**
* A data-flow node that collects functions escaping LDAP components.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPEscape::Range` instead.
*/
class LDAPEscape extends DataFlow::Node {
LDAPEscape::Range range;
class LdapEscape extends DataFlow::Node {
LdapEscape::Range range;
LDAPEscape() { this = range }
LdapEscape() { this = range }
/**
* Gets the argument containing the escaped expression.
@@ -140,8 +217,11 @@ class LDAPEscape extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** DEPRECATED: Alias for LdapEscape */
deprecated class LDAPEscape = LdapEscape;
/** Provides classes for modeling LDAP bind-related APIs. */
module LDAPBind {
module LdapBind {
/**
* A data-flow node that collects methods binding a LDAP connection.
*
@@ -166,16 +246,19 @@ module LDAPBind {
}
}
/** DEPRECATED: Alias for LdapBind */
deprecated module LDAPBind = LdapBind;
/**
* A data-flow node that collects methods binding a LDAP connection.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LDAPBind::Range` instead.
*/
class LDAPBind extends DataFlow::Node {
LDAPBind::Range range;
class LdapBind extends DataFlow::Node {
LdapBind::Range range;
LDAPBind() { this = range }
LdapBind() { this = range }
/**
* Gets the argument containing the binding host.
@@ -193,8 +276,11 @@ class LDAPBind extends DataFlow::Node {
predicate useSSL() { range.useSSL() }
}
/** DEPRECATED: Alias for LdapBind */
deprecated class LDAPBind = LdapBind;
/** Provides classes for modeling SQL sanitization libraries. */
module SQLEscape {
module SqlEscape {
/**
* A data-flow node that collects functions that escape SQL statements.
*
@@ -209,16 +295,19 @@ module SQLEscape {
}
}
/** DEPRECATED: Alias for SqlEscape */
deprecated module SQLEscape = SqlEscape;
/**
* A data-flow node that collects functions escaping SQL statements.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SQLEscape::Range` instead.
*/
class SQLEscape extends DataFlow::Node {
SQLEscape::Range range;
class SqlEscape extends DataFlow::Node {
SqlEscape::Range range;
SQLEscape() { this = range }
SqlEscape() { this = range }
/**
* Gets the argument containing the raw SQL statement.
@@ -226,8 +315,11 @@ class SQLEscape extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides a class for modeling NoSQL execution APIs. */
module NoSQLQuery {
/** DEPRECATED: Alias for SqlEscape */
deprecated class SQLEscape = SqlEscape;
/** Provides a class for modeling NoSql execution APIs. */
module NoSqlQuery {
/**
* A data-flow node that executes NoSQL queries.
*
@@ -235,28 +327,34 @@ module NoSQLQuery {
* extend `NoSQLQuery` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be executed. */
/** Gets the argument that specifies the NoSql query to be executed. */
abstract DataFlow::Node getQuery();
}
}
/** DEPRECATED: Alias for NoSqlQuery */
deprecated module NoSQLQuery = NoSqlQuery;
/**
* A data-flow node that executes NoSQL queries.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `NoSQLQuery::Range` instead.
*/
class NoSQLQuery extends DataFlow::Node {
NoSQLQuery::Range range;
class NoSqlQuery extends DataFlow::Node {
NoSqlQuery::Range range;
NoSQLQuery() { this = range }
NoSqlQuery() { this = range }
/** Gets the argument that specifies the NoSQL query to be executed. */
/** Gets the argument that specifies the NoSql query to be executed. */
DataFlow::Node getQuery() { result = range.getQuery() }
}
/** Provides classes for modeling NoSQL sanitization-related APIs. */
module NoSQLSanitizer {
/** DEPRECATED: Alias for NoSqlQuery */
deprecated class NoSQLQuery = NoSqlQuery;
/** Provides classes for modeling NoSql sanitization-related APIs. */
module NoSqlSanitizer {
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
@@ -264,26 +362,32 @@ module NoSQLSanitizer {
* extend `NoSQLSanitizer` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the NoSQL query to be sanitized. */
/** Gets the argument that specifies the NoSql query to be sanitized. */
abstract DataFlow::Node getAnInput();
}
}
/** DEPRECATED: Alias for NoSqlSanitizer */
deprecated module NoSQLSanitizer = NoSqlSanitizer;
/**
* A data-flow node that collects functions sanitizing NoSQL queries.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `NoSQLSanitizer::Range` instead.
*/
class NoSQLSanitizer extends DataFlow::Node {
NoSQLSanitizer::Range range;
class NoSqlSanitizer extends DataFlow::Node {
NoSqlSanitizer::Range range;
NoSQLSanitizer() { this = range }
NoSqlSanitizer() { this = range }
/** Gets the argument that specifies the NoSQL query to be sanitized. */
/** Gets the argument that specifies the NoSql query to be sanitized. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** DEPRECATED: Alias for NoSqlSanitizer */
deprecated class NoSQLSanitizer = NoSqlSanitizer;
/** Provides classes for modeling HTTP Header APIs. */
module HeaderDeclaration {
/**
@@ -328,7 +432,7 @@ class HeaderDeclaration extends DataFlow::Node {
}
/** Provides classes for modeling JWT encoding-related APIs. */
module JWTEncoding {
module JwtEncoding {
/**
* A data-flow node that collects methods encoding a JWT token.
*
@@ -358,13 +462,16 @@ module JWTEncoding {
}
}
/** DEPRECATED: Alias for JwtEncoding */
deprecated module JWTEncoding = JwtEncoding;
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTEncoding::Range` instead.
*/
class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
class JwtEncoding extends DataFlow::Node instanceof JwtEncoding::Range {
/**
* Gets the argument containing the payload.
*/
@@ -386,8 +493,11 @@ class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
string getAlgorithmString() { result = super.getAlgorithmString() }
}
/** DEPRECATED: Alias for JwtEncoding */
deprecated class JWTEncoding = JwtEncoding;
/** Provides classes for modeling JWT decoding-related APIs. */
module JWTDecoding {
module JwtDecoding {
/**
* A data-flow node that collects methods decoding a JWT token.
*
@@ -427,13 +537,16 @@ module JWTDecoding {
}
}
/** DEPRECATED: Alias for JwtDecoding */
deprecated module JWTDecoding = JwtDecoding;
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTDecoding::Range` instead.
*/
class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
class JwtDecoding extends DataFlow::Node instanceof JwtDecoding::Range {
/**
* Gets the argument containing the payload.
*/
@@ -464,3 +577,6 @@ class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
*/
predicate verifiesSignature() { super.verifiesSignature() }
}
/** DEPRECATED: Alias for JwtDecoding */
deprecated class JWTDecoding = JwtDecoding;

View File

@@ -3,6 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug

View File

@@ -11,15 +11,15 @@ private import semmle.python.ApiGraphs
import semmle.python.dataflow.new.RemoteFlowSources
private module ExperimentalPrivateDjango {
private module django {
private module DjangoMod {
API::Node http() { result = API::moduleImport("django").getMember("http") }
module http {
module Http {
API::Node response() { result = http().getMember("response") }
API::Node request() { result = http().getMember("request") }
module request {
module Request {
module HttpRequest {
class DjangoGETParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGETParameter() { this = request().getMember("GET").getMember("get").getACall() }
@@ -29,7 +29,7 @@ private module ExperimentalPrivateDjango {
}
}
module response {
module Response {
module HttpResponse {
API::Node baseClassRef() {
result = response().getMember("HttpResponse").getReturn()

View File

@@ -60,7 +60,7 @@ private module LDAP {
*
* See `LDAP2QueryMethods`
*/
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
private class LDAP2Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP2Query() { this.getFunction() = ldapQuery() }
override DataFlow::Node getQuery() {
@@ -98,7 +98,7 @@ private module LDAP {
*
* See `LDAP2BindMethods`
*/
private class LDAP2Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
private class LDAP2Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP2Bind() { this.getFunction() = ldapBind() }
override DataFlow::Node getPassword() {
@@ -149,7 +149,7 @@ private module LDAP {
*
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
*/
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeDNCall() { this = ldap().getMember("dn").getMember("escape_dn_chars").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
@@ -160,7 +160,7 @@ private module LDAP {
*
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
*/
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP2EscapeFilterCall() {
this = ldap().getMember("filter").getMember("escape_filter_chars").getACall()
}
@@ -190,7 +190,7 @@ private module LDAP {
/**
* A class to find `ldap3` methods executing a query.
*/
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
private class LDAP3Query extends DataFlow::CallCfgNode, LdapQuery::Range {
LDAP3Query() {
this.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() =
ldap3Connection().getACall() and
@@ -203,7 +203,7 @@ private module LDAP {
/**
* A class to find `ldap3` methods binding a connection.
*/
class LDAP3Bind extends DataFlow::CallCfgNode, LDAPBind::Range {
class LDAP3Bind extends DataFlow::CallCfgNode, LdapBind::Range {
LDAP3Bind() { this = ldap3Connection().getACall() }
override DataFlow::Node getPassword() {
@@ -241,7 +241,7 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
*/
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeDNCall() { this = ldap3Utils().getMember("dn").getMember("escape_rdn").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
@@ -252,7 +252,7 @@ private module LDAP {
*
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
*/
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LdapEscape::Range {
LDAP3EscapeFilterCall() {
this = ldap3Utils().getMember("conv").getMember("escape_filter_chars").getACall()
}

View File

@@ -10,7 +10,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module NoSQL {
private module NoSql {
// API Nodes returning `Mongo` instances.
/** Gets a reference to `pymongo.MongoClient` */
private API::Node pyMongo() {
@@ -153,7 +153,7 @@ private module NoSQL {
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
override DataFlow::Node getQuery() { result = this.getArg(0) }
@@ -174,7 +174,7 @@ private module NoSQL {
*
* `Movie.objects(__raw__=json_search)` would be the result.
*/
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoEngineObjectsCall() {
this =
[mongoEngine(), flask_MongoEngine()]
@@ -188,7 +188,7 @@ private module NoSQL {
}
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
MongoSanitizerCall() {
this =
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
@@ -202,7 +202,7 @@ private module NoSQL {
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
* then ObjectId will throw an error preventing the query from running.
*/
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
BsonObjectIdCall() {
this =
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])

View File

@@ -12,7 +12,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Werkzeug {
module datastructures {
module Datastructures {
module Headers {
class WerkzeugHeaderAddCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
WerkzeugHeaderAddCall() {

View File

@@ -0,0 +1,466 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
module XML = ExperimentalXML;
private module XmlEtree {
/**
* Provides models for `xml.etree` parsers
*
* See
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
*/
module XMLParser {
/**
* A source of instances of `xml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `xml.etree` parsers. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLParser")
.getACall()
or
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLPullParser")
.getACall()
}
}
/** Gets a reference to an `xml.etree` parser instance. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an `xml.etree` parser instance. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `feed` method of an `xml.etree` parser.
*/
private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
/**
* A call to either of:
* - `xml.etree.ElementTree.fromstring`
* - `xml.etree.ElementTree.fromstringlist`
* - `xml.etree.ElementTree.XML`
* - `xml.etree.ElementTree.XMLID`
* - `xml.etree.ElementTree.parse`
* - `xml.etree.ElementTree.iterparse`
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("sequence"),
// parse / iterparse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// note: it does not matter what `xml.etree` parser you are using, you cannot
// change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
private module SaxBasedParsing {
/**
* A call to the `setFeature` method on a XML sax parser.
*
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
*/
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
SaxParserSetFeatureCall() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("setFeature")
.getACall()
}
// The keyword argument names does not match documentation. I checked (with Python
// 3.9.5) that the names used here actually works.
DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] }
DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] }
}
/** Gets a back-reference to the `setFeature` state argument `arg`. */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = any(SaxParserSetFeatureCall c).getStateArg() and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 |
result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t)
)
}
/** Gets a back-reference to the `setFeature` state argument `arg`. */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
t.start() and
exists(SaxParserSetFeatureCall call |
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = true and
result = call.getObject()
)
or
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
) and
// take account of that we can set the feature to False, which makes the parser safe again
not exists(SaxParserSetFeatureCall call |
call.getObject() = result and
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = false
)
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
* A call to the `parse` method on a SAX XML parser.
*/
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxInstanceParsing() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("parse")
.getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to either `parse` or `parseString` from `xml.sax` module.
*
* See:
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
*/
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxParsing() {
this =
API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// parse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
* Both of these modules are based on SAX parsers.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember(["parse", "parseString"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// minidom.parse
this.getArgByName("file"),
// pulldom.parse
this.getArgByName("stream_or_string"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
}
}
private module Lxml {
/**
* Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
module XMLParser {
/**
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Holds if this instance is vulnerable to `kind`. */
abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
}
/**
* A call to `lxml.etree.XMLParser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
LXMLParser() {
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
// NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isXxe() and
(
// resolve_entities has default True
not exists(this.getArgByName("resolve_entities"))
or
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
)
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t)
or
kind.isDtdRetrieval() and
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
}
}
/**
* A call to `lxml.etree.get_default_parser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
*/
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
LXMLDefaultParser() {
this =
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// as highlighted by
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
// by default XXE is allow. so as long as the default parser has not been
// overridden, the result is also vuln to XXE.
kind.isXxe()
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
}
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
t.start() and
result = origin
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
DataFlow::Node instance(InstanceSource origin) {
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
}
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
* A call to the `feed` method of an `lxml` parser.
*/
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
LXMLParserFeedCall() { this.calls(instance(_), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
}
}
/**
* A call to either of:
* - `lxml.etree.fromstring`
* - `lxml.etree.fromstringlist`
* - `lxml.etree.XML`
* - `lxml.etree.parse`
* - `lxml.etree.parseid`
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
*/
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
LXMLParsing() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML
this.getArgByName("text"),
// fromstringlist
this.getArgByName("strings"),
// parse / parseid
this.getArgByName("source"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
or
kind.isXxe() and
not exists(this.getParserArg())
}
}
}
private module Xmltodict {
/**
* A call to `xmltodict.parse`.
*/
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
override DataFlow::Node getAnInput() {
result in [this.getArg(0), this.getArgByName("xml_input")]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
}
}

View File

@@ -5,7 +5,7 @@ private import experimental.semmle.python.frameworks.JWT
private module Authlib {
/** Gets a reference to `authlib.jose.(jwt|JsonWebToken)` */
private API::Node authlibJWT() {
private API::Node authlibJwt() {
result in [
API::moduleImport("authlib").getMember("jose").getMember("jwt"),
API::moduleImport("authlib").getMember("jose").getMember("JsonWebToken").getReturn()
@@ -13,10 +13,10 @@ private module Authlib {
}
/** Gets a reference to `jwt.encode` */
private API::Node authlibJWTEncode() { result = authlibJWT().getMember("encode") }
private API::Node authlibJwtEncode() { result = authlibJwt().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node authlibJWTDecode() { result = authlibJWT().getMember("decode") }
private API::Node authlibJwtDecode() { result = authlibJwt().getMember("decode") }
/**
* Gets a call to `authlib.jose.(jwt|JsonWebToken).encode`.
@@ -33,8 +33,8 @@ private module Authlib {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class AuthlibJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
AuthlibJWTEncodeCall() { this = authlibJWTEncode().getACall() }
private class AuthlibJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
AuthlibJwtEncodeCall() { this = authlibJwtEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(1) }
@@ -69,8 +69,8 @@ private module Authlib {
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `key`.
*/
private class AuthlibJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
AuthlibJWTDecodeCall() { this = authlibJWTDecode().getACall() }
private class AuthlibJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
AuthlibJwtDecodeCall() { this = authlibJwtDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }

View File

@@ -3,7 +3,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PyJWT {
private module PyJwt {
/** Gets a reference to `jwt.encode` */
private API::Node pyjwtEncode() { result = API::moduleImport("jwt").getMember("encode") }
@@ -25,8 +25,8 @@ private module PyJWT {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class PyJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
PyJWTEncodeCall() { this = pyjwtEncode().getACall() }
private class PyJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
PyJwtEncodeCall() { this = pyjwtEncode().getACall() }
override DataFlow::Node getPayload() {
result in [this.getArg(0), this.getArgByName("payload")]
@@ -63,8 +63,8 @@ private module PyJWT {
* * `getOptions()`'s result would be `{"verify_signature": True}`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PyJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
PyJWTDecodeCall() { this = pyjwtDecode().getACall() }
private class PyJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
PyJwtDecodeCall() { this = pyjwtDecode().getACall() }
override DataFlow::Node getPayload() { result in [this.getArg(0), this.getArgByName("jwt")] }

View File

@@ -5,13 +5,13 @@ private import experimental.semmle.python.frameworks.JWT
private module PythonJose {
/** Gets a reference to `jwt` */
private API::Node joseJWT() { result = API::moduleImport("jose").getMember("jwt") }
private API::Node joseJwt() { result = API::moduleImport("jose").getMember("jwt") }
/** Gets a reference to `jwt.encode` */
private API::Node joseJWTEncode() { result = joseJWT().getMember("encode") }
private API::Node joseJwtEncode() { result = joseJwt().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node joseJWTDecode() { result = joseJWT().getMember("decode") }
private API::Node joseJwtDecode() { result = joseJwt().getMember("decode") }
/**
* Gets a call to `jwt.encode`.
@@ -28,8 +28,8 @@ private module PythonJose {
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class JoseJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
JoseJWTEncodeCall() { this = joseJWTEncode().getACall() }
private class JoseJwtEncodeCall extends DataFlow::CallCfgNode, JwtEncoding::Range {
JoseJwtEncodeCall() { this = joseJwtEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }
@@ -64,8 +64,8 @@ private module PythonJose {
* * `getOptions()`'s result would be none.
* * `verifiesSignature()` predicate would succeed.
*/
private class JoseJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
JoseJWTDecodeCall() { this = joseJWTDecode().getACall() }
private class JoseJwtDecodeCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
JoseJwtDecodeCall() { this = joseJwtDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }

View File

@@ -2,7 +2,7 @@ private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Python_JWT {
private module Python_Jwt {
/**
* Gets a call to `python_jwt.process_jwt`.
*
@@ -21,7 +21,7 @@ private module Python_JWT {
* * `getOptions()`'s result would be `none()`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PythonJwtProcessCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
private class PythonJwtProcessCall extends DataFlow::CallCfgNode, JwtDecoding::Range {
PythonJwtProcessCall() {
this = API::moduleImport("python_jwt").getMember("process_jwt").getACall()
}

View File

@@ -18,8 +18,8 @@ string getPrivateHostRegex() {
}
// "ldap://somethingon.theinternet.com"
class LDAPFullHost extends StrConst {
LDAPFullHost() {
class LdapFullHost extends StrConst {
LdapFullHost() {
exists(string s |
s = this.getText() and
s.regexpMatch(getFullHostRegex()) and
@@ -29,27 +29,39 @@ class LDAPFullHost extends StrConst {
}
}
class LDAPSchema extends StrConst {
LDAPSchema() { this.getText().regexpMatch(getSchemaRegex()) }
/** DEPRECATED: Alias for LdapFullHost */
deprecated class LDAPFullHost = LdapFullHost;
class LdapSchema extends StrConst {
LdapSchema() { this.getText().regexpMatch(getSchemaRegex()) }
}
class LDAPPrivateHost extends StrConst {
LDAPPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
/** DEPRECATED: Alias for LdapSchema */
deprecated class LDAPSchema = LdapSchema;
class LdapPrivateHost extends StrConst {
LdapPrivateHost() { this.getText().regexpMatch(getPrivateHostRegex()) }
}
predicate concatAndCompareAgainstFullHostRegex(LDAPSchema schema, StrConst host) {
not host instanceof LDAPPrivateHost and
/** DEPRECATED: Alias for LdapPrivateHost */
deprecated class LDAPPrivateHost = LdapPrivateHost;
predicate concatAndCompareAgainstFullHostRegex(LdapSchema schema, StrConst host) {
not host instanceof LdapPrivateHost and
(schema.getText() + host.getText()).regexpMatch(getFullHostRegex())
}
// "ldap://" + "somethingon.theinternet.com"
class LDAPBothStrings extends BinaryExpr {
LDAPBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
class LdapBothStrings extends BinaryExpr {
LdapBothStrings() { concatAndCompareAgainstFullHostRegex(this.getLeft(), this.getRight()) }
}
/** DEPRECATED: Alias for LdapBothStrings */
deprecated class LDAPBothStrings = LdapBothStrings;
// schema + host
class LDAPBothVar extends BinaryExpr {
LDAPBothVar() {
class LdapBothVar extends BinaryExpr {
LdapBothVar() {
exists(SsaVariable schemaVar, SsaVariable hostVar |
this.getLeft() = schemaVar.getVariable().getALoad() and // getAUse is incompatible with Expr
this.getRight() = hostVar.getVariable().getALoad() and
@@ -61,9 +73,12 @@ class LDAPBothVar extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapBothVar */
deprecated class LDAPBothVar = LdapBothVar;
// schema + "somethingon.theinternet.com"
class LDAPVarString extends BinaryExpr {
LDAPVarString() {
class LdapVarString extends BinaryExpr {
LdapVarString() {
exists(SsaVariable schemaVar |
this.getLeft() = schemaVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(schemaVar
@@ -74,9 +89,12 @@ class LDAPVarString extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapVarString */
deprecated class LDAPVarString = LdapVarString;
// "ldap://" + host
class LDAPStringVar extends BinaryExpr {
LDAPStringVar() {
class LdapStringVar extends BinaryExpr {
LdapStringVar() {
exists(SsaVariable hostVar |
this.getRight() = hostVar.getVariable().getALoad() and
concatAndCompareAgainstFullHostRegex(this.getLeft(),
@@ -85,22 +103,28 @@ class LDAPStringVar extends BinaryExpr {
}
}
/** DEPRECATED: Alias for LdapStringVar */
deprecated class LDAPStringVar = LdapStringVar;
/**
* A taint-tracking configuration for detecting LDAP insecure authentications.
*/
class LDAPInsecureAuthConfig extends TaintTracking::Configuration {
LDAPInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
class LdapInsecureAuthConfig extends TaintTracking::Configuration {
LdapInsecureAuthConfig() { this = "LDAPInsecureAuthConfig" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSource or
source.asExpr() instanceof LDAPFullHost or
source.asExpr() instanceof LDAPBothStrings or
source.asExpr() instanceof LDAPBothVar or
source.asExpr() instanceof LDAPVarString or
source.asExpr() instanceof LDAPStringVar
source.asExpr() instanceof LdapFullHost or
source.asExpr() instanceof LdapBothStrings or
source.asExpr() instanceof LdapBothVar or
source.asExpr() instanceof LdapVarString or
source.asExpr() instanceof LdapStringVar
}
override predicate isSink(DataFlow::Node sink) {
exists(LDAPBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
exists(LdapBind ldapBind | not ldapBind.useSSL() and sink = ldapBind.getHost())
}
}
/** DEPRECATED: Alias for LdapInsecureAuthConfig */
deprecated class LDAPInsecureAuthConfig = LdapInsecureAuthConfig;

View File

@@ -0,0 +1,28 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
module XmlEntityInjection {
import XmlEntityInjectionCustomizations::XmlEntityInjection
class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSourceAsSource
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
}
}
}

View File

@@ -0,0 +1,86 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "ldap injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module XmlEntityInjection {
/**
* A data flow source for "xml injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "xml injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/** Gets the kind of XML injection that this sink is vulnerable to. */
abstract string getVulnerableKind();
}
/**
* A sanitizer guard for "xml injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A unit class for adding additional taint steps.
*
* Extend this class to add additional taint steps that should apply to `XmlEntityInjection`
* taint configuration.
*/
class AdditionalTaintStep extends Unit {
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
* step for `XmlEntityInjection` configuration.
*/
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
}
/**
* An input to a direct XML parsing function, considered as a flow sink.
*
* See `XML::XMLParsing`.
*/
class XMLParsingInputAsSink extends Sink {
ExperimentalXML::XMLParsing xmlParsing;
XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }
override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
}
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* A taint step for `io`'s `StringIO` and `BytesIO` methods.
*/
class IoAdditionalTaintStep extends AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode ioCalls |
ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
nodeFrom = ioCalls.getArg(0) and
nodeTo = ioCalls
)
}
}
}

View File

@@ -5,7 +5,7 @@ import semmle.python.dataflow.new.RemoteFlowSources
import experimental.semmle.python.Concepts
import semmle.python.Concepts
module NoSQLInjection {
module NoSqlInjection {
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NoSQLInjection" }
@@ -15,17 +15,17 @@ module NoSQLInjection {
}
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink = any(NoSQLQuery noSQLQuery).getQuery() and
sink = any(NoSqlQuery noSqlQuery).getQuery() and
state instanceof ConvertedToDict
}
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
// Block `RemoteInput` paths here, since they change state to `ConvertedToDict`
exists(Decoding decoding | decoding.getFormat() = "JSON" and node = decoding.getOutput()) and
state instanceof RemoteInput
}
override predicate isAdditionalFlowStep(
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
@@ -38,7 +38,7 @@ module NoSQLInjection {
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
sanitizer = any(NoSqlSanitizer noSqlSanitizer).getAnInput()
}
}
@@ -52,3 +52,6 @@ module NoSQLInjection {
ConvertedToDict() { this = "ConvertedToDict" }
}
}
/** DEPRECATED: Alias for NoSqlInjection */
deprecated module NoSQLInjection = NoSqlInjection;

View File

@@ -25,7 +25,7 @@ module XSLTInjection {
ExternalXmlStringKind() { this = "etree.XML string" }
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind
etreeXml(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind
or
@@ -40,7 +40,7 @@ module XSLTInjection {
ExternalXmlKind() { this = "lxml etree xml" }
}
private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) {
private predicate etreeXml(ControlFlowNode fromnode, CallNode tonode) {
// etree.XML("<xmlContent>")
exists(CallNode call | call.getFunction().(AttrNode).getObject("XML").pointsTo(etree()) |
call.getArg(0) = fromnode and

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.0.11-dev
version: 0.0.12-dev
groups:
- python
- queries