Merge branch 'main' into edoardo/3.5-mergeback

This commit is contained in:
Edoardo Pirovano
2022-04-08 15:30:58 +01:00
684 changed files with 24297 additions and 21172 deletions

View File

@@ -0,0 +1,202 @@
/**
* Provides predicates for reasoning about regular expressions
* that match URLs and hostname patterns.
*/
private import HostnameRegexpSpecific
/**
* Holds if the given constant is unlikely to occur in the origin part of a URL.
*/
predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
// Look for any of these cases:
// - A character that can't occur in the origin
// - Two dashes in a row
// - A colon that is not part of port or scheme separator
// - A slash that is not part of scheme separator
term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
}
/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
predicate isDotConstant(RegExpTerm term) {
term.(RegExpCharEscape).getValue() = "."
or
exists(RegExpCharacterClass cls |
term = cls and
not cls.isInverted() and
cls.getNumChild() = 1 and
cls.getAChild().(RegExpConstant).getValue() = "."
)
}
/** Holds if `term` is a wildcard `.` or an actual `.` character. */
predicate isDotLike(RegExpTerm term) {
term instanceof RegExpDot
or
isDotConstant(term)
}
/** Holds if `term` will only ever be matched against the beginning of the input. */
predicate matchesBeginningOfString(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpTerm parent | matchesBeginningOfString(parent) |
term = parent.(RegExpSequence).getChild(0)
or
parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
term = parent.(RegExpSequence).getChild(1)
or
term = parent.(RegExpAlt).getAChild()
or
term = parent.(RegExpGroup).getAChild()
)
}
/**
* Holds if the given sequence contains top-level domain preceded by a dot, such as `.com`,
* excluding cases where this is at the very beginning of the regexp.
*
* `i` is bound to the index of the last child in the top-level domain part.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
seq.getChild(i)
.(RegExpConstant)
.getValue()
.regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
isDotLike(seq.getChild(i - 1)) and
not (i = 1 and matchesBeginningOfString(seq))
}
/**
* Holds if the given regular expression term contains top-level domain preceded by a dot,
* such as `.com`.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
/**
* Holds if `term` will always match a hostname, that is, all disjunctions contain
* a hostname pattern that isn't inside a quantifier.
*/
predicate alwaysMatchesHostname(RegExpTerm term) {
hasTopLevelDomainEnding(term, _)
or
// `localhost` is considered a hostname pattern, but has no TLD
term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
or
not term instanceof RegExpAlt and
not term instanceof RegExpQuantifier and
alwaysMatchesHostname(term.getAChild())
or
alwaysMatchesHostnameAlt(term)
}
/** Holds if every child of `alt` contains a hostname pattern. */
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
}
/**
* Holds if the first `i` children of `alt` contains a hostname pattern.
*
* This is used instead of `forall` to avoid materializing the set of alternatives
* that don't contains hostnames, which is much larger.
*/
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
alwaysMatchesHostname(alt.getChild(0)) and i = 0
or
alwaysMatchesHostnameAlt(alt, i - 1) and
alwaysMatchesHostname(alt.getChild(i))
}
/**
* Holds if `term` occurs inside a quantifier or alternative (and thus
* can not be expected to correspond to a unique match), or as part of
* a lookaround assertion (which are rarely used for capture groups).
*/
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
exists(RegExpParent parent | parent = term.getParent() |
parent instanceof RegExpAlt
or
parent instanceof RegExpQuantifier
or
parent instanceof RegExpSubPattern
or
isInsideChoiceOrSubPattern(parent)
)
}
/**
* Holds if `group` is likely to be used as a capture group.
*/
predicate isLikelyCaptureGroup(RegExpGroup group) {
group.isCapture() and
not isInsideChoiceOrSubPattern(group)
}
/**
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
*
* At least one of these dots is not intended to be a subdomain separator,
* so we avoid flagging the pattern in this case.
*/
predicate hasConsecutiveDots(RegExpSequence seq) {
exists(int i |
isDotLike(seq.getChild(i)) and
isDotLike(seq.getChild(i + 1))
)
}
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
) {
exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}

View File

@@ -0,0 +1,3 @@
import semmle.python.RegexTreeView
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.Regexp

View File

@@ -8,35 +8,9 @@
* @id py/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
* external/cwe/cwe-020
*/
import python
import semmle.python.regex
import HostnameRegexpShared
private string commonTopLevelDomainRegex() { result = "com|org|edu|gov|uk|net|io" }
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
*/
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
hostPart =
pattern
.regexpCapture("(?i).*" +
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + commonTopLevelDomainRegex() + "))" + ".*", 1)
}
from Regex r, string pattern, string hostPart
where
r.getText() = pattern and
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + commonTopLevelDomainRegex() + ").*[(][?]:.*[)].*")
select r,
"This regular expression has an unescaped '.' before '" + hostPart +
"', so it might match more hosts than expected."
query predicate problems = incompleteHostnameRegExp/4;

View File

@@ -17,10 +17,10 @@
*/
import python
import semmle.python.security.dataflow.PathInjection
import semmle.python.security.dataflow.PathInjectionQuery
import DataFlow::PathGraph
from PathInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This path depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -15,10 +15,10 @@
*/
import python
import semmle.python.security.dataflow.CommandInjection
import semmle.python.security.dataflow.CommandInjectionQuery
import DataFlow::PathGraph
from CommandInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -14,10 +14,10 @@
*/
import python
import semmle.python.security.dataflow.ReflectedXSS
import semmle.python.security.dataflow.ReflectedXssQuery
import DataFlow::PathGraph
from ReflectedXss::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -12,10 +12,10 @@
*/
import python
import semmle.python.security.dataflow.SqlInjection
import semmle.python.security.dataflow.SqlInjectionQuery
import DataFlow::PathGraph
from SqlInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -13,15 +13,15 @@
// Determine precision above
import python
import semmle.python.security.dataflow.LdapInjection
import semmle.python.security.dataflow.LdapInjectionQuery
import DataFlow::PathGraph
from DataFlow::PathNode source, DataFlow::PathNode sink, string parameterName
where
any(LdapInjection::DnConfiguration dnConfig).hasFlowPath(source, sink) and
any(DnConfiguration dnConfig).hasFlowPath(source, sink) and
parameterName = "DN"
or
any(LdapInjection::FilterConfiguration filterConfig).hasFlowPath(source, sink) and
any(FilterConfiguration filterConfig).hasFlowPath(source, sink) and
parameterName = "filter"
select sink.getNode(), source, sink,
"$@ LDAP query parameter (" + parameterName + ") comes from $@.", sink.getNode(), "This",

View File

@@ -15,10 +15,10 @@
*/
import python
import semmle.python.security.dataflow.CodeInjection
import semmle.python.security.dataflow.CodeInjectionQuery
import DataFlow::PathGraph
from CodeInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -12,10 +12,10 @@
*/
import python
import semmle.python.security.dataflow.LogInjection
import semmle.python.security.dataflow.LogInjectionQuery
import DataFlow::PathGraph
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"

View File

@@ -14,10 +14,10 @@
*/
import python
import semmle.python.security.dataflow.StackTraceExposure
import semmle.python.security.dataflow.StackTraceExposureQuery
import DataFlow::PathGraph
from StackTraceExposure::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ may be exposed to an external user", source.getNode(),
"Error information"

View File

@@ -15,41 +15,11 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Gets a call to a method that makes an outgoing request using the `requests` module,
* such as `requests.get` or `requests.put`, with the specified HTTP verb `verb`
*/
DataFlow::CallCfgNode outgoingRequestCall(string verb) {
verb = HTTP::httpVerbLower() and
result = API::moduleImport("requests").getMember(verb).getACall()
}
/** Gets the "verfiy" argument to a outgoingRequestCall. */
DataFlow::Node verifyArg(DataFlow::CallCfgNode call) {
call = outgoingRequestCall(_) and
result = call.getArgByName("verify")
}
/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = verifyArg(_) and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = verifyArgBacktracker(t2, arg).backtrack(t2, t))
}
/** Gets a back-reference to the verify argument `arg`. */
DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
result = verifyArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
from DataFlow::CallCfgNode call, DataFlow::Node falseyOrigin, string verb
from API::CallNode call, DataFlow::Node falseyOrigin, string verb
where
call = outgoingRequestCall(verb) and
falseyOrigin = verifyArgBacktracker(verifyArg(call)) and
verb = HTTP::httpVerbLower() and
call = API::moduleImport("requests").getMember(verb).getACall() and
falseyOrigin = call.getKeywordParameter("verify").getAValueReachingRhs() and
// requests treats `None` as the default and all other "falsey" values as `False`.
falseyOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and
not falseyOrigin.asExpr() instanceof None

View File

@@ -16,7 +16,7 @@
import python
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextLogging::CleartextLogging
import semmle.python.security.dataflow.CleartextLoggingQuery
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where

View File

@@ -16,7 +16,7 @@
import python
private import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
import semmle.python.security.dataflow.CleartextStorage::CleartextStorage
import semmle.python.security.dataflow.CleartextStorageQuery
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, string classification
where

View File

@@ -13,7 +13,7 @@
*/
import python
import semmle.python.security.dataflow.WeakSensitiveDataHashing
import semmle.python.security.dataflow.WeakSensitiveDataHashingQuery
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph

View File

@@ -13,9 +13,9 @@
*/
import python
import semmle.python.security.dataflow.UnsafeDeserialization
import semmle.python.security.dataflow.UnsafeDeserializationQuery
import DataFlow::PathGraph
from UnsafeDeserialization::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -13,10 +13,10 @@
*/
import python
import semmle.python.security.dataflow.UrlRedirect
import semmle.python.security.dataflow.UrlRedirectQuery
import DataFlow::PathGraph
from UrlRedirect::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
"A user-provided value"

View File

@@ -12,9 +12,9 @@
*/
import python
import semmle.python.security.dataflow.XpathInjection
import semmle.python.security.dataflow.XpathInjectionQuery
import DataFlow::PathGraph
from XpathInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "This Xpath query depends on $@.", source, "a user-provided value"

View File

@@ -14,12 +14,12 @@
import python
import semmle.python.security.performance.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoS
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph
from
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode,
PolynomialBackTrackingTerm regexp
where
config.hasFlowPath(source, sink) and
sinkNode = sink.getNode() and

View File

@@ -14,15 +14,15 @@
import python
private import semmle.python.Concepts
import semmle.python.security.dataflow.RegexInjection
import semmle.python.security.dataflow.RegexInjectionQuery
import DataFlow::PathGraph
from
RegexInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
RegexExecution regexExecution
where
config.hasFlowPath(source, sink) and
regexExecution = sink.getNode().(RegexInjection::Sink).getRegexExecution()
regexExecution = sink.getNode().(Sink).getRegexExecution()
select sink.getNode(), source, sink,
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
source.getNode(), "user-provided value", regexExecution, regexExecution.getName()

View File

@@ -11,14 +11,14 @@
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import semmle.python.security.dataflow.ServerSideRequestForgeryQuery
import DataFlow::PathGraph
from
FullServerSideRequestForgery::Configuration fullConfig, DataFlow::PathNode source,
FullServerSideRequestForgeryConfiguration fullConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(FullServerSideRequestForgery::Sink).getRequest() and
request = sink.getNode().(Sink).getRequest() and
fullConfig.hasFlowPath(source, sink) and
fullyControlledRequest(request)
select request, source, sink, "The full URL of this request depends on $@.", source.getNode(),

View File

@@ -11,14 +11,14 @@
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import semmle.python.security.dataflow.ServerSideRequestForgeryQuery
import DataFlow::PathGraph
from
PartialServerSideRequestForgery::Configuration partialConfig, DataFlow::PathNode source,
PartialServerSideRequestForgeryConfiguration partialConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(PartialServerSideRequestForgery::Sink).getRequest() and
request = sink.getNode().(Sink).getRequest() and
partialConfig.hasFlowPath(source, sink) and
not fullyControlledRequest(request)
select request, source, sink, "Part of the URL of this request depends on $@.", source.getNode(),

View File

@@ -14,6 +14,14 @@
import python
predicate isInsideLoop(AstNode node) {
node.getParentNode() instanceof While
or
node.getParentNode() instanceof For
or
exists(AstNode prev | isInsideLoop(prev) | node = prev.getAChildNode())
}
from Delete del, Expr e, Function f
where
f.getLastStatement() = del and
@@ -21,7 +29,7 @@ where
f.containsInScope(e) and
not e instanceof Subscript and
not e instanceof Attribute and
not exists(Stmt s | s.(While).contains(del) or s.(For).contains(del)) and
not isInsideLoop(del) and
// False positive: calling `sys.exc_info` within a function results in a
// reference cycle, and an explicit call to `del` helps break this cycle.
not exists(FunctionValue ex |

View File

@@ -0,0 +1,16 @@
/**
* @name Call graph
* @description An edge in the points-to call graph.
* @kind problem
* @problem.severity recommendation
* @id py/meta/points-to-call-graph
* @tags meta
* @precision very-low
*/
import python
import semmle.python.dataflow.new.internal.DataFlowPrivate
from DataFlowCall c, DataFlowCallableValue f
where c.getCallable() = f
select c, "Call to $@", f.getScope(), f.toString()