Merge branch 'main' into alamofire2

This commit is contained in:
Geoffrey White
2022-11-15 12:19:54 +00:00
37 changed files with 1677 additions and 3663 deletions

View File

@@ -1,8 +1,8 @@
import csharp
import semmle.code.csharp.dataflow.internal.SsaImpl::Consistency as Consistency
import semmle.code.csharp.dataflow.internal.SsaImpl::Consistency
import Ssa
class MyRelevantDefinition extends Consistency::RelevantDefinition, Ssa::Definition {
class MyRelevantDefinition extends RelevantDefinition, Ssa::Definition {
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
@@ -10,14 +10,6 @@ class MyRelevantDefinition extends Consistency::RelevantDefinition, Ssa::Definit
}
}
query predicate nonUniqueDef = Consistency::nonUniqueDef/4;
query predicate readWithoutDef = Consistency::readWithoutDef/3;
query predicate deadDef = Consistency::deadDef/2;
query predicate notDominatedByDef = Consistency::notDominatedByDef/4;
query predicate localDeclWithSsaDef(LocalVariableDeclExpr d) {
// Local variables in C# must be initialized before every use, so uninitialized
// local variables should not have an SSA definition, as that would imply that

View File

@@ -57,7 +57,7 @@ func (l *Labeler) GlobalID(key string) Label {
label, exists := l.keyLabels[key]
if !exists {
id := l.nextID()
fmt.Fprintf(l.tw.zip, "%s=@\"%s\"\n", id, escapeString(key))
fmt.Fprintf(l.tw.wzip, "%s=@\"%s\"\n", id, escapeString(key))
label = Label{id}
l.keyLabels[key] = label
}
@@ -90,7 +90,7 @@ func (l *Labeler) LocalID(nd interface{}) Label {
// FreshID creates a fresh label and returns it
func (l *Labeler) FreshID() Label {
id := l.nextID()
fmt.Fprintf(l.tw.zip, "%s=*\n", id)
fmt.Fprintf(l.tw.wzip, "%s=*\n", id)
return Label{id}
}

View File

@@ -19,7 +19,8 @@ import (
// A Writer provides methods for writing data to a TRAP file
type Writer struct {
zip *gzip.Writer
w *bufio.Writer
wzip *bufio.Writer
wfile *bufio.Writer
file *os.File
Labeler *Labeler
path string
@@ -54,11 +55,13 @@ func NewWriter(path string, pkg *packages.Package) (*Writer, error) {
if err != nil {
return nil, err
}
bufioWriter := bufio.NewWriter(tmpFile)
zipWriter := gzip.NewWriter(bufioWriter)
bufioFileWriter := bufio.NewWriter(tmpFile)
zipWriter := gzip.NewWriter(bufioFileWriter)
bufioZipWriter := bufio.NewWriter(zipWriter)
tw := &Writer{
zipWriter,
bufioWriter,
bufioZipWriter,
bufioFileWriter,
tmpFile,
nil,
path,
@@ -88,13 +91,19 @@ func trapFolder() (string, error) {
// Close the underlying file writer
func (tw *Writer) Close() error {
err := tw.zip.Close()
err := tw.wzip.Flush()
if err != nil {
// throw away file close error
tw.file.Close()
return err
}
err = tw.zip.Close()
if err != nil {
// return zip-close error, but ignore file-close error
tw.file.Close()
return err
}
err = tw.w.Flush()
err = tw.wfile.Flush()
if err != nil {
// throw away close error because write errors are likely to be more important
tw.file.Close()
@@ -145,24 +154,24 @@ func capStringLength(s string) string {
// Emit writes out a tuple of values for the given `table`
func (tw *Writer) Emit(table string, values []interface{}) error {
fmt.Fprintf(tw.zip, "%s(", table)
fmt.Fprintf(tw.wzip, "%s(", table)
for i, value := range values {
if i > 0 {
fmt.Fprint(tw.zip, ", ")
fmt.Fprint(tw.wzip, ", ")
}
switch value := value.(type) {
case Label:
fmt.Fprint(tw.zip, value.id)
fmt.Fprint(tw.wzip, value.id)
case string:
fmt.Fprintf(tw.zip, "\"%s\"", escapeString(capStringLength(value)))
fmt.Fprintf(tw.wzip, "\"%s\"", escapeString(capStringLength(value)))
case int:
fmt.Fprintf(tw.zip, "%d", value)
fmt.Fprintf(tw.wzip, "%d", value)
case float64:
fmt.Fprintf(tw.zip, "%e", value)
fmt.Fprintf(tw.wzip, "%e", value)
default:
return errors.New("Cannot emit value")
}
}
fmt.Fprintf(tw.zip, ")\n")
fmt.Fprintf(tw.wzip, ")\n")
return nil
}

View File

@@ -138,6 +138,10 @@ open class LoggerBase(val logCounter: LogCounter) {
fullMsgBuilder.append(suffix)
val fullMsg = fullMsgBuilder.toString()
emitDiagnostic(tw, severity, diagnosticLocStr, msg, fullMsg, locationString, mkLocationId)
}
private fun emitDiagnostic(tw: TrapWriter, severity: Severity, diagnosticLocStr: String, msg: String, fullMsg: String, locationString: String? = null, mkLocationId: () -> Label<DbLocation> = { tw.unknownLocation }) {
val locStr = if (locationString == null) "" else "At " + locationString + ": "
val kind = if (severity <= Severity.WarnHigh) "WARN" else "ERROR"
val logMessage = LogMessage(kind, "Diagnostic($diagnosticLocStr): $locStr$fullMsg")
@@ -190,9 +194,10 @@ open class LoggerBase(val logCounter: LogCounter) {
// We don't know if this location relates to an error
// or a warning, so we just declare hitting the limit
// to be an error regardless.
val logMessage = LogMessage("ERROR", "Total of $count diagnostics from $caller.")
tw.writeComment(logMessage.toText())
logStream.write(logMessage.toJsonLine())
val message = "Total of $count diagnostics (reached limit of ${logCounter.diagnosticLimit}) from $caller."
if (verbosity >= 1) {
emitDiagnostic(tw, Severity.Error, "Limit", message, message)
}
}
}
}

View File

@@ -9,6 +9,13 @@
import java
import semmle.code.java.Diagnostics
/**
 * Holds if `key` has the form `<infoKey>: <info>`, where `<info>` is the value
 * that some `Compilation` reports for `infoKey`. `value` is always `1`.
 */
predicate compilationInfo(string key, int value) {
  value = 1 and
  exists(Compilation compilation, string infoKey |
    key = infoKey + ": " + compilation.getInfo(infoKey)
  )
}
predicate fileCount(string key, int value) {
key = "Number of files" and
value = strictcount(File f)
@@ -53,13 +60,38 @@ predicate extractorDiagnostics(string key, int value) {
)
}
/**
 * Holds if `key` names the total number of diagnostics from some extractor and
 * `value` is that total.
 *
 * Just counting the `Diagnostic`s doesn't give the full picture, because
 * CODEQL_EXTRACTOR_KOTLIN_DIAGNOSTIC_LIMIT means that some diagnostics will be
 * suppressed. For every suppression message ("Total of N diagnostics (reached
 * limit of L) ...") we therefore add `N - L - 1`: this uncounts the diagnostics
 * that did get emitted, uncounts the suppression message itself, and adds on
 * the full count.
 */
predicate extractorTotalDiagnostics(string key, int value) {
  exists(string extractor, string limitRegex, int emitted, int adjustment |
    limitRegex = "Total of ([0-9]+) diagnostics \\(reached limit of ([0-9]+)\\).*" and
    // Diagnostics that are actually present in the database.
    emitted = strictcount(Diagnostic d | d.getGeneratedBy() = extractor) and
    // Correction contributed by the suppression messages; diagnostics whose
    // message does not match `limitRegex` contribute nothing to the sum.
    adjustment =
      sum(Diagnostic d |
        d.getGeneratedBy() = extractor
      |
        d.getMessage().regexpCapture(limitRegex, 1).toInt() -
          d.getMessage().regexpCapture(limitRegex, 2).toInt() - 1
      ) and
    key = "Total number of diagnostics from " + extractor and
    value = emitted + adjustment
  )
}
from string key, int value
where
compilationInfo(key, value) or
fileCount(key, value) or
fileCountByExtension(key, value) or
totalNumberOfLines(key, value) or
numberOfLinesOfCode(key, value) or
totalNumberOfLinesByExtension(key, value) or
numberOfLinesOfCodeByExtension(key, value) or
extractorDiagnostics(key, value)
extractorDiagnostics(key, value) or
extractorTotalDiagnostics(key, value)
select key, value

View File

@@ -50,7 +50,8 @@ abstract class AtmConfig extends string {
// known sink for the class.
exists(EndpointCharacteristic characteristic |
characteristic.getEndpoints(sink) and
characteristic.getImplications(this.getASinkEndpointType(), true, 1.0)
characteristic
.getImplications(this.getASinkEndpointType(), true, characteristic.maximalConfidence())
)
}

View File

@@ -30,18 +30,36 @@ abstract class EndpointCharacteristic extends string {
/**
* This predicate describes what the characteristic tells us about an endpoint.
*
* Params:
* endpointClass: Class 0 is the negative class. Each positive int corresponds to a single sink type.
* isPositiveIndicator: Does this characteristic indicate this endpoint _is_ a member of the class, or that it
* _isn't_ a member of the class?
* confidence: A number in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint
* belonging / not belonging to the given class.
* Params:
* endpointClass: The sink type. Each EndpointType has a predicate getEncoding, which specifies the classifier
* class for this sink type. Class 0 is the negative class (non-sink). Each positive int corresponds to a single
* sink type.
* isPositiveIndicator: If true, this characteristic indicates that this endpoint _is_ a member of the class; if
* false, it indicates that it _isn't_ a member of the class.
* confidence: A float in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint
* belonging / not belonging to the given class. A confidence near zero means this characteristic is a very weak
* indicator of whether or not the endpoint belongs to the class. A confidence of 1 means that all endpoints with
* this characteristic definitively do/don't belong to the class.
*/
abstract predicate getImplications(
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
);
// The following are some confidence values that are used in practice by the subclasses. They are defined as named
// constants here to make it easier to change them in the future.
/** Gets the maximal confidence value, `1.0`. */
final float maximalConfidence() { result = 1.0 }
/** Gets the confidence value used for high-confidence indicators, `0.9`. */
final float highConfidence() { result = 0.9 }
/** Gets the confidence value used for medium-confidence indicators, `0.6`. */
final float mediumConfidence() { result = 0.6 }
}
/*
* Characteristics that are indicative of a sink.
* NOTE: Initially each sink type has only one characteristic, which is that it's a sink of this type in the standard
* JavaScript libraries.
*/
/**
* Endpoints identified as "DomBasedXssSink" by the standard JavaScript libraries are XSS sinks with maximal confidence.
*/
@@ -53,7 +71,9 @@ private class DomBasedXssSinkCharacteristic extends EndpointCharacteristic {
override predicate getImplications(
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
) {
endpointClass instanceof XssSinkType and isPositiveIndicator = true and confidence = 1.0
endpointClass instanceof XssSinkType and
isPositiveIndicator = true and
confidence = maximalConfidence()
}
}
@@ -69,7 +89,9 @@ private class TaintedPathSinkCharacteristic extends EndpointCharacteristic {
override predicate getImplications(
EndpointType endpointClass, boolean isPositiveIndicator, float confidence
) {
endpointClass instanceof TaintedPathSinkType and isPositiveIndicator = true and confidence = 1.0
endpointClass instanceof TaintedPathSinkType and
isPositiveIndicator = true and
confidence = maximalConfidence()
}
}
@@ -87,7 +109,7 @@ private class SqlInjectionSinkCharacteristic extends EndpointCharacteristic {
) {
endpointClass instanceof SqlInjectionSinkType and
isPositiveIndicator = true and
confidence = 1.0
confidence = maximalConfidence()
}
}
@@ -105,6 +127,315 @@ private class NosqlInjectionSinkCharacteristic extends EndpointCharacteristic {
) {
endpointClass instanceof NosqlInjectionSinkType and
isPositiveIndicator = true and
confidence = 1.0
confidence = maximalConfidence()
}
}
/*
* Characteristics that are indicative of not being a sink of any type.
*/
/**
 * A characteristic that is an indicator of not being a sink of any type, because it's an argument to a function of a
 * builtin object.
 *
 * This class only groups such characteristics: it declares no implications or endpoints itself, so concrete
 * subclasses also extend a class that provides `getImplications` and define their own `getEndpoints`.
 */
abstract private class ArgumentToBuiltinFunctionCharacteristic extends EndpointCharacteristic {
bindingset[this]
ArgumentToBuiltinFunctionCharacteristic() { any() }
}
/**
 * A high-confidence characteristic that indicates that an endpoint is not a sink of any type.
 *
 * Every subclass is a positive indicator for the negative class (`NegativeType`) with `highConfidence()`.
 */
abstract private class NotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  NotASinkCharacteristic() { any() }

  override predicate getImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  ) {
    isPositiveIndicator = true and
    confidence = this.highConfidence() and
    endpointClass instanceof NegativeType
  }
}
/**
 * A medium-confidence characteristic that indicates that an endpoint is not a sink of any type.
 *
 * Every subclass is a positive indicator for the negative class (`NegativeType`) with `mediumConfidence()`.
 *
 * TODO: This class is currently not private, because the current extraction logic explicitly avoids including these
 * endpoints in the training data. We might want to change this in the future.
 */
abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  LikelyNotASinkCharacteristic() { any() }

  override predicate getImplications(
    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
  ) {
    isPositiveIndicator = true and
    confidence = this.mediumConfidence() and
    endpointClass instanceof NegativeType
  }
}
/** The "not a sink" characteristic of being an argument of a call to a `LodashUnderscore::Member`. */
private class LodashUnderscore extends NotASinkCharacteristic {
  LodashUnderscore() { this = "LodashUnderscoreArgument" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(LodashUnderscore::Member member | n = member.getACall().getAnArgument())
  }
}
/** The "not a sink" characteristic of being an argument of a `JQuery::MethodCall`. */
private class JQueryArgumentCharacteristic extends NotASinkCharacteristic {
  JQueryArgumentCharacteristic() { this = "JQueryArgument" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(JQuery::MethodCall jqueryCall | n = jqueryCall.getAnArgument())
  }
}
/**
 * The "not a sink" characteristic of being an argument, the URL, the host, or a data node of a `ClientRequest`.
 */
private class ClientRequestCharacteristic extends NotASinkCharacteristic {
  ClientRequestCharacteristic() { this = "ClientRequest" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(ClientRequest request |
      n =
        [
          request.getAnArgument(), request.getUrl(), request.getHost(), request.getADataNode()
        ]
    )
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call to the resolve or reject parameter of a
 * `PromiseDefinition`.
 */
private class PromiseDefinitionCharacteristic extends NotASinkCharacteristic {
  PromiseDefinitionCharacteristic() { this = "PromiseDefinition" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(PromiseDefinition promise |
      n = promise.getResolveParameter().getACall().getAnArgument()
      or
      n = promise.getRejectParameter().getACall().getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being a `CryptographicKey`. */
private class CryptographicKeyCharacteristic extends NotASinkCharacteristic {
  CryptographicKeyCharacteristic() { this = "CryptographicKey" }

  override predicate getEndpoints(DataFlow::Node n) { n = any(CryptographicKey key) }
}
/** The "not a sink" characteristic of being the input of a `CryptographicOperation`. */
private class CryptographicOperationFlowCharacteristic extends NotASinkCharacteristic {
  CryptographicOperationFlowCharacteristic() { this = "CryptographicOperationFlow" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(CryptographicOperation operation | n = operation.getInput())
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call whose callee name is one of
 * `getAStandardLoggerMethodName()`.
 */
private class LoggerMethodCharacteristic extends NotASinkCharacteristic {
  LoggerMethodCharacteristic() { this = "LoggerMethod" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode loggerCall |
      loggerCall.getCalleeName() = getAStandardLoggerMethodName() and
      n = loggerCall.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call named `setTimeout` or `clearTimeout`. */
private class TimeoutCharacteristic extends NotASinkCharacteristic {
  TimeoutCharacteristic() { this = "Timeout" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode timerCall, string calleeName |
      calleeName = "setTimeout" or calleeName = "clearTimeout"
    |
      timerCall.getCalleeName() = calleeName and
      n = timerCall.getAnArgument()
    )
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call whose receiver is a reference to the global
 * variable `localStorage` or `sessionStorage`.
 */
private class ReceiverStorageCharacteristic extends NotASinkCharacteristic {
  ReceiverStorageCharacteristic() { this = "ReceiverStorage" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode storageCall, string storageName |
      storageName = ["localStorage", "sessionStorage"] and
      storageCall.getReceiver() = DataFlow::globalVarRef(storageName)
    |
      n = storageCall.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call that is a `StringOps::StartsWith`. */
private class StringStartsWithCharacteristic extends NotASinkCharacteristic {
  StringStartsWithCharacteristic() { this = "StringStartsWith" }

  override predicate getEndpoints(DataFlow::Node n) {
    n = any(DataFlow::CallNode call | call instanceof StringOps::StartsWith).getAnArgument()
  }
}
/** The "not a sink" characteristic of being an argument of a call that is a `StringOps::EndsWith`. */
private class StringEndsWithCharacteristic extends NotASinkCharacteristic {
  StringEndsWithCharacteristic() { this = "StringEndsWith" }

  override predicate getEndpoints(DataFlow::Node n) {
    n = any(DataFlow::CallNode call | call instanceof StringOps::EndsWith).getAnArgument()
  }
}
/** The "not a sink" characteristic of being an argument of a call that is a `StringOps::RegExpTest`. */
private class StringRegExpTestCharacteristic extends NotASinkCharacteristic {
  StringRegExpTestCharacteristic() { this = "StringRegExpTest" }

  override predicate getEndpoints(DataFlow::Node n) {
    n = any(DataFlow::CallNode call | call instanceof StringOps::RegExpTest).getAnArgument()
  }
}
/** The "not a sink" characteristic of being an argument of a call that is an `EventRegistration`. */
private class EventRegistrationCharacteristic extends NotASinkCharacteristic {
  EventRegistrationCharacteristic() { this = "EventRegistration" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode registration |
      registration instanceof EventRegistration and
      n = registration.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call that is an `EventDispatch`. */
private class EventDispatchCharacteristic extends NotASinkCharacteristic {
  EventDispatchCharacteristic() { this = "EventDispatch" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode dispatch |
      dispatch instanceof EventDispatch and
      n = dispatch.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call that is the test of a `MembershipCandidate`. */
private class MembershipCandidateTestCharacteristic extends NotASinkCharacteristic {
  MembershipCandidateTestCharacteristic() { this = "MembershipCandidateTest" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(MembershipCandidate candidate, DataFlow::CallNode testCall |
      testCall = candidate.getTest() and
      n = testCall.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call that is a `FileSystemAccess`. */
private class FileSystemAccessCharacteristic extends NotASinkCharacteristic {
  FileSystemAccessCharacteristic() { this = "FileSystemAccess" }

  override predicate getEndpoints(DataFlow::Node n) {
    n = any(DataFlow::CallNode call | call instanceof FileSystemAccess).getAnArgument()
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call that either is a `DatabaseAccess` itself or has a
 * method call on its result that is a `DatabaseAccess`.
 */
private class DatabaseAccessCharacteristic extends NotASinkCharacteristic {
  DatabaseAccessCharacteristic() { this = "DatabaseAccess" }

  override predicate getEndpoints(DataFlow::Node n) {
    // TODO database accesses are less well defined than database query sinks, so this may cover unmodeled sinks on
    // existing database models
    exists(DataFlow::CallNode call |
      call instanceof DatabaseAccess
      or
      // command pattern where the query is built, and then exec'ed later
      call.getAMethodCall() instanceof DatabaseAccess
    |
      n = call.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call that is a `DOM::domValueRef()`. */
private class DomCharacteristic extends NotASinkCharacteristic {
  DomCharacteristic() { this = "DOM" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode domCall |
      domCall = DOM::domValueRef() and
      n = domCall.getAnArgument()
    )
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call named `next` that calls the last parameter of
 * some function.
 */
private class NextFunctionCallCharacteristic extends NotASinkCharacteristic {
  NextFunctionCallCharacteristic() { this = "NextFunctionCall" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::FunctionNode enclosing, DataFlow::CallNode nextCall |
      nextCall = enclosing.getLastParameter().getACall() and
      nextCall.getCalleeName() = "next" and
      n = nextCall.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being an argument of a call to the `require` property of the global `dojo`. */
private class DojoRequireCharacteristic extends NotASinkCharacteristic {
  DojoRequireCharacteristic() { this = "DojoRequire" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode requireCall |
      requireCall = DataFlow::globalVarRef("dojo").getAPropertyRead("require").getACall() and
      n = requireCall.getAnArgument()
    )
  }
}
/** The "not a sink" characteristic of being the input of a `Base64::Decode` or a `Base64::Encode`. */
private class Base64ManipulationCharacteristic extends NotASinkCharacteristic {
  Base64ManipulationCharacteristic() { this = "Base64Manipulation" }

  override predicate getEndpoints(DataFlow::Node n) {
    n = any(Base64::Decode decode).getInput()
    or
    n = any(Base64::Encode encode).getInput()
  }
}
/**
 * The "likely not a sink" characteristic of being an argument of an invocation of an array creation node, of an
 * invocation of it, or of a property read on either of those.
 */
private class ArgumentToArrayCharacteristic extends ArgumentToBuiltinFunctionCharacteristic,
  LikelyNotASinkCharacteristic {
  ArgumentToArrayCharacteristic() { this = "ArgumentToArray" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(
      DataFlow::SourceNode builtin, DataFlow::SourceNode receiver, DataFlow::InvokeNode invocation
    |
      builtin instanceof DataFlow::ArrayCreationNode and
      // The receiver is the builtin itself or the result of invoking it.
      (receiver = builtin or receiver = builtin.getAnInvocation())
    |
      // The invocation is of the receiver or of one of its properties.
      (
        invocation = receiver.getAnInvocation() or
        invocation = receiver.getAPropertyRead().getAnInvocation()
      ) and
      n = invocation.getAnArgument()
    )
  }
}
/**
 * The "likely not a sink" characteristic of being an argument of an invocation of one of a fixed list of builtin
 * global variables (`Map`, `Set`, `Object`, ...), of an invocation of it, or of a property read on either of those.
 */
private class ArgumentToBuiltinGlobalVarRefCharacteristic extends ArgumentToBuiltinFunctionCharacteristic,
  LikelyNotASinkCharacteristic {
  ArgumentToBuiltinGlobalVarRefCharacteristic() { this = "ArgumentToBuiltinGlobalVarRef" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(
      string globalName, DataFlow::SourceNode builtin, DataFlow::SourceNode receiver,
      DataFlow::InvokeNode invocation
    |
      globalName =
        [
          "Map", "Set", "WeakMap", "WeakSet", "Number", "Object", "String", "Array", "Error",
          "Math", "Boolean"
        ] and
      builtin = DataFlow::globalVarRef(globalName)
    |
      // The receiver is the builtin itself or the result of invoking it.
      (receiver = builtin or receiver = builtin.getAnInvocation()) and
      // The invocation is of the receiver or of one of its properties.
      (
        invocation = receiver.getAnInvocation() or
        invocation = receiver.getAPropertyRead().getAnInvocation()
      ) and
      n = invocation.getAnArgument()
    )
  }
}
/**
 * The "not a sink" characteristic of being an argument of a method call whose receiver is a `ConstantString`, a
 * `NumberLiteral`, or a `BooleanLiteral`.
 */
private class ConstantReceiverCharacteristic extends ArgumentToBuiltinFunctionCharacteristic,
  NotASinkCharacteristic {
  ConstantReceiverCharacteristic() { this = "ConstantReceiver" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(MethodCallExpr methodCall, Expr receiver |
      methodCall.calls(receiver, _) and
      n.asExpr() = methodCall.getAnArgument()
    |
      receiver instanceof ConstantString
      or
      receiver instanceof NumberLiteral
      or
      receiver instanceof BooleanLiteral
    )
  }
}
/**
 * The "not a sink" characteristic of being an argument of a call whose callee name is one of a fixed list of
 * builtin method names (`indexOf`, `hasOwnProperty`, `substring`, ...).
 */
private class BuiltinCallNameCharacteristic extends ArgumentToBuiltinFunctionCharacteristic,
  NotASinkCharacteristic {
  BuiltinCallNameCharacteristic() { this = "BuiltinCallName" }

  override predicate getEndpoints(DataFlow::Node n) {
    exists(DataFlow::CallNode builtinCall, string builtinName |
      builtinName =
        [
          "indexOf", "hasOwnProperty", "substring", "isDecimal", "decode", "encode", "keys",
          "shift", "values", "forEach", "toString", "slice", "splice", "push", "isArray", "sort"
        ]
    |
      builtinCall.getCalleeName() = builtinName and
      n = builtinCall.getAnArgument()
    )
  }
}

View File

@@ -16,6 +16,11 @@ newtype TEndpointType =
abstract class EndpointType extends TEndpointType {
abstract string getDescription();
/**
* Gets the integer representation of this endpoint type. This integer representation specifies the class number
* used by the endpoint scoring model (the classifier) to represent this endpoint type. Class 0 is the negative
* class (non-sink). Each positive int corresponds to a single sink type.
*/
abstract int getEncoding();
string toString() { result = getDescription() }

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The ReDoS libraries in `semmle.code.python.security.regexp` have been moved to a shared pack inside the `shared/` folder, and the previous location has been deprecated.

View File

@@ -5,3 +5,5 @@ dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
upgrades: upgrades
dependencies:
codeql/regex: ${workspace}

File diff suppressed because it is too large Load Diff

View File

@@ -2,155 +2,7 @@
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import regexp.RegexpMatching
/**
* Holds if the regexp `root` should be tested against `str`.
* Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`.
* `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored.
* `testWithGroups` toggles whether it's tested which groups are filled by a given input string.
*/
private predicate isBadTagFilterCandidate(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
) {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
root
) and
ignorePrefix = true and
(
str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
testWithGroups = true
or
str =
[
"<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
"<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
"<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
"<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
"<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
"<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
] and
testWithGroups = false
)
}
/**
* A regexp that matches some string from the `isBadTagFilterCandidate` predicate.
*/
class HtmlMatchingRegExp extends RootTerm {
HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) }
/** Holds if this regexp matched `str`, where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) }
/** Holds if this regexp fills capture group `g' when matching `str', where `str` is one of the string from `isBadTagFilterCandidate`. */
predicate fillsCaptureGroup(string str, int g) {
RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g)
}
}
/** DEPRECATED: Alias for HtmlMatchingRegExp */
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*
* When adding a new case to this predicate, make sure the test string used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as an HTML comment end tag."
)
or
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// BadTagFilterQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep
import Dep

View File

@@ -2,288 +2,7 @@
* Classes and predicates for working with suspicious character ranges.
*/
// We don't need the NFA utils, just the regexp tree.
// but the below is a nice shared library that exposes the API we need.
import regexp.NfaUtils
/**
* Gets a rank for `range` that is unique for ranges in the same file.
* Prioritizes ranges that match more characters.
*/
int rankRange(RegExpCharacterRange range) {
range =
rank[result](RegExpCharacterRange r, Location l, int low, int high |
r.getLocation() = l and
isRange(r, low, high)
|
r order by (high - low) desc, l.getStartLine(), l.getStartColumn()
)
}
/** Holds if `range` spans from the unicode code points `low` to `high` (both inclusive). */
predicate isRange(RegExpCharacterRange range, int low, int high) {
exists(string lowc, string highc |
range.isRange(lowc, highc) and
low.toUnicode() = lowc and
high.toUnicode() = highc
)
}
/** Holds if `char` is an alpha-numeric character. */
predicate isAlphanumeric(string char) {
// written like this to avoid having a bindingset for the predicate
char = [[48 .. 57], [65 .. 90], [97 .. 122]].toUnicode() // 0-9, A-Z, a-z
}
/**
* Holds if the given ranges are from the same character class
* and there exists at least one character matched by both ranges.
*/
predicate overlap(RegExpCharacterRange a, RegExpCharacterRange b) {
exists(RegExpCharacterClass clz |
a = clz.getAChild() and
b = clz.getAChild() and
a != b
|
exists(int alow, int ahigh, int blow, int bhigh |
isRange(a, alow, ahigh) and
isRange(b, blow, bhigh) and
alow <= bhigh and
blow <= ahigh
)
)
}
/**
* Holds if `range` overlaps with the char class `escape` from the same character class.
*/
predicate overlapsWithCharEscape(RegExpCharacterRange range, RegExpCharacterClassEscape escape) {
exists(RegExpCharacterClass clz, string low, string high |
range = clz.getAChild() and
escape = clz.getAChild() and
range.isRange(low, high)
|
escape.getValue() = "w" and
getInRange(low, high).regexpMatch("\\w")
or
escape.getValue() = "d" and
getInRange(low, high).regexpMatch("\\d")
or
escape.getValue() = "s" and
getInRange(low, high).regexpMatch("\\s")
)
}
/** Gets the unicode code point for a `char`. */
bindingset[char]
int toCodePoint(string char) { result.toUnicode() = char }
/** A character range that appears to be overly wide. */
class OverlyWideRange extends RegExpCharacterRange {
OverlyWideRange() {
exists(int low, int high, int numChars |
isRange(this, low, high) and
numChars = (1 + high - low) and
this.getRootTerm().isUsedAsRegExp() and
numChars >= 10
|
// across the Z-a range (which includes backticks)
toCodePoint("Z") >= low and
toCodePoint("a") <= high
or
// across the 9-A range (which includes e.g. ; and ?)
toCodePoint("9") >= low and
toCodePoint("A") <= high
or
// a non-alphanumeric char as part of the range boundaries
exists(int bound | bound = [low, high] | not isAlphanumeric(bound.toUnicode())) and
// while still being ascii
low < 128 and
high < 128
) and
// allowlist for known ranges
not this = allowedWideRanges()
}
/** Gets a string representation of a character class that matches the same chars as this range. */
string printEquivalent() { result = RangePrinter::printEquivalentCharClass(this) }
}
/** Gets a range that is exempt from being reported as an overly wide range. */
RegExpCharacterRange allowedWideRanges() {
  // A range that starts at the zero byte is most likely a deliberately large range.
  result.isRange(0.toUnicode(), _)
  or
  // " " is the first printable ASCII character, so ranges starting there are accepted.
  result.isRange(" ", _)
  or
  // "!" is the first printable ASCII character that is not white-space.
  result.isRange("!", _)
  or
  // "~" is the last printable ASCII character, and is commonly used as the upper end of wide ranges.
  result.isRange(_, "~")
  or
  // `[@-_]` is treated as an intentional range.
  result.isRange("@", "_")
}
/** Gets a character whose code point lies between those of `low` and `high`, both inclusive. */
bindingset[low, high]
private string getInRange(string low, string high) {
  exists(int codePoint | codePoint in [toCodePoint(low) .. toCodePoint(high)] |
    result = codePoint.toUnicode()
  )
}
/**
 * A module computing an equivalent character class for an overly wide range.
 *
 * The range is cut into parts at the boundaries of the `A-Z`, `a-z` and `0-9` blocks.
 * A part whose two ends are alphanumeric is printed as `low-high`; any other part is
 * printed as the (escaped) list of the individual characters it contains.
 */
module RangePrinter {
/**
 * Gets the character whose code point is one higher than that of `char`.
 * The two binding sets allow the predicate to be evaluated in either direction.
 */
bindingset[char]
bindingset[result]
private string next(string char) {
exists(int prev, int next |
prev.toUnicode() = char and
next.toUnicode() = result and
next = prev + 1
)
}
/** Gets the points where the parts of the pretty printed range should be cut off. */
private string cutoffs() { result = ["A", "Z", "a", "z", "0", "9"] }
/**
 * Gets the char to use in the low end of a range for a given `cut`.
 * A part starts at the cut itself for `A`, `a` and `0`, and at the char after the cut for `Z`, `z` and `9`.
 */
private string lowCut(string cut) {
cut = ["A", "a", "0"] and
result = cut
or
cut = ["Z", "z", "9"] and
result = next(cut)
}
/**
 * Gets the char to use in the high end of a range for a given `cut`.
 * A part ends at the cut itself for `Z`, `z` and `9`, and at the char before the cut for `A`, `a` and `0`.
 */
private string highCut(string cut) {
cut = ["Z", "z", "9"] and
result = cut
or
cut = ["A", "a", "0"] and
// `next` is used in reverse here: `result` is the char just before `cut`.
next(result) = cut
}
/**
 * Gets the cutoff char used for a given `part` of a range when pretty-printing it.
 * Only cutoffs strictly between the two ends of `range` are used, ranked by code point;
 * `part` is 0-based.
 */
private string cutoff(OverlyWideRange range, int part) {
exists(int low, int high | isRange(range, low, high) |
result =
rank[part + 1](string cut |
cut = cutoffs() and low < toCodePoint(cut) and toCodePoint(cut) < high
|
cut order by toCodePoint(cut)
)
)
}
/** Gets the number of parts we should print for a given `range`: one more than the number of cutoffs inside it. */
private int parts(OverlyWideRange range) { result = 1 + count(cutoff(range, _)) }
/** Holds if the given part of a range should span from `low` to `high`. */
private predicate part(OverlyWideRange range, int part, string low, string high) {
// first part: starts at the low end of the range.
part = 0 and
(
// no cutoffs: the single part is the whole range.
range.isRange(low, high) and
parts(range) = 1
or
parts(range) >= 2 and
range.isRange(low, _) and
high = highCut(cutoff(range, part))
)
or
// middle: spans from the previous cutoff to the cutoff of this part.
part >= 1 and
part < parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
high = highCut(cutoff(range, part))
or
// last: ends at the high end of the range.
part = parts(range) - 1 and
low = lowCut(cutoff(range, part - 1)) and
range.isRange(_, high)
}
/**
 * Gets an escaped `char` for use in a character class.
 * The chars `[`, `]`, `\`, `-` and `/` get a backslash prepended; every other char is returned unchanged.
 */
bindingset[char]
private string escape(string char) {
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
if char.regexpMatch(reg) then result = "\\" + char else result = char
)
}
/** Gets a part of the equivalent range. */
private string printEquivalentCharClass(OverlyWideRange range, int part) {
exists(string low, string high | part(range, part, low, high) |
if
isAlphanumeric(low) and
isAlphanumeric(high)
then result = low + "-" + high
else
// otherwise list every (escaped) char of the part individually.
result =
strictconcat(string char | char = getInRange(low, high) | escape(char) order by char)
)
}
/** Gets the entire pretty printed equivalent range. */
string printEquivalentCharClass(OverlyWideRange range) {
result =
strictconcat(string r, int part |
// the opening bracket sorts before the first part (index -1) ...
r = "[" and part = -1 and exists(range)
or
r = printEquivalentCharClass(range, part)
or
// ... and the closing bracket sorts after the last part.
r = "]" and part = parts(range)
|
r order by part
)
}
}
/**
 * Gets a character range that is suspicious for the reason described by `reason`.
 *
 * `priority` identifies the kind of problem: 0 for an overly wide range, 1 for an overlap
 * with another range, 2 for an overlap with a character class escape, and 3 for an empty range.
 */
RegExpCharacterRange getABadRange(string reason, int priority) {
  priority = 0 and
  exists(OverlyWideRange wide, string equiv |
    wide = result and
    equiv = wide.printEquivalent()
  |
    // the equivalent class is printed in full when it is at most 50 chars long ...
    equiv.length() <= 50 and
    reason = "is equivalent to " + equiv
    or
    // ... and is truncated otherwise.
    equiv.length() > 50 and
    reason = "is equivalent to " + equiv.substring(0, 50) + "..."
  )
  or
  priority = 1 and
  exists(RegExpCharacterRange other |
    overlap(result, other) and
    // the rank comparison picks one of the two overlapping ranges
    rankRange(result) < rankRange(other) and
    reason = "overlaps with " + other + " in the same character class"
  )
  or
  priority = 2 and
  exists(RegExpCharacterClassEscape escape |
    overlapsWithCharEscape(result, escape) and
    reason = "overlaps with " + escape + " in the same character class"
  )
  or
  priority = 3 and
  reason = "is empty" and
  exists(int low, int high | isRange(result, low, high) | high < low)
}
/**
 * Holds if `range` is a suspicious character range, where `reason` is the concatenation of
 * all reasons from `getABadRange`, joined by ", and " and ordered by descending priority.
 */
predicate problem(RegExpCharacterRange range, string reason) {
  // The term is used as a regexp (mostly for JS where regular expressions are parsed eagerly).
  range.getRootTerm().isUsedAsRegExp() and
  // Unicode escapes in strings are interpreted before the string turns into a regexp,
  // so e.g. [\u0001-\uFFFF] will just turn up as a range between two constants.
  // Only ranges whose root term sits directly in a regexp literal are therefore considered.
  range.getRootTerm().getParent() instanceof RegExpLiteral and
  // Specifying a range using an escape is usually OK.
  not range.getAChild() instanceof RegExpEscape and
  reason =
    strictconcat(string msg, int prio |
      range = getABadRange(msg, prio)
    |
      msg, ", and " order by prio desc
    )
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// OverlyLargeRangeQuery should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.OverlyLargeRangeQuery::Make<TreeView> as Dep
import Dep

View File

@@ -11,7 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.RegexTreeView
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
private import semmle.python.ApiGraphs
/**
@@ -20,6 +20,9 @@ private import semmle.python.ApiGraphs
* vulnerabilities, as well as extension points for adding your own.
*/
module PolynomialReDoS {
private import TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
/**
* A data flow source for "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
*/

View File

@@ -62,284 +62,7 @@
* a suffix `x` (possible empty) that is most likely __not__ accepted.
*/
import NfaUtils
/**
 * Holds if the term represented by state `s` is a `MaybeBacktrackingRepetition`,
 * or is (transitively) nested inside one.
 */
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
  exists(MaybeBacktrackingRepetition rep | rep = s.getRepr().getParent*())
}
/**
 * An infinitely repeating quantifier that might backtrack, that is, one that
 * (transitively) contains an alternation or another quantifier.
 */
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
  MaybeBacktrackingRepetition() {
    exists(RegExpTerm nested | nested.getParent+() = this |
      nested instanceof RegExpQuantifier
      or
      nested instanceof RegExpAlt
    )
  }
}
/**
 * The type of states in the product automaton.
 */
private newtype TStatePair =
  /**
   * Pairs are constructed lazily: only `(q, q)` for each fork state `q`, plus
   * every pair that is the target of a step from an already constructed pair.
   * To reduce the number of pairs, a stepped-to pair `(q1, q2)` is only represented
   * in the orientation where the rank of `q1` is no bigger than the rank of `q2`.
   *
   * Both components of a constructed pair are inside a repetition that might backtrack.
   */
  MkStatePair(State q1, State q2) {
    q1 = q2 and isFork(q1, _, _, _, _)
    or
    rankState(q1) <= rankState(q2) and
    (step(_, _, _, q2, q1) or step(_, _, _, q1, q2))
  }
/**
 * Gets the rank of `state` among all states inside a possibly backtracking repetition,
 * ordered by start line, start column, and finally `toString()`.
 * The rank gives an ordering of states in which states with the same `toString()` can
 * still be told apart by their location.
 */
private int rankState(State state) {
  rank[result](State candidate, Location loc |
    loc = candidate.getRepr().getLocation() and
    stateInsideBacktracking(candidate)
  |
    candidate order by loc.getStartLine(), loc.getStartColumn(), candidate.toString()
  ) = state
}
/**
 * A pair of NFA states, used as a state in the product automaton.
 */
private class StatePair extends TStatePair {
  State left;
  State right;

  StatePair() { MkStatePair(left, right) = this }

  /** Gets a textual representation of this state pair. */
  string toString() { result = "(" + left + ", " + right + ")" }

  /** Gets the first component of this state pair. */
  State getLeft() { left = result }

  /** Gets the second component of this state pair. */
  State getRight() { right = result }
}
/**
 * Holds if `p` is the pair `(fork, fork)` for some state `fork` where `isFork(fork, _, _, _, _)` holds.
 *
 * Used as the set of starting points in `statePairDistToFork`.
 */
private predicate isStatePairFork(StatePair p) {
  exists(State fork | isFork(fork, _, _, _, _) | MkStatePair(fork, fork) = p)
}
/**
 * Holds if the product automaton has a transition from `q` to `r`.
 * The parameters are in reverse order: the target `r` comes first.
 *
 * Used as the edge relation in `statePairDistToFork`.
 */
private predicate reverseStep(StatePair r, StatePair q) {
  exists(InputSymbol s1, InputSymbol s2 | step(q, s1, s2, r))
}
/**
 * Gets the minimum length of a path from `q` to `r` in the
 * product automaton.
 *
 * The distances are computed with `shortestDistances`, starting from the pairs in
 * `isStatePairFork` and following `reverseStep` edges, so `r` is always a `(fork, fork)` pair.
 */
private int statePairDistToFork(StatePair q, StatePair r) =
shortestDistances(isStatePairFork/1, reverseStep/2)(r, q, result)
/**
 * Holds if there are transitions from `q` to `r1` and from `q` to `r2`
 * labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
 * trivially have an empty intersection.
 *
 * The transitions may be preceded by any number of epsilon transitions from `q`.
 * `q`, `r1` and `r2` must all be inside a repetition that might backtrack
 * (see `stateInsideBacktracking`).
 */
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
// The two transitions differ in their label, ...
s1 != s2
or
// ... in their target state, ...
r1 != r2
or
// ... or in the state they leave from.
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
// The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
 * Gets the state pair for `q1` and `q2` in whichever orientation has been constructed,
 * that is, either `(q1, q2)` or `(q2, q1)`.
 */
private StatePair mkStatePair(State q1, State q2) {
  exists(State first, State second |
    first = q1 and second = q2
    or
    first = q2 and second = q1
  |
    result = MkStatePair(first, second)
  )
}
/**
 * Holds if the product automaton has a transition from the pair `q` to the pair `r`,
 * where the two components follow transitions labelled `s1` and `s2`, respectively.
 */
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
  exists(State r1, State r2 |
    r = mkStatePair(r1, r2) and
    step(q, s1, s2, r1, r2)
  )
}
/**
 * Holds if there are transitions from the components of `q` to `r1` and `r2`
 * labelled with `s1` and `s2`, respectively.
 *
 * We only consider transitions where the resulting states `(r1, r2)` are both
 * inside a repetition that might backtrack, and where `s1` and `s2` have some
 * intersection.
 */
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
 * The type of traces: either the empty trace `Nil()`, or a trace `t` extended
 * with the pair of input symbols `(s1, s2)` of a step reachable from a fork.
 */
private newtype TTrace =
  Nil() or
  Step(InputSymbol s1, InputSymbol s2, TTrace t) {
    exists(StatePair reached | isReachableFromFork(_, reached, s1, s2, t, _))
  }
/**
 * A list of input-symbol pairs describing a path in the product automaton
 * that starts at some fork state.
 */
private class Trace extends TTrace {
  /** Gets a textual representation of this trace. */
  string toString() {
    exists(InputSymbol s1, InputSymbol s2, Trace rest |
      this = Step(s1, s2, rest) and
      result = "Step(" + s1 + ", " + s2 + ", " + rest + ")"
    )
    or
    this = Nil() and result = "Nil()"
  }
}
/**
 * Holds if the pair `r` can be reached from `(fork, fork)` by following the trace `w`,
 * and `r` has a path of `rem` steps back to `(fork, fork)`.
 */
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
  exists(InputSymbol s1, InputSymbol s2, Trace prefix |
    w = Step(s1, s2, prefix) and
    isReachableFromFork(fork, r, s1, s2, prefix, rem)
  )
}
/**
 * Holds if the pair `r` can be reached from `(fork, fork)` by following the trace `v`
 * and then one more step labelled `(s1, s2)`, and `r` is `rem` steps away from `(fork, fork)`.
 */
private predicate isReachableFromFork(
  State fork, StatePair r, InputSymbol s1, InputSymbol s2, Trace v, int rem
) {
  // base case: the first step out of the fork, preceded by the empty trace
  v = Nil() and
  exists(State q1, State q2 |
    isFork(fork, s1, s2, q1, q2) and
    r = MkStatePair(q1, q2)
  ) and
  rem = statePairDistToFork(r, MkStatePair(fork, fork))
  or
  // recursive case: extend a path ending in `p`, getting one step closer to the fork pair
  exists(StatePair p |
    step(p, s1, s2, r) and
    isReachableFromFork(fork, p, v, rem + 1)
  ) and
  rem = statePairDistToFork(r, MkStatePair(fork, fork))
}
/**
 * Gets a state pair, for a fork state `fork`, whose two components can each reach
 * `fork` by zero or more epsilon transitions.
 */
private StatePair getAForkPair(State fork) {
  exists(State left, State right |
    left = epsilonPred*(fork) and
    right = epsilonPred*(fork)
  |
    isFork(fork, _, _, _, _) and
    result = MkStatePair(left, right)
  )
}
/** An implementation of `CharTree` on top of `Trace`, for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
  class CharNode = Trace;

  /** Gets the trace that `t` extends by one step. */
  CharNode getPrev(CharNode t) { Step(_, _, result) = t }

  /** Holds if `n` is a trace that is used by `concretize` in `isPumpable`. */
  predicate isARelevantEnd(CharNode n) {
    exists(State fork, StatePair forkPair |
      forkPair = getAForkPair(fork) and
      isReachableFromFork(fork, forkPair, n, _)
    )
  }

  /** Gets a char in the intersection of the two input symbols of the last step of `t`. */
  string getChar(CharNode t) {
    exists(InputSymbol s1, InputSymbol s2 |
      t = Step(s1, s2, _) and
      result = intersect(s1, s2)
    )
  }
}
/**
 * Holds if `fork` is a pumpable fork with word `w`, that is, some trace leads from
 * `(fork, fork)` to a pair in `getAForkPair(fork)`, and `w` is a concretization of that trace.
 */
private predicate isPumpable(State fork, string w) {
  exists(Trace t |
    isReachableFromFork(fork, getAForkPair(fork), t, _) and
    w = Concretizer<CharTreeImpl>::concretize(t)
  )
}
/**
 * Holds if `state` has exponential ReDoS.
 *
 * This is an alias for the 4-ary `hasReDoSResult` of `ReDoSPruning`, instantiated with `isPumpable`.
 * NOTE(review): the meaning of the remaining parameters is defined by `ReDoSPruning`, which is not shown here.
 */
predicate hasReDoSResult = ReDoSPruning<isPumpable/2>::hasReDoSResult/4;
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// ExponentialBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as Dep
import Dep

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +0,0 @@
/**
* Provides Python-specific definitions for use in the NfaUtils module.
*/
import python
import semmle.python.RegexTreeView
/**
 * Holds if `term` is a character class escape such as `\d`, where `clazz`
 * is the value of the escape, e.g. "d" for `\d`.
 */
predicate isEscapeClass(RegExpTerm term, string clazz) {
  term.(RegExpCharacterClassEscape).getValue() = clazz
}
/**
 * Holds if `term` is a possessive quantifier.
 * As Python's regexes do not support possessive quantifiers, this never holds (the body is `none()`),
 * but the predicate is used by the shared library.
 */
predicate isPossessive(RegExpQuantifier term) { none() }
/**
 * Holds if the regex that `term` is part of is used in a way that ignores any leading prefix of the input it's matched against.
 * Not yet implemented for Python: the predicate currently holds for every term.
 */
predicate matchesAnyPrefix(RegExpTerm term) { any() }
/**
 * Holds if the regex that `term` is part of is used in a way that ignores any trailing suffix of the input it's matched against.
 * Not yet implemented for Python: the predicate currently holds for every term.
 */
predicate matchesAnySuffix(RegExpTerm term) { any() }
/**
 * Holds if the regular expression should not be considered.
 *
 * As a pragmatic performance optimization, regular expressions in files that have no
 * relative path are ignored; such files do not belong to the project code
 * (for example installed dependencies).
 */
predicate isExcluded(RegExpParent parent) {
  // Regexes with more than 10 occurrences of ".*" may cause the polynomial ReDoS
  // computation to explode, so they are excluded explicitly.
  10 < count(int i | exists(parent.getRegex().getText().regexpFind("\\.\\*", i, _)))
  or
  not exists(parent.getRegex().getLocation().getFile().getRelativePath())
}
/**
 * A module containing predicates for determining which flags a regular expression has.
 */
module RegExpFlags {
  /**
   * Holds if `root` is a root term whose literal has the `i` flag for case-insensitive matching.
   */
  predicate isIgnoreCase(RegExpTerm root) {
    root.getLiteral().isIgnoreCase() and
    root.isRootTerm()
  }

  /**
   * Gets the flags of the literal of the root term `root`.
   */
  string getFlags(RegExpTerm root) {
    root.getLiteral().getFlags() = result and
    root.isRootTerm()
  }

  /**
   * Holds if `root` is a root term whose literal has the dot-all flag (`s`).
   */
  predicate isDotAll(RegExpTerm root) {
    root.getLiteral().isDotAll() and
    root.isRootTerm()
  }
}

View File

@@ -3,155 +3,7 @@
* and for testing which capture groups are filled when a particular regexp matches a string.
*/
import NfaUtils
/** A regular expression term for which `isRootTerm()` holds. */
class RootTerm extends RegExpTerm {
  RootTerm() { this = any(RegExpTerm t | t.isRootTerm()) }
}
/**
 * Holds if it should be tested whether `root` matches `str`.
 *
 * If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
 * E.g. a regular expression `/foo$/` will match any string that ends with "foo",
 * but if `ignorePrefix` is true, it will only match "foo".
 *
 * If `testWithGroups` is true, then the `RegexpMatching::fillsCaptureGroup` predicate can be used to determine which capture
 * groups are filled by a string.
 *
 * This is a signature only: the implementation is supplied as the parameter of `RegexpMatching`.
 */
signature predicate isRegexpMatchingCandidateSig(
RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
);
/**
 * A module for determining if a regexp matches a given string,
 * and reasoning about which capture groups are filled by a given string.
 *
 * The module parameter `isCandidate` determines which strings should be tested,
 * and the results can be read from the `matches` and `fillsCaptureGroup` predicates.
 */
module RegexpMatching<isRegexpMatchingCandidateSig/4 isCandidate> {
/**
 * Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
 * The regular expression is modeled as a non-deterministic finite automaton,
 * the regular expression can therefore be in multiple states after matching a character.
 *
 * It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
 */
private State getAState(RootTerm reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
isCandidate(reg, str, ignorePrefix, _) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
 * Gets the next state after the `prev` state from `reg`.
 * `prev` is the state after matching `fromIndex` chars in `str`,
 * and the result is the state after matching `toIndex` chars in `str`.
 *
 * This predicate is used as a step relation in the forwards search (`getAState`),
 * and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
 */
private State getAStateAfterMatching(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
// This step consumes no char, so the index stays the same.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
/**
 * Gets a state reached from `prev` by matching the char at index `toIndex` of `str`,
 * where `prev` is a state at index `fromIndex = toIndex - 1`.
 * Steps discarded by `discardedPrefixStep` are excluded.
 */
pragma[noopt]
private State getAStateAfterMatchingAux(
RootTerm reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exist in strings tested by `isCandidate`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exist in strings tested by `isCandidate`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, RootTerm r | isCandidate(r, s, _, _) | char = s.charAt(_)) and
result = getAnInputSymbolMatching(char)
}
/**
 * Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
 * Starts with an accepting state as found by `getAState` and searches backwards
 * to the start state through the reachable states (as found by `getAState`).
 *
 * This predicate satisfies the invariant that the result state can be reached with `i` steps from a start state,
 * and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
 * The result state is therefore always on a valid path where `reg` accepts `str`.
 *
 * This predicate is only used to find which capture groups a regular expression has filled,
 * and thus the search is only performed for the candidates where `testWithGroups` is `true`.
 */
private State getAStateThatReachesAccept(RootTerm reg, int i, string str, boolean ignorePrefix) {
// base case, reaches an accepting state from the last state in `getAState(..)`
isCandidate(reg, str, ignorePrefix, true) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/** Gets the number of a capture group that `term` is, or is (transitively) nested inside. */
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
/**
 * Holds if `reg` matches `str`, where `str` is in the `isCandidate` predicate.
 */
predicate matches(RootTerm reg, string str) {
// some state after the last char can reach an accepting state through epsilon transitions.
exists(State state | state = getAState(reg, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
 * Holds if matching `str` against `reg` may fill capture group number `g`.
 * Only holds if `str` is a candidate where `testWithGroups` is `true`.
 */
predicate fillsCaptureGroup(RootTerm reg, string str, int g) {
exists(State s |
s = getAStateThatReachesAccept(reg, _, str, _) and
g = group(s.getRepr())
)
}
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// RegexpMatching should be used directly from the shared pack, and not from this file.
deprecated import codeql.regex.nfa.RegexpMatching::Make<TreeView> as Dep
import Dep

View File

@@ -1,11 +1,4 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import NfaUtils
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
@@ -42,377 +35,7 @@ import NfaUtils
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
 * Gets the match state (`mkMatch`) of any regular expression root.
 */
private State getRootState() { exists(RegExpRoot root | result = mkMatch(root)) }
/** The type of 3-tuples of states used as states in the product automaton. */
private newtype TStateTuple =
  MkStateTuple(State q1, State q2, State q3) {
    // a start tuple of the form `(pivot, pivot, succ)`
    q2 = q1 and isStartLoops(q1, q3)
    or
    // a feasible tuple that is the target of a step from an existing tuple
    FeasibleTuple::isFeasibleTuple(q1, q2, q3) and
    step(_, _, _, _, q1, q2, q3)
  }
/**
 * A state in the product automaton, which is a 3-tuple of NFA states.
 *
 * Tuples are constructed lazily: a tuple is either a start tuple
 * `(pivot, pivot, succ)`, or a tuple that is the target of a transition
 * from a tuple that already exists.
 *
 * The exponential variant of this query (`js/redos`) uses an optimization
 * trick where `q1 <= q2`. That trick cannot be used here, because the order
 * of the elements matters.
 */
class StateTuple extends TStateTuple {
  State q1;
  State q2;
  State q3;

  StateTuple() { MkStateTuple(q1, q2, q3) = this }

  /**
   * Gets a string representation of this tuple.
   */
  string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }

  /**
   * Holds if this tuple is `(r1, r2, r3)`.
   */
  pragma[noinline]
  predicate isTuple(State r1, State r2, State r3) {
    q1 = r1 and
    q2 = r2 and
    q3 = r3
  }
}
/**
 * A module for determining feasible tuples for the product automaton.
 *
 * The implementation is split into several small predicates for performance reasons.
 */
private module FeasibleTuple {
  /**
   * Holds if the tuple `(r1, r2, r3)` might be on a path from a start-state to an end-state in the product automaton.
   */
  pragma[inline]
  predicate isFeasibleTuple(State r1, State r2, State r3) {
    // The states are reachable in the NFA in the order r1 -> r2 -> r3.
    r2 = delta+(r1) and
    r3 = delta+(r2) and
    // The first element is inside a repetition (or is the start state itself),
    // and can reach a "pivot" state of some `(pivot, succ)` pair.
    isRepetitionOrStart(r1) and
    canReachABeginning(r1) and
    // The last element is inside a repetition,
    // and can reach a "succ" state of some `(pivot, succ)` pair.
    stateInsideRepetition(r3) and
    canReachATarget(r3)
  }

  /**
   * Holds if `s` is either inside a repetition, or is the start state (which is treated as a repetition).
   */
  pragma[noinline]
  private predicate isRepetitionOrStart(State s) {
    s = getRootState()
    or
    stateInsideRepetition(s)
  }

  /**
   * Holds if the term of state `s` is an `InfiniteRepetitionQuantifier`, or is (transitively) nested inside one.
   */
  pragma[noinline]
  private predicate stateInsideRepetition(State s) {
    exists(InfiniteRepetitionQuantifier rep | rep = s.getRepr().getParent*())
  }

  /**
   * Holds if there exists a path in the NFA from `s` to a "pivot" state
   * (from a `(pivot, succ)` pair that starts the search).
   */
  pragma[noinline]
  private predicate canReachABeginning(State s) {
    exists(State pivot | isStartLoops(pivot, _) | pivot = delta+(s))
  }

  /**
   * Holds if there exists a path in the NFA from `s` to a "succ" state
   * (from a `(pivot, succ)` pair that starts the search).
   */
  pragma[noinline]
  private predicate canReachATarget(State s) {
    exists(State succ | isStartLoops(_, succ) | succ = delta+(s))
  }
}
/**
 * Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup:
 * `succ` is an infinite repetition reachable from `pivot`, and `pivot` is either an
 * infinite repetition itself or the match state of a regular expression root.
 *
 * There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
 * The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
 */
predicate isStartLoops(State pivot, State succ) {
  succ = delta+(pivot) and
  succ != pivot and
  succ.getRepr() instanceof InfiniteRepetitionQuantifier and
  (
    exists(RegExpRoot root | pivot = mkMatch(root))
    or
    pivot.getRepr() instanceof InfiniteRepetitionQuantifier
  )
}
/**
 * Gets a state that is the target of some transition in the NFA from `s`,
 * regardless of the input symbol on the transition.
 */
State delta(State s) { exists(InputSymbol sym | delta(s, sym, result)) }
/**
 * Holds if the product automaton has a transition from the tuple `q` to the tuple `r`,
 * where the three components follow transitions labelled `s1`, `s2`, and `s3`, respectively.
 */
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
  exists(State r1, State r2, State r3 |
    r = MkStateTuple(r1, r2, r3) and
    step(q, s1, s2, s3, r1, r2, r3)
  )
}
/**
 * Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
 * labelled with `s1`, `s2`, and `s3`, respectively, and `getAThreewayIntersect` finds
 * a char shared by the three labels.
 */
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
 * Gets a char that is matched by all three of the edges `s1`, `s2`, and `s3`.
 *
 * Only the minimum and maximum chars of each pairwise intersection are tried,
 * so the result is incomplete and may miss combinations of edges that do share a char.
 */
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
  result = minAndMaxIntersect(s1, s2) and
  (result = intersect(s2, s3) or result = intersect(s1, s3))
  or
  result = minAndMaxIntersect(s1, s3) and
  (result = intersect(s2, s3) or result = intersect(s1, s2))
  or
  result = minAndMaxIntersect(s2, s3) and
  (result = intersect(s1, s2) or result = intersect(s1, s3))
}
/**
 * Gets the smallest or the largest char in the intersection of `a` and `b`.
 * This predicate is used to limit the size of `getAThreewayIntersect`.
 */
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
  result = min(intersect(a, b))
  or
  result = max(intersect(a, b))
}
/**
 * The type of traces: either the empty trace `Nil()`, or a trace `t` extended with the
 * triple of input symbols `(s1, s2, s3)` of a step reachable from a start tuple.
 */
private newtype TTrace =
  Nil() or
  Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
    exists(StateTuple reached | isReachableFromStartTuple(_, _, t, s1, s2, s3, reached, _))
  }
/**
 * A list of input-symbol triples describing a path in the product automaton
 * that starts at some start state.
 */
class Trace extends TTrace {
  /**
   * Gets a string representation of this trace, intended for debugging.
   */
  string toString() {
    exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace rest |
      this = Step(s1, s2, s3, rest) and
      result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + rest + ")"
    )
    or
    this = Nil() and result = "Nil()"
  }
}
/**
 * Holds if the product automaton has a transition from `r` to `q`.
 * Note that the parameters are flipped, so the relation points backwards.
 */
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) {
  exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | step(r, s1, s2, s3, q))
}
/**
 * Holds if `tuple` is an end state in the search, that is,
 * `tuple = (pivot, succ, succ)` for some pair of loops `(pivot, succ)`.
 */
predicate isEndTuple(StateTuple tuple) {
  exists(State pivot, State succ | tuple = getAnEndTuple(pivot, succ))
}
/**
 * Gets the minimum length of a path from `r` to the end state `end`.
 *
 * The implementation searches backwards from the end-tuple, following `tupleDeltaBackwards`.
 * This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
 * The `end` argument must always be an end state (see `isEndTuple`).
 */
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
 * Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
 * `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
 * the distance from `tuple` to the end-tuple `(pivot, succ, succ)` is `dist`,
 * and a path from the start tuple to `tuple` follows the transitions in `trace`.
 */
private predicate isReachableFromStartTuple(
  State pivot, State succ, StateTuple tuple, Trace trace, int dist
) {
  exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace prefix |
    trace = Step(s1, s2, s3, prefix) and
    isReachableFromStartTuple(pivot, succ, prefix, s1, s2, s3, tuple, dist)
  )
}
/**
* Holds if `tuple` is reached by taking a transition labelled `(s1, s2, s3)`, where `trace`
* describes the path taken before that final transition, and `dist` is the distance
* from `tuple` back to the end-tuple `(pivot, succ, succ)` as given by `distBackFromEnd`.
*
* This is the unfolded form of the 5-argument `isReachableFromStartTuple`: the last
* transition is kept as separate symbols instead of being part of `trace`.
*/
private predicate isReachableFromStartTuple(
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
StateTuple tuple, int dist
) {
// base case: a single transition out of the start tuple `(pivot, pivot, succ)`, so the preceding trace is empty.
exists(State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Nil() and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case: extend a path ending in `p` by one transition to `tuple`.
// `p` must be at distance `dist + 1`, so every transition moves exactly one step closer to the end-tuple.
exists(StateTuple p |
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, tuple)
)
}
/**
 * Gets the product-automaton tuple `(pivot, succ, succ)`,
 * provided that `isStartLoops(pivot, succ)` holds.
 */
StateTuple getAnEndTuple(State pivot, State succ) {
  result = MkStateTuple(pivot, succ, succ) and
  isStartLoops(pivot, succ)
}
/** A chain of chars, backed by `Trace`, for consumption by `Concretizer`. */
private module CharTreeImpl implements CharTree {
  class CharNode = Trace;

  /** Gets the trace that `t` was built on top of. */
  CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }

  /** Holds if `n` is used in `isPumpable`. */
  predicate isARelevantEnd(CharNode n) {
    exists(State pivot, State succ, StateTuple end |
      end = getAnEndTuple(pivot, succ) and
      isReachableFromStartTuple(pivot, succ, end, n, _)
    )
  }

  /** Gets a char from the three-way intersection of the symbols in the last step of `t`. */
  string getChar(CharNode t) {
    exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 |
      t = Step(s1, s2, s3, _) and
      result = getAThreewayIntersect(s1, s2, s3)
    )
  }
}
/**
 * Holds if matching repetitions of `pump` can:
 * 1) Transition from `pivot` back to `pivot`.
 * 2) Transition from `pivot` to `succ`.
 * 3) Transition from `succ` to `succ`.
 *
 * By theorem 3 in the paper linked in the top of this file, this means that
 * the regular expression has polynomial backtracking - if a rejecting suffix exists.
 *
 * This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
 * available in the `hasReDoSResult` predicate.
 */
predicate isPumpable(State pivot, State succ, string pump) {
  // Find a trace that reaches the end-tuple `(pivot, succ, succ)` and turn it into a concrete string.
  exists(Trace t |
    isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), t, _)
  |
    pump = Concretizer<CharTreeImpl>::concretize(t)
  )
}
/**
 * Holds if `state` is the `succ` of some pumpable pair of states, meaning that
 * states starting in `state` can have polynomial backtracking with the string `pump`.
 */
predicate isReDoSCandidate(State state, string pump) {
  exists(State pivot | isPumpable(pivot, state, pump))
}
/**
 * Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
 * `prev` is the term represented by a pivot state that is pumpable together
 * with the state reported for `t`.
 */
predicate polynomialReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
  exists(State pivot, State s |
    // `pivot` forms a pumpable pair with `s` (for any pump string) ...
    isPumpable(pivot, s, _) and
    prev = pivot.getRepr()
  |
    // ... and `s` survives the pruning as an actual result for `t`.
    ReDoSPruning<isReDoSCandidate/2>::hasReDoSResult(t, pump, s, prefixMsg)
  )
}
/**
 * Gets a message explaining why `term` can cause polynomial backtracking,
 * for a result reported by `polynomialReDoS(term, pump, prefixMsg, prev)`.
 */
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
  result =
    "Strings " + prefixMsg + "with many repetitions of '" + pump +
      "' can start matching anywhere after the start of the preceeding " + prev and
  polynomialReDoS(term, pump, prefixMsg, prev)
}
/**
 * A term for which a regular expression engine may need a polynomial number
 * of match attempts, relative to the length of the input.
 */
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
  string reason;
  string pump;
  string prefixMsg;
  RegExpTerm prev;

  PolynomialBackTrackingTerm() {
    // A term can have many reasons for polynomial backtracking - keep only the
    // shortest message (ties are broken by the ordering of the messages themselves).
    reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg) and
    reason = getReasonString(this, pump, prefixMsg, prev)
  }

  /**
   * Holds if every non-empty successor of this term matches the end of the line.
   */
  predicate isAtEndLine() {
    // Equivalently: there is no non-empty successor that is anything other than `$`.
    not exists(RegExpTerm succ |
      succ = this.getSuccessor+() and
      not matchesEpsilon(succ) and
      not succ instanceof RegExpDollar
    )
  }

  /**
   * Gets the string whose repetition makes this regular expression perform polynomially.
   */
  string getPumpString() { result = pump }

  /**
   * Gets a message describing the prefix a matching string must start with for this term to cause polynomial backtracking.
   */
  string getPrefixMessage() { result = prefixMsg }

  /**
   * Gets a predecessor of `this` that also loops on the pump string, which is what causes the polynomial backtracking.
   */
  RegExpTerm getPreviousLoop() { result = prev }

  /**
   * Gets the explanation for the number of match attempts.
   */
  string getReason() { result = reason }
}
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
// SuperlinearBackTracking should be used directly from the shared pack, and not from this file.
deprecated private import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView> as Dep
import Dep

View File

@@ -12,8 +12,9 @@
* external/cwe/cwe-020
*/
import semmle.python.security.OverlyLargeRangeQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.OverlyLargeRangeQuery::Make<TreeView>
from RegExpCharacterRange range, string reason
from TreeView::RegExpCharacterRange range, string reason
where problem(range, reason)
select range, "Suspicious character range that " + reason + "."

View File

@@ -14,7 +14,8 @@
* external/cwe/cwe-186
*/
import semmle.python.security.BadTagFilterQuery
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView>
from HtmlMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one

View File

@@ -14,7 +14,6 @@
*/
import python
import semmle.python.security.regexp.SuperlinearBackTracking
import semmle.python.security.dataflow.PolynomialReDoSQuery
import DataFlow::PathGraph

View File

@@ -14,10 +14,10 @@
* external/cwe/cwe-400
*/
import python
import semmle.python.security.regexp.ExponentialBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView>
from RegExpTerm t, string pump, State s, string prefixMsg
from TreeView::RegExpTerm t, string pump, State s, string prefixMsg
where
hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now

View File

@@ -1,2 +1,2 @@
| test.py:8:12:8:23 | Str | test.py:8:21:8:23 | \\s+ | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s+$ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |
| test.py:9:14:9:29 | Str | test.py:9:27:9:29 | \\d+ | Strings starting with '0.9' and with many repetitions of '99' can start matching anywhere after the start of the preceeding \\d+ |

View File

@@ -1,5 +1,6 @@
import python
import semmle.python.security.regexp.SuperlinearBackTracking
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
import codeql.regex.nfa.SuperlinearBackTracking::Make<TreeView>
from PolynomialBackTrackingTerm t
select t.getRegex(), t, t.getReason()
select t.(TreeView::RegExpTerm).getRegex(), t, t.getReason()

View File

@@ -16,4 +16,4 @@ nodes
subpaths
#select
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings starting with '0.9' and with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -2,20 +2,20 @@
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings starting with '*' and containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t"' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t'' and containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\t(' and containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '!\|\\n-\|\\n' and containing many repetitions of '\|\|\\n'. |
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings starting with '#' and containing many repetitions of '#'. |
| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$[' and containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '$.$[' and containing many repetitions of ']['. |
| redos.py:60:25:60:30 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:61:25:61:30 | [a-z]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings starting with '0' and containing many repetitions of '0'. |
| redos.py:63:26:63:33 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| redos.py:68:26:68:41 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:68:48:68:50 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '[' and containing many repetitions of ']['. |
@@ -51,7 +51,6 @@
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:199:28:199:29 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:202:26:202:32 | (a+a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:202:27:202:28 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:205:25:205:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:211:25:211:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -71,16 +70,16 @@
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""\\t0='. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<0\\t0=' and containing many repetitions of '""\\t0='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with '00000000000000' and containing many repetitions of 'e'. |
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of 'Xx'. |
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:319:28:319:33 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
| redos.py:322:25:322:29 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |

View File

@@ -1,18 +1,10 @@
import codeql.ruby.dataflow.SSA
import codeql.ruby.dataflow.internal.SsaImpl::Consistency as Consistency
import codeql.ruby.dataflow.internal.SsaImpl::Consistency
class MyRelevantDefinition extends Consistency::RelevantDefinition, Ssa::Definition {
class MyRelevantDefinition extends RelevantDefinition, Ssa::Definition {
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
query predicate nonUniqueDef = Consistency::nonUniqueDef/4;
query predicate readWithoutDef = Consistency::readWithoutDef/3;
query predicate deadDef = Consistency::deadDef/2;
query predicate notDominatedByDef = Consistency::notDominatedByDef/4;

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Data flow through the `ActiveSupport` extension `Enumerable#index_by` is now modeled.

View File

@@ -284,7 +284,17 @@ module ActiveSupport {
preservesValue = true
}
}
// TODO: index_by, index_with, pick, pluck (they require Hash dataflow)
/**
 * A flow summary for `index_by`: every element of the receiver flows, value-preserving,
 * to the first parameter of the block and to an element of the return value.
 */
private class IndexBySummary extends SimpleSummarizedCallable {
  IndexBySummary() { this = "index_by" }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    input = "Argument[self].Element[any]" and
    (
      output = "Argument[block].Parameter[0]"
      or
      output = "ReturnValue.Element[?]"
    )
  }
}
// TODO: index_with, pick, pluck (they require Hash dataflow)
}
}

View File

@@ -258,6 +258,34 @@ edges
| hash_extensions.rb:58:10:58:10 | x [element :a] : | hash_extensions.rb:58:10:58:14 | ...[...] |
| hash_extensions.rb:59:10:59:10 | x [element :b] : | hash_extensions.rb:59:10:59:14 | ...[...] |
| hash_extensions.rb:59:10:59:10 | x [element :b] : | hash_extensions.rb:59:10:59:14 | ...[...] |
| hash_extensions.rb:67:15:67:25 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 0] : |
| hash_extensions.rb:67:15:67:25 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 0] : |
| hash_extensions.rb:67:28:67:38 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 1] : |
| hash_extensions.rb:67:28:67:38 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 1] : |
| hash_extensions.rb:67:41:67:51 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 2] : |
| hash_extensions.rb:67:41:67:51 | call to source : | hash_extensions.rb:68:9:68:14 | values [element 2] : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | hash_extensions.rb:68:9:71:7 | call to index_by [element] : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | hash_extensions.rb:68:29:68:33 | value : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | hash_extensions.rb:73:10:73:10 | h [element] : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | hash_extensions.rb:73:10:73:10 | h [element] : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | hash_extensions.rb:74:10:74:10 | h [element] : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | hash_extensions.rb:74:10:74:10 | h [element] : |
| hash_extensions.rb:68:29:68:33 | value : | hash_extensions.rb:69:14:69:18 | value |
| hash_extensions.rb:68:29:68:33 | value : | hash_extensions.rb:69:14:69:18 | value |
| hash_extensions.rb:73:10:73:10 | h [element] : | hash_extensions.rb:73:10:73:16 | ...[...] |
| hash_extensions.rb:73:10:73:10 | h [element] : | hash_extensions.rb:73:10:73:16 | ...[...] |
| hash_extensions.rb:74:10:74:10 | h [element] : | hash_extensions.rb:74:10:74:16 | ...[...] |
| hash_extensions.rb:74:10:74:10 | h [element] : | hash_extensions.rb:74:10:74:16 | ...[...] |
nodes
| active_support.rb:10:9:10:18 | call to source : | semmle.label | call to source : |
| active_support.rb:11:10:11:10 | x : | semmle.label | x : |
@@ -594,6 +622,32 @@ nodes
| hash_extensions.rb:59:10:59:10 | x [element :b] : | semmle.label | x [element :b] : |
| hash_extensions.rb:59:10:59:14 | ...[...] | semmle.label | ...[...] |
| hash_extensions.rb:59:10:59:14 | ...[...] | semmle.label | ...[...] |
| hash_extensions.rb:67:15:67:25 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:67:15:67:25 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:67:28:67:38 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:67:28:67:38 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:67:41:67:51 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:67:41:67:51 | call to source : | semmle.label | call to source : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | semmle.label | values [element 0] : |
| hash_extensions.rb:68:9:68:14 | values [element 0] : | semmle.label | values [element 0] : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | semmle.label | values [element 1] : |
| hash_extensions.rb:68:9:68:14 | values [element 1] : | semmle.label | values [element 1] : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | semmle.label | values [element 2] : |
| hash_extensions.rb:68:9:68:14 | values [element 2] : | semmle.label | values [element 2] : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | semmle.label | call to index_by [element] : |
| hash_extensions.rb:68:9:71:7 | call to index_by [element] : | semmle.label | call to index_by [element] : |
| hash_extensions.rb:68:29:68:33 | value : | semmle.label | value : |
| hash_extensions.rb:68:29:68:33 | value : | semmle.label | value : |
| hash_extensions.rb:69:14:69:18 | value | semmle.label | value |
| hash_extensions.rb:69:14:69:18 | value | semmle.label | value |
| hash_extensions.rb:73:10:73:10 | h [element] : | semmle.label | h [element] : |
| hash_extensions.rb:73:10:73:10 | h [element] : | semmle.label | h [element] : |
| hash_extensions.rb:73:10:73:16 | ...[...] | semmle.label | ...[...] |
| hash_extensions.rb:73:10:73:16 | ...[...] | semmle.label | ...[...] |
| hash_extensions.rb:74:10:74:10 | h [element] : | semmle.label | h [element] : |
| hash_extensions.rb:74:10:74:10 | h [element] : | semmle.label | h [element] : |
| hash_extensions.rb:74:10:74:16 | ...[...] | semmle.label | ...[...] |
| hash_extensions.rb:74:10:74:16 | ...[...] | semmle.label | ...[...] |
subpaths
#select
| active_support.rb:182:10:182:13 | ...[...] | active_support.rb:180:10:180:17 | call to source : | active_support.rb:182:10:182:13 | ...[...] | $@ | active_support.rb:180:10:180:17 | call to source : | call to source : |
@@ -622,3 +676,12 @@ subpaths
| hash_extensions.rb:56:10:56:14 | ...[...] | hash_extensions.rb:50:52:50:61 | call to taint : | hash_extensions.rb:56:10:56:14 | ...[...] | $@ | hash_extensions.rb:50:52:50:61 | call to taint : | call to taint : |
| hash_extensions.rb:58:10:58:14 | ...[...] | hash_extensions.rb:50:14:50:23 | call to taint : | hash_extensions.rb:58:10:58:14 | ...[...] | $@ | hash_extensions.rb:50:14:50:23 | call to taint : | call to taint : |
| hash_extensions.rb:59:10:59:14 | ...[...] | hash_extensions.rb:50:29:50:38 | call to taint : | hash_extensions.rb:59:10:59:14 | ...[...] | $@ | hash_extensions.rb:50:29:50:38 | call to taint : | call to taint : |
| hash_extensions.rb:69:14:69:18 | value | hash_extensions.rb:67:15:67:25 | call to source : | hash_extensions.rb:69:14:69:18 | value | $@ | hash_extensions.rb:67:15:67:25 | call to source : | call to source : |
| hash_extensions.rb:69:14:69:18 | value | hash_extensions.rb:67:28:67:38 | call to source : | hash_extensions.rb:69:14:69:18 | value | $@ | hash_extensions.rb:67:28:67:38 | call to source : | call to source : |
| hash_extensions.rb:69:14:69:18 | value | hash_extensions.rb:67:41:67:51 | call to source : | hash_extensions.rb:69:14:69:18 | value | $@ | hash_extensions.rb:67:41:67:51 | call to source : | call to source : |
| hash_extensions.rb:73:10:73:16 | ...[...] | hash_extensions.rb:67:15:67:25 | call to source : | hash_extensions.rb:73:10:73:16 | ...[...] | $@ | hash_extensions.rb:67:15:67:25 | call to source : | call to source : |
| hash_extensions.rb:73:10:73:16 | ...[...] | hash_extensions.rb:67:28:67:38 | call to source : | hash_extensions.rb:73:10:73:16 | ...[...] | $@ | hash_extensions.rb:67:28:67:38 | call to source : | call to source : |
| hash_extensions.rb:73:10:73:16 | ...[...] | hash_extensions.rb:67:41:67:51 | call to source : | hash_extensions.rb:73:10:73:16 | ...[...] | $@ | hash_extensions.rb:67:41:67:51 | call to source : | call to source : |
| hash_extensions.rb:74:10:74:16 | ...[...] | hash_extensions.rb:67:15:67:25 | call to source : | hash_extensions.rb:74:10:74:16 | ...[...] | $@ | hash_extensions.rb:67:15:67:25 | call to source : | call to source : |
| hash_extensions.rb:74:10:74:16 | ...[...] | hash_extensions.rb:67:28:67:38 | call to source : | hash_extensions.rb:74:10:74:16 | ...[...] | $@ | hash_extensions.rb:67:28:67:38 | call to source : | call to source : |
| hash_extensions.rb:74:10:74:16 | ...[...] | hash_extensions.rb:67:41:67:51 | call to source : | hash_extensions.rb:74:10:74:16 | ...[...] | $@ | hash_extensions.rb:67:41:67:51 | call to source : | call to source : |

View File

@@ -62,3 +62,16 @@ def m_extract!(x)
end
m_extract!(:c)
def m_index_by # exercises value flow through `index_by`: into the block parameter and into the elements of the returned hash
values = [source("a"), source("b"), source("c")]
h = values.index_by do |value|
sink value # $ hasValueFlow=a $ hasValueFlow=b $ hasValueFlow=c
make_key(value)
end
sink h[:foo] # $ hasValueFlow=a $ hasValueFlow=b $ hasValueFlow=c
sink h[:bar] # $ hasValueFlow=a $ hasValueFlow=b $ hasValueFlow=c
end
m_index_by()

View File

@@ -166,7 +166,7 @@ module Make<RegexTreeViewSig TreeImpl> {
min(RelevantRegExpTerm t |
str = getCanonicalizationString(t)
|
t order by getTermLocationString(t)
t order by getTermLocationString(t), t.toString()
)
}
@@ -949,7 +949,7 @@ module Make<RegexTreeViewSig TreeImpl> {
isStartState(s) and
getRoot(s.getRepr()) = root
|
s order by getTermLocationString(s.getRepr())
s order by getTermLocationString(s.getRepr()), s.getRepr().toString()
)
)
}
@@ -1047,7 +1047,7 @@ module Make<RegexTreeViewSig TreeImpl> {
isCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by getTermLocationString(s.getRepr())
s order by getTermLocationString(s.getRepr()), s.getRepr().toString()
)
)
}

View File

@@ -9,7 +9,10 @@ signature module InputSig {
* A basic block, that is, a maximal straight-line sequence of control flow nodes
* without branches or joins.
*/
class BasicBlock;
class BasicBlock {
/** Gets a textual representation of this basic block. */
string toString();
}
/**
* Gets the basic block that immediately dominates basic block `bb`, if any.
@@ -43,7 +46,10 @@ signature module InputSig {
class ExitBasicBlock extends BasicBlock;
/** A variable that can be SSA converted. */
class SourceVariable;
class SourceVariable {
/** Gets a textual representation of this variable. */
string toString();
}
/**
* Holds if the `i`th node of basic block `bb` is a (potential) write to source
@@ -846,8 +852,6 @@ module Make<InputSig Input> {
}
/** Provides a set of consistency queries. */
// TODO: Make these `query` predicates once class signatures are supported
// (`SourceVariable` and `BasicBlock` must have `toString`)
module Consistency {
/** A definition that is relevant for the consistency queries. */
abstract class RelevantDefinition extends Definition {
@@ -858,19 +862,19 @@ module Make<InputSig Input> {
}
/** Holds if a read can be reached from multiple definitions. */
predicate nonUniqueDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
query predicate nonUniqueDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
ssaDefReachesRead(v, def, bb, i) and
not exists(unique(Definition def0 | ssaDefReachesRead(v, def0, bb, i)))
}
/** Holds if a read cannot be reached from a definition. */
predicate readWithoutDef(SourceVariable v, BasicBlock bb, int i) {
query predicate readWithoutDef(SourceVariable v, BasicBlock bb, int i) {
variableRead(bb, i, v, _) and
not ssaDefReachesRead(v, _, bb, i)
}
/** Holds if a definition cannot reach a read. */
predicate deadDef(RelevantDefinition def, SourceVariable v) {
query predicate deadDef(RelevantDefinition def, SourceVariable v) {
v = def.getSourceVariable() and
not ssaDefReachesRead(_, def, _, _) and
not phiHasInputFromBlock(_, def, _) and
@@ -878,7 +882,7 @@ module Make<InputSig Input> {
}
/** Holds if a read is not dominated by a definition. */
predicate notDominatedByDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
query predicate notDominatedByDef(RelevantDefinition def, SourceVariable v, BasicBlock bb, int i) {
exists(BasicBlock bbDef, int iDef | def.definesAt(v, bbDef, iDef) |
ssaDefReachesReadWithinBlock(v, def, bb, i) and
(bb != bbDef or i < iDef)

View File

@@ -20,8 +20,8 @@ class XxeAdditionalTaintStep extends Unit {
}
/** The XML argument of an `XMLParser` vulnerable to XXE. */
private class DefaultXxeSink extends XxeSink {
DefaultXxeSink() {
private class XmlParserXxeSink extends XxeSink {
XmlParserXxeSink() {
this.asExpr() = any(Argument a | a.getApplyExpr() instanceof VulnerableParser).getExpr()
}
}
@@ -67,3 +67,26 @@ private class XmlParserRef extends Expr {
private class XmlParserType extends NominalType {
  XmlParserType() {
    // Selected purely by the type's full name.
    "XMLParser" = this.getFullName()
  }
}
/**
 * The first argument of an `XMLDocument` initializer call that enables
 * external entity loading, treated as an XXE sink.
 */
private class XmlDocumentXxeSink extends XxeSink {
  XmlDocumentXxeSink() {
    exists(VulnerableXmlDocument doc | this.asExpr() = doc.getArgument(0).getExpr())
  }
}
/**
 * A call to an `XMLDocument` initializer whose second argument is an array
 * literal containing a reference to `nodeLoadExternalEntitiesAlways`.
 */
private class VulnerableXmlDocument extends ApplyExpr {
  VulnerableXmlDocument() {
    // The call statically targets an initializer declared in `XMLDocument`.
    exists(ConstructorDecl ctor, NominalTypeDecl owner |
      ctor = this.getStaticTarget() and
      owner = ctor.getEnclosingDecl() and
      owner.getFullName() = "XMLDocument"
    ) and
    // One element of the options array refers to the "always load" option.
    exists(ArrayExpr optionList, MemberRefExpr optionRef |
      optionList = this.getArgument(1).getExpr() and
      optionRef = optionList.getAnElement() and
      optionRef.getMember() instanceof NodeLoadExternalEntitiesAlways
    )
  }
}
/** The `nodeLoadExternalEntitiesAlways` variable declared in the `XMLNode.Options` struct. */
private class NodeLoadExternalEntitiesAlways extends VarDecl {
  NodeLoadExternalEntitiesAlways() {
    exists(StructDecl optionsDecl |
      optionsDecl = this.getEnclosingDecl() and
      optionsDecl.getFullName() = "XMLNode.Options"
    ) and
    this.getName() = "nodeLoadExternalEntitiesAlways"
  }
}

View File

@@ -0,0 +1,86 @@
// --- stubs ---
class Data { // Test stub. Comments are trailing-only so the file's line numbers (used by the expectations below) do not shift.
init<S>(_ elements: S) {} // Generic initializer with an empty body: accepts any value and stores nothing.
}
struct URL { // Test stub with no stored properties.
init?(string: String) {} // Failable initializer with an empty body; the tests in this file force-unwrap its result.
}
extension String { // Test stub: adds an initializer taking a URL.
init(contentsOf: URL) { // The `contentsOf` argument is not read; no I/O happens here.
let data = ""
self.init(data) // Delegates to an existing String initializer with the empty string.
}
}
class XMLNode { // Test stub containing only the nested `Options` type.
struct Options : OptionSet { // Option set backed by a single `Int` raw value.
let rawValue: Int
static let nodeLoadExternalEntitiesAlways = XMLNode.Options(rawValue: 1 << 0) // bit 0
static let nodeLoadExternalEntitiesNever = XMLNode.Options(rawValue: 1 << 1) // bit 1
}
}
class XMLElement {} // Empty test stub; in this file it only appears as the parameter type of `XMLDocument.init(rootElement:)`.
class XMLDocument { // Test stub: every initializer has an empty body.
init(contentsOf: URL, options: XMLNode.Options = []) {} // `options` defaults to the empty set.
init(data: Data, options: XMLNode.Options = []) {} // `options` defaults to the empty set.
init(rootElement: XMLElement?) {} // The only initializer here without an `options` parameter.
init(xmlString: String, options: XMLNode.Options = []) {} // `options` defaults to the empty set.
}
// --- tests ---
func testUrl() { // URL derived from fetched text, loaded with `.nodeLoadExternalEntitiesAlways`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Presumably the taint source that the expectation's line number refers to; keep this line's position stable.
let remoteUrl = URL(string: remoteString)!
let _ = XMLDocument(contentsOf: remoteUrl, options: [.nodeLoadExternalEntitiesAlways]) // $ hasXXE=38
}
func testUrlSafeImplicit() { // Same URL flow as `testUrl`, but the options array is empty.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let remoteUrl = URL(string: remoteString)!
let _ = XMLDocument(contentsOf: remoteUrl, options: []) // NO XXE: document doesn't enable external entities
}
func testUrlSafeExplicit() { // Same URL flow as `testUrl`, but with `.nodeLoadExternalEntitiesNever`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let remoteUrl = URL(string: remoteString)!
let _ = XMLDocument(contentsOf: remoteUrl, options: [.nodeLoadExternalEntitiesNever]) // NO XXE: document disables external entities
}
func testData() { // `Data` built from fetched text, loaded with `.nodeLoadExternalEntitiesAlways`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Presumably the taint source that the expectation's line number refers to; keep this line's position stable.
let remoteData = Data(remoteString)
let _ = XMLDocument(data: remoteData, options: [.nodeLoadExternalEntitiesAlways]) // $ hasXXE=56
}
func testDataSafeImplicit() { // Same `Data` flow as `testData`, but the options array is empty.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let remoteData = Data(remoteString)
let _ = XMLDocument(data: remoteData, options: []) // NO XXE: document doesn't enable external entities
}
func testDataSafeExplicit() { // Same `Data` flow as `testData`, but with `.nodeLoadExternalEntitiesNever`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let remoteData = Data(remoteString)
let _ = XMLDocument(data: remoteData, options: [.nodeLoadExternalEntitiesNever]) // NO XXE: document disables external entities
}
func testString() { // Fetched text passed directly as `xmlString`, with `.nodeLoadExternalEntitiesAlways`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Presumably the taint source that the expectation's line number refers to; keep this line's position stable.
let _ = XMLDocument(xmlString: remoteString, options: [.nodeLoadExternalEntitiesAlways]) // $ hasXXE=74
}
func testStringSafeImplicit() { // Same string flow as `testString`, but the options array is empty.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let _ = XMLDocument(xmlString: remoteString, options: []) // NO XXE: document doesn't enable external entities
}
func testStringSafeExplicit() { // Same string flow as `testString`, but with `.nodeLoadExternalEntitiesNever`.
let remoteString = String(contentsOf: URL(string: "http://example.com/")!) // Text obtained from an HTTP URL.
let _ = XMLDocument(xmlString: remoteString, options: [.nodeLoadExternalEntitiesNever]) // NO XXE: document disables external entities
}