Merge branch 'master' of github.com:github/codeql into SharedDataflow

To sync files
This commit is contained in:
Rasmus Lerchedahl Petersen
2020-06-26 12:01:01 +02:00
389 changed files with 8998 additions and 4552 deletions

View File

@@ -4,8 +4,8 @@
* the arguments with which it is called, and if it were called, would be likely to cause an error.
* @kind problem
* @tags maintainability
* @problem.severity error
* @sub-severity low
* @problem.severity recommendation
* @sub-severity high
* @precision high
* @id py/inheritance/incorrect-overridden-signature
*/

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
<qhelp>
<overview>
<p>
Using user-supplied information to construct an XPath query for XML data can
result in an XPath injection flaw. By sending intentionally malformed information,
an attacker can access data that they may not normally have access to.
They may even be able to elevate their privileges on the web site if the XML data
is being used for authentication (such as an XML-based user file).
</p>
</overview>
<recommendation>
<p>
XPath injection can be prevented by using a parameterized XPath interface or by escaping the user input to make it safe to include in a dynamically constructed query.
If you are using quotes to terminate untrusted input in a dynamically constructed XPath query, then you need to escape that quote in the untrusted input to ensure that the untrusted data cannot break out of that quoted context.
</p>
<p>
Another, better mitigation option is to use a precompiled XPath query. Precompiled XPath queries are fixed before the program executes, rather than being created on the fly after the user's input has been added to the string. This is a better route because you don't have to worry about missing a character that should have been escaped.
</p>
</recommendation>
<example>
<p>In the example below, the XPath query is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xpathBad.py" />
<p> This can be fixed by using a parameterized query as shown below.</p>
<sample src="xpathGood.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/XPATH_Injection">XPath Injection</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,35 @@
/**
* @name XPath query built from user-controlled sources
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
* malicious Xpath code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xpath-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.Xpath
/**
* A taint-tracking configuration for XPath injection: sources are
* `HttpRequestTaintSource`s and sinks are `XpathInjection::XpathInjectionSink`s.
*/
class XpathInjectionConfiguration extends TaintTracking::Configuration {
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
// Any taint source modelled as coming from an HTTP request.
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
// Any sink modelled in the `XpathInjection` module.
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XpathInjection::XpathInjectionSink
}
}
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,18 @@
from lxml import etree
from io import StringIO
from django.urls import path
from django.http import HttpResponse
from django.template import Template, Context, Engine, engines
def a(request):
# `value` is read straight from the request's query parameters.
value = request.GET['xpath']
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
# BAD: the request value is formatted directly into the XPath expression.
r = tree.xpath("/tag[@id='%s']" % value)
urlpatterns = [
path('a', a)
]

View File

@@ -0,0 +1,18 @@
from lxml import etree
from io import StringIO
from django.urls import path
from django.http import HttpResponse
from django.template import Template, Context, Engine, engines
def a(request):
# `value` is read straight from the request's query parameters.
value = request.GET['xpath']
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
# GOOD: the expression text is constant; the request value is passed
# separately as the XPath variable `$tagid`.
r = tree.xpath("/tag[@id=$tagid]", tagid=value)
urlpatterns = [
path('a', a)
]

View File

@@ -0,0 +1,115 @@
/**
* Provides classes and predicates to track external data that
* may represent malicious XPath query objects.
*
* This module is intended to be imported into a taint-tracking query
* to extend `TaintKind` and `TaintSink`.
*/
import python
import semmle.python.dataflow.TaintTracking
import semmle.python.web.HttpRequest
/** Models Xpath Injection related classes and functions */
module XpathInjection {
/** Gets the value named `lxml.etree`. */
Value etree() { result = Value::named("lxml.etree") }
/** Gets the value named `libxml2.parseFile`. */
Value libxml2parseFile() { result = Value::named("libxml2.parseFile") }
/** A generic taint sink that is vulnerable to Xpath injection. */
abstract class XpathInjectionSink extends TaintSink { }
/**
* A Sink representing the first argument to an `etree.XPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XPath("`sink`")
*/
private class EtreeXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.Xpath" }
EtreeXpathArgument() {
// Match `<obj>.XPath(...)` where `<obj>` points to `lxml.etree`; the sink is argument 0.
exists(CallNode call | call.getFunction().(AttrNode).getObject("XPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
// This sink accepts taint of kind `ExternalStringKind` only.
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing the first argument to an `etree.ETXPath` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.ETXPath("`sink`")
*/
private class EtreeETXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.ETXpath" }
EtreeETXpathArgument() {
// Match `<obj>.ETXPath(...)` where `<obj>` points to `lxml.etree`; the sink is argument 0.
exists(CallNode call | call.getFunction().(AttrNode).getObject("ETXPath").pointsTo(etree()) |
call.getArg(0) = this
)
}
// This sink accepts taint of kind `ExternalStringKind` only.
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing the first argument to the `xpath` call on a parsed XML document.
*
* from lxml import etree
* from io import StringIO
* f = StringIO('<foo><bar></bar></foo>')
* tree = etree.parse(f)
* r = tree.xpath('`sink`')
*/
private class ParseXpathArgument extends XpathInjectionSink {
override string toString() { result = "lxml.etree.parse.xpath" }
ParseXpathArgument() {
// The parse call and the `xpath` call are tied together through a shared
// `Variable`: one assignment stores the result of `etree.parse(...)` into
// `var`, and the object of the `.xpath` attribute is a use of `var`.
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
// `<obj>.parse(...)` where `<obj>` points to `lxml.etree`
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
// the assignment's right-hand side is that parse call
assign.getValue().(Call).getAFlowNode() = parseCall and
// `obj` is the receiver of the `.xpath(...)` call
xpathCall.getFunction().(AttrNode).getObject("xpath") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
// the sink is argument 0 of the `.xpath(...)` call
xpathCall.getArg(0) = this
)
}
// This sink accepts taint of kind `ExternalStringKind` only.
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/**
* A Sink representing the first argument to the `xpathEval` call on a parsed libxml2 document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*/
private class ParseFileXpathEvalArgument extends XpathInjectionSink {
override string toString() { result = "libxml2.parseFile.xpathEval" }
ParseFileXpathEvalArgument() {
// The parse call and the `xpathEval` call are tied together through a shared
// `Variable`: one assignment stores the result of `libxml2.parseFile(...)`
// into `var`, and the object of the `.xpathEval` attribute is a use of `var`.
exists(
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
// the called attribute itself points to `libxml2.parseFile`
parseCall.getFunction().(AttrNode).pointsTo(libxml2parseFile()) and
// the assignment's right-hand side is that parse call
assign.getValue().(Call).getAFlowNode() = parseCall and
// `obj` is the receiver of the `.xpathEval(...)` call
xpathCall.getFunction().(AttrNode).getObject("xpathEval") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
// the sink is argument 0 of the `.xpathEval(...)` call
xpathCall.getArg(0) = this
)
}
// This sink accepts taint of kind `ExternalStringKind` only.
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
}

View File

@@ -497,8 +497,12 @@ abstract class RegexString extends Expr {
this.getChar(endin) = "}" and
end > start and
exists(string multiples | multiples = this.getText().substring(start + 1, endin) |
multiples.regexpMatch("0+") and maybe_empty = true
or
multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
or
multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false
or
multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
) and
not exists(int mid |
@@ -643,9 +647,13 @@ abstract class RegexString extends Expr {
start = 0 and end = this.getText().length()
or
exists(int y | this.lastPart(start, y) |
this.emptyMatchAtEndGroup(end, y) or
this.qualifiedItem(end, y, true) or
this.emptyMatchAtEndGroup(end, y)
or
this.qualifiedItem(end, y, true)
or
this.specialCharacter(end, y, "$")
or
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
)
or
exists(int x |

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/

View File

@@ -0,0 +1,38 @@
edges
| xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:10:13:10:19 | django.request.HttpRequest |
| xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:10:13:10:19 | django.request.HttpRequest |
| xpathBad.py:10:13:10:19 | django.request.HttpRequest | xpathBad.py:10:13:10:23 | django.http.request.QueryDict |
| xpathBad.py:10:13:10:19 | django.request.HttpRequest | xpathBad.py:10:13:10:23 | django.http.request.QueryDict |
| xpathBad.py:10:13:10:23 | django.http.request.QueryDict | xpathBad.py:10:13:10:32 | externally controlled string |
| xpathBad.py:10:13:10:23 | django.http.request.QueryDict | xpathBad.py:10:13:10:32 | externally controlled string |
| xpathBad.py:10:13:10:32 | externally controlled string | xpathBad.py:13:39:13:43 | externally controlled string |
| xpathBad.py:10:13:10:32 | externally controlled string | xpathBad.py:13:39:13:43 | externally controlled string |
| xpathBad.py:13:39:13:43 | externally controlled string | xpathBad.py:13:20:13:43 | externally controlled string |
| xpathBad.py:13:39:13:43 | externally controlled string | xpathBad.py:13:20:13:43 | externally controlled string |
| xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:11:18:11:44 | externally controlled string |
| xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:11:18:11:44 | externally controlled string |
| xpathFlow.py:11:18:11:44 | externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string |
| xpathFlow.py:11:18:11:44 | externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string |
| xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:20:18:20:44 | externally controlled string |
| xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:20:18:20:44 | externally controlled string |
| xpathFlow.py:20:18:20:44 | externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string |
| xpathFlow.py:20:18:20:44 | externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string |
| xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:30:18:30:44 | externally controlled string |
| xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:30:18:30:44 | externally controlled string |
| xpathFlow.py:30:18:30:44 | externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string |
| xpathFlow.py:30:18:30:44 | externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string |
| xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:39:18:39:44 | externally controlled string |
| xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:39:18:39:44 | externally controlled string |
| xpathFlow.py:39:18:39:44 | externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string |
| xpathFlow.py:39:18:39:44 | externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string |
| xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:47:18:47:44 | externally controlled string |
| xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:47:18:47:44 | externally controlled string |
| xpathFlow.py:47:18:47:44 | externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string |
| xpathFlow.py:47:18:47:44 | externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string |
#select
| xpathBad.py:13:20:13:43 | BinaryExpr | xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:13:20:13:43 | externally controlled string | This Xpath query depends on $@. | xpathBad.py:9:7:9:13 | request | a user-provided value |
| xpathFlow.py:14:20:14:29 | xpathQuery | xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:11:18:11:29 | Attribute | a user-provided value |
| xpathFlow.py:23:29:23:38 | xpathQuery | xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:20:18:20:29 | Attribute | a user-provided value |
| xpathFlow.py:32:29:32:38 | xpathQuery | xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:30:18:30:29 | Attribute | a user-provided value |
| xpathFlow.py:41:31:41:40 | xpathQuery | xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:39:18:39:29 | Attribute | a user-provided value |
| xpathFlow.py:49:29:49:38 | xpathQuery | xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:47:18:47:29 | Attribute | a user-provided value |

View File

@@ -0,0 +1,40 @@
from lxml import etree
from io import StringIO
def a():
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
r = tree.xpath('/foo/bar')
def b():
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.XPath("//text()")
text = find_text(root)[0]
def c():
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.XPath("//text()", smart_strings=False)
text = find_text(root)[0]
def d():
root = etree.XML("<root><a>TEXT</a></root>")
find_text = find = etree.ETXPath("//{ns}b")
text = find_text(root)[0]
def e():
import libxml2
doc = libxml2.parseFile('xpath_injection/credential.xml')
results = doc.xpathEval('sink')
if __name__ == "__main__":
a()
b()
c()
d()
e()

View File

@@ -0,0 +1 @@
experimental/CWE-643/xpath.ql

View File

@@ -0,0 +1,18 @@
from lxml import etree
from io import StringIO
from django.urls import path
from django.http import HttpResponse
from django.template import Template, Context, Engine, engines
def a(request):
value = request.GET['xpath']
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
r = tree.xpath("/tag[@id='%s']" % value)
urlpatterns = [
path('a', a)
]

View File

@@ -0,0 +1,49 @@
from io import StringIO
from flask import Flask, request
app = Flask(__name__)
@app.route("/xpath1")
def xpath1():
from lxml import etree
xpathQuery = request.args.get('xml', '')
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
r = tree.xpath(xpathQuery)
@app.route("/xpath2")
def xpath2():
from lxml import etree
xpathQuery = request.args.get('xml', '')
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.XPath(xpathQuery)
text = find_text(root)[0]
@app.route("/xpath3")
def xpath3():
from lxml import etree
xpathQuery = request.args.get('xml', '')
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.XPath(xpathQuery, smart_strings=False)
text = find_text(root)[0]
@app.route("/xpath4")
def xpath4():
from lxml import etree
xpathQuery = request.args.get('xml', '')
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.ETXPath(xpathQuery)
text = find_text(root)[0]
@app.route("/xpath5")
def xpath5():
import libxml2
xpathQuery = request.args.get('xml', '')
doc = libxml2.parseFile('xpath_injection/credential.xml')
results = doc.xpathEval(xpathQuery)

View File

@@ -0,0 +1,18 @@
from lxml import etree
from io import StringIO
from django.urls import path
from django.http import HttpResponse
from django.template import Template, Context, Engine, engines
def a(request):
value = request.GET['xpath']
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
r = tree.xpath("/tag[@id=$tagid]", tagid=value)
urlpatterns = [
path('a', a)
]

View File

@@ -0,0 +1,12 @@
| xpath.py:8:20:8:29 | lxml.etree.parse.xpath | externally controlled string |
| xpath.py:13:29:13:38 | lxml.etree.Xpath | externally controlled string |
| xpath.py:19:29:19:38 | lxml.etree.Xpath | externally controlled string |
| xpath.py:25:38:25:46 | lxml.etree.ETXpath | externally controlled string |
| xpath.py:32:29:32:34 | libxml2.parseFile.xpathEval | externally controlled string |
| xpathBad.py:13:20:13:43 | lxml.etree.parse.xpath | externally controlled string |
| xpathFlow.py:14:20:14:29 | lxml.etree.parse.xpath | externally controlled string |
| xpathFlow.py:23:29:23:38 | lxml.etree.Xpath | externally controlled string |
| xpathFlow.py:32:29:32:38 | lxml.etree.Xpath | externally controlled string |
| xpathFlow.py:41:31:41:40 | lxml.etree.ETXpath | externally controlled string |
| xpathFlow.py:49:29:49:38 | libxml2.parseFile.xpathEval | externally controlled string |
| xpathGood.py:13:20:13:37 | lxml.etree.parse.xpath | externally controlled string |

View File

@@ -0,0 +1,6 @@
import python
import experimental.semmle.python.security.injection.Xpath
from XpathInjection::XpathInjectionSink sink, TaintKind kind
where sink.sinks(kind)
select sink, kind

View File

@@ -110,7 +110,6 @@
| ax{3,} | 5 | 6 |
| ax{3} | 0 | 1 |
| ax{3} | 1 | 2 |
| ax{3} | 2 | 3 |
| ax{3} | 3 | 4 |
| ax{3} | 4 | 5 |
| ax{,3} | 0 | 1 |

View File

@@ -84,6 +84,8 @@
| ax{3,} | last | 1 | 6 |
| ax{3,} | last | 5 | 6 |
| ax{3} | first | 0 | 1 |
| ax{3} | last | 1 | 2 |
| ax{3} | last | 1 | 5 |
| ax{3} | last | 4 | 5 |
| ax{,3} | first | 0 | 1 |
| ax{,3} | last | 0 | 1 |

View File

@@ -11,4 +11,5 @@
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
| ax{01,3} | 1 | 8 | false |
| ax{3,} | 1 | 6 | false |
| ax{3} | 1 | 5 | false |
| ax{,3} | 1 | 6 | true |

View File

@@ -207,9 +207,9 @@
| ax{3,} | sequence | 0 | 6 |
| ax{3} | char | 0 | 1 |
| ax{3} | char | 1 | 2 |
| ax{3} | char | 2 | 3 |
| ax{3} | char | 3 | 4 |
| ax{3} | char | 4 | 5 |
| ax{3} | qualified | 1 | 5 |
| ax{3} | sequence | 0 | 5 |
| ax{,3} | char | 0 | 1 |
| ax{,3} | char | 1 | 2 |

View File

@@ -1,3 +1,3 @@
| test.py:41:12:41:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
| test.py:42:12:42:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
| test.py:43:12:43:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |
| test.py:46:12:46:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
| test.py:47:12:47:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
| test.py:48:12:48:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |

View File

@@ -1,4 +1,4 @@
| test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. |
| test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. |
| test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. |
| test.py:74:12:74:27 | Str | This regular expression includes an unmatchable caret at offset 8. |
| test.py:79:12:79:27 | Str | This regular expression includes an unmatchable caret at offset 8. |

View File

@@ -1,4 +1,4 @@
| test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. |
| test.py:75:12:75:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:80:12:80:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |

View File

@@ -30,12 +30,17 @@ re.compile(b"abc$ ")
re.compile(b"abc$ (?s)")
re.compile(b"\[$] ")
#Likely false positives for unmatchable dollar
re.compile(b"[$] ")
re.compile(b"\$ ")
re.compile(b"abc$(?m)")
re.compile(b"abc$()")
#Not unmatchable dollar
re.match(b"[$] ", b"$ ")
re.match(b"\$ ", b"$ ")
re.match(b"abc$(?m)", b"abc")
re.match(b"abc$()", b"abc")
re.match(b"((a$)|b)*", b"bba")
re.match(b"((a$)|b){4}", b"bbba") # Inspired by FP report here: https://github.com/github/codeql/issues/2403
re.match(b"((a$).*)", b"a")
re.match("(\Aab$|\Aba$)$\Z", "ab")
re.match(b"((a$\Z)|b){4}", b"bbba")
re.match(b"(a){00}b", b"b")
#Duplicate character in set
re.compile(b"[AA]")

View File

@@ -0,0 +1,10 @@
def parseFile(filename):
    """Return a new `xmlDoc`; `filename` is accepted but never read."""
    document = xmlDoc(_obj=None)
    return document
class xmlDoc(object):
    """Stub document type returned by `parseFile`.

    The base class is the builtin `object`; the previous `Object` was an
    undefined name and made the class definition raise `NameError`.
    """

    def __init__(self, _obj=None):
        """Accept the optional wrapped object and ignore it."""
        pass

    def xpathEval(self, expr):
        """Accept an XPath expression; nothing is evaluated and None is returned."""
        pass

View File

@@ -0,0 +1,37 @@
class _ElementTree(object):
def xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables):
pass
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
pass
class ETXPath(object):
    """Stub whose constructor accepts a path expression and ignores it."""

    def __init__(self, path, extensions=None, regexp=True, smart_strings=True):
        """Accept the constructor arguments without storing anything."""
class XPath(object):
    """Stub whose constructor accepts a path expression and ignores it."""

    def __init__(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True):
        """Accept the constructor arguments without storing anything."""
class XSLT(object):
    """Stub whose constructor accepts a stylesheet input and ignores it."""

    def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
        """Accept the constructor arguments without storing anything."""
def parse(self, parser=None, base_url=None):
    """Return a new `_ElementTree`; every argument is ignored."""
    tree = _ElementTree()
    return tree
def fromstring(self, text, parser=None, base_url=None):
    """Accept the arguments and return None; nothing is parsed."""
def fromstringlist(self, strings, parser=None):
    """Accept the arguments and return None; nothing is parsed."""
def XML(self, text, parser=None, base_url=None):
    """Accept the arguments and return None; nothing is parsed."""