Merge remote-tracking branch 'origin/main' into jty/python/nosqlInjection

This commit is contained in:
jorgectf
2021-08-10 20:01:08 +02:00
3028 changed files with 246210 additions and 35181 deletions

View File

@@ -181,14 +181,14 @@ private int getEndOfColumnPosition(int start, string content) {
min(string name, int cand |
exists(TNamedColumn(name)) and
cand = content.indexOf(name + ":") and
cand > start
cand >= start
|
cand
)
or
not exists(string name |
exists(TNamedColumn(name)) and
content.indexOf(name + ":") > start
content.indexOf(name + ":") >= start
) and
result = content.length()
}

View File

@@ -0,0 +1,17 @@
import pkg # $ use=moduleImport("pkg")
async def foo():
coro = pkg.async_func() # $ use=moduleImport("pkg").getMember("async_func").getReturn()
coro # $ use=moduleImport("pkg").getMember("async_func").getReturn()
result = await coro # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
return result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
async def bar():
result = await pkg.async_func() # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
return result # $ use=moduleImport("pkg").getMember("async_func").getReturn().getAwaited()
def check_annotations():
# Just to make sure what annotations should look like :)
result = pkg.sync_func() # $ use=moduleImport("pkg").getMember("sync_func").getReturn()
return result # $ use=moduleImport("pkg").getMember("sync_func").getReturn()

View File

@@ -1 +1 @@
semmle-extractor-options: --lang=3
semmle-extractor-options: --lang=3 --max-import-depth=1

View File

@@ -13,7 +13,8 @@ class ApiUseTest extends InlineExpectationsTest {
l = n.getLocation() and
// Module variable nodes have no suitable location, so it's best to simply exclude them entirely
// from the inline tests.
not n instanceof DataFlow::ModuleVariableNode
not n instanceof DataFlow::ModuleVariableNode and
exists(l.getFile().getRelativePath())
}
override predicate hasActualResult(Location location, string element, string tag, string value) {

View File

@@ -0,0 +1,8 @@
conjunctive_lookup
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | bar |
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | foo |
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | bar |
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | foo |
calls_lookup
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj1 | foo |
| test.py:6:1:6:6 | ControlFlowNode for meth() | meth() | obj2 | bar |

View File

@@ -0,0 +1,6 @@
if cond:
meth = obj1.foo
else:
meth = obj2.bar
meth()

View File

@@ -0,0 +1,18 @@
import python
import semmle.python.dataflow.new.DataFlow
import experimental.dataflow.TestUtil.PrintNode
/**
 * Holds if `methCall` is a method call node whose pretty-printed form is `call`,
 * `object` is the pretty-printed form of a result of `methCall.getObject()`, and
 * `methodName` is a result of `methCall.getMethodName()`.
 *
 * The object and the method name are constrained independently of each other.
 */
query predicate conjunctive_lookup(
  DataFlow::MethodCallNode methCall, string call, string object, string methodName
) {
  methodName = methCall.getMethodName() and
  exists(DataFlow::Node receiver |
    receiver = methCall.getObject() and
    object = prettyNode(receiver)
  ) and
  call = prettyNode(methCall)
}
/**
 * Holds if `methCall` is a method call node whose pretty-printed form is `call`, and
 * `methCall.calls(o, methodName)` holds for a node `o` whose pretty-printed form is `object`.
 *
 * Here the object and the method name are obtained together from a single `calls` lookup.
 */
query predicate calls_lookup(
  DataFlow::MethodCallNode methCall, string call, string object, string methodName
) {
  exists(DataFlow::Node receiver |
    object = prettyNode(receiver) and
    methCall.calls(receiver, methodName)
  ) and
  call = prettyNode(methCall)
}

View File

@@ -1,12 +1,17 @@
// /**
// * @kind path-problem
// */
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import TestUtilities.InlineExpectationsTest
import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.ApiGraphs
class SensitiveDataSourcesTest extends InlineExpectationsTest {
SensitiveDataSourcesTest() { this = "SensitiveDataSourcesTest" }
override string getARelevantTag() { result = "SensitiveDataSource" }
override string getARelevantTag() { result in ["SensitiveDataSource", "SensitiveUse"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
@@ -15,6 +20,32 @@ class SensitiveDataSourcesTest extends InlineExpectationsTest {
element = source.toString() and
value = source.getClassification() and
tag = "SensitiveDataSource"
or
exists(DataFlow::Node use |
any(SensitiveUseConfiguration config).hasFlow(source, use) and
location = use.getLocation() and
element = use.toString() and
value = source.getClassification() and
tag = "SensitiveUse"
)
)
}
}
/**
 * A taint-tracking configuration whose sources are `SensitiveDataSource` nodes and whose
 * sinks are the arguments of calls to the builtin `print`.
 */
class SensitiveUseConfiguration extends TaintTracking::Configuration {
  SensitiveUseConfiguration() { this = "SensitiveUseConfiguration" }

  /** Holds if `node` is a `SensitiveDataSource`. */
  override predicate isSource(DataFlow::Node node) {
    exists(SensitiveDataSource src | node = src)
  }

  /** Holds if `node` is an argument (at any index) of a call to the builtin `print`. */
  override predicate isSink(DataFlow::Node node) {
    exists(int argIndex | node = API::builtin("print").getACall().getArg(argIndex))
  }

  /** Adds the steps given by `sensitiveDataExtraStepForCalls` as taint steps. */
  override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
    sensitiveDataExtraStepForCalls(node1, node2)
  }
}
// import DataFlow::PathGraph
// from SensitiveUseConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
// where cfg.hasFlowPath(source, sink)
// select sink, source, sink, "taint from $@", source.getNode(), "here"

View File

@@ -1,5 +1,6 @@
from not_found import get_passwd, account_id
from not_found import get_passwd # $ SensitiveDataSource=password
from not_found import account_id # $ SensitiveDataSource=id
def get_password():
pass
@@ -20,14 +21,94 @@ fetch_certificate() # $ SensitiveDataSource=certificate
account_id() # $ SensitiveDataSource=id
safe_to_store = encrypt_password(pwd)
f = get_password
f() # $ SensitiveDataSource=password
# more tests of functions we don't have definition for
x = unkown_func_not_even_imported_get_password() # $ SensitiveDataSource=password
print(x) # $ SensitiveUse=password
f = get_passwd
x = f()
print(x) # $ SensitiveUse=password
import not_found
f = not_found.get_passwd # $ SensitiveDataSource=password
x = f()
print(x) # $ SensitiveUse=password
def my_func(non_sensitive_name):
x = non_sensitive_name()
print(x) # $ SensitiveUse=password
f = not_found.get_passwd # $ SensitiveDataSource=password
my_func(f)
# attributes
foo = ObjectFromDatabase()
foo.secret # $ SensitiveDataSource=secret
foo.username # $ SensitiveDataSource=id
getattr(foo, "password") # $ SensitiveDataSource=password
x = "password"
getattr(foo, x) # $ SensitiveDataSource=password
# based on variable/parameter names
def my_func(password): # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
password = some_function() # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
for password in some_function2(): # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
with some_function3() as password: # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
# Special handling of lookups of sensitive properties
request.args["password"], # $ MISSING: SensitiveDataSource=password
request.args["password"], # $ SensitiveDataSource=password
request.args.get("password") # $ SensitiveDataSource=password
x = "password"
request.args.get(x) # $ SensitiveDataSource=password
# I don't think handling `getlist` is super important; it is just included to show what we don't handle
request.args.getlist("password")[0] # $ MISSING: SensitiveDataSource=password
from not_found import password2 as foo # $ SensitiveDataSource=password
print(foo) # $ SensitiveUse=password
# ------------------------------------------------------------------------------
# cross-talk between different calls
# ------------------------------------------------------------------------------
# Case 1: providing name as argument
_configuration = {"sleep_timer": 5, "mysql_password": "1234"}
def get_config(key):
# Treating this as a SensitiveDataSource is questionable, since that will result in
# _all_ calls to `get_config` being treated as giving sensitive data
return _configuration[key]
foo = get_config("mysql_password")
print(foo) # $ MISSING: SensitiveUse=password
bar = get_config("sleep_timer")
print(bar)
# Case 2: Providing function as argument
def call_wrapper(func):
print("Will call", func)
# Treating this as a SensitiveDataSource is questionable, since that will result in
# _all_ calls to `call_wrapper` being treated as giving sensitive data
return func() # $ SensitiveDataSource=password
foo = call_wrapper(get_password)
print(foo) # $ SensitiveUse=password
harmless = lambda: "bar"
bar = call_wrapper(harmless)
print(bar) # $ SPURIOUS: SensitiveUse=password

View File

@@ -104,7 +104,7 @@ def non_syntactic():
_str = str
ensure_tainted(
meth(), # $ MISSING: tainted
_str(ts), # $ MISSING: tainted
_str(ts), # $ tainted
)

View File

@@ -2,7 +2,7 @@ import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TypeTracker
private DataFlow::LocalSourceNode module_tracker(TypeTracker t) {
private DataFlow::TypeTrackingNode module_tracker(TypeTracker t) {
t.start() and
result = DataFlow::importNode("module")
or
@@ -13,7 +13,7 @@ query DataFlow::Node module_tracker() {
module_tracker(DataFlow::TypeTracker::end()).flowsTo(result)
}
private DataFlow::LocalSourceNode module_attr_tracker(TypeTracker t) {
private DataFlow::TypeTrackingNode module_attr_tracker(TypeTracker t) {
t.startInAttr("attr") and
result = module_tracker()
or

View File

@@ -6,7 +6,7 @@ import TestUtilities.InlineExpectationsTest
// -----------------------------------------------------------------------------
// tracked
// -----------------------------------------------------------------------------
private DataFlow::LocalSourceNode tracked(TypeTracker t) {
private DataFlow::TypeTrackingNode tracked(TypeTracker t) {
t.start() and
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
or
@@ -34,14 +34,14 @@ class TrackedTest extends InlineExpectationsTest {
// -----------------------------------------------------------------------------
// int + str
// -----------------------------------------------------------------------------
private DataFlow::LocalSourceNode int_type(TypeTracker t) {
private DataFlow::TypeTrackingNode int_type(TypeTracker t) {
t.start() and
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
or
exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
}
private DataFlow::LocalSourceNode string_type(TypeTracker t) {
private DataFlow::TypeTrackingNode string_type(TypeTracker t) {
t.start() and
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
or
@@ -83,7 +83,7 @@ class TrackedStringTest extends InlineExpectationsTest {
// -----------------------------------------------------------------------------
// tracked_self
// -----------------------------------------------------------------------------
private DataFlow::LocalSourceNode tracked_self(TypeTracker t) {
private DataFlow::TypeTrackingNode tracked_self(TypeTracker t) {
t.start() and
exists(Function f |
f.isMethod() and
@@ -117,7 +117,7 @@ class TrackedSelfTest extends InlineExpectationsTest {
// -----------------------------------------------------------------------------
// This modeling follows the same pattern that we currently use in our real library modeling.
/** Gets a reference to `foo` (fictive module). */
private DataFlow::LocalSourceNode foo(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode foo(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("foo")
or
@@ -128,7 +128,7 @@ private DataFlow::LocalSourceNode foo(DataFlow::TypeTracker t) {
DataFlow::Node foo() { foo(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to `foo.bar` (fictive module). */
private DataFlow::LocalSourceNode foo_bar(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode foo_bar(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("foo.bar")
or
@@ -142,7 +142,7 @@ private DataFlow::LocalSourceNode foo_bar(DataFlow::TypeTracker t) {
DataFlow::Node foo_bar() { foo_bar(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to `foo.bar.baz` (fictive attribute on `foo.bar` module). */
private DataFlow::LocalSourceNode foo_bar_baz(DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode foo_bar_baz(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("foo.bar.baz")
or

View File

@@ -0,0 +1,3 @@
import python
import experimental.meta.ConceptsTest
import experimental.semmle.python.frameworks.SqlAlchemy

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1,2 @@
import experimental.meta.InlineTaintTest
import experimental.semmle.python.frameworks.SqlAlchemy

View File

@@ -0,0 +1,57 @@
import sqlalchemy
from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.pool import StaticPool
from sqlalchemy.orm import relationship, backref, sessionmaker, joinedload
from sqlalchemy.sql import text
engine = create_engine(
'sqlite:///:memory:',
echo=True,
connect_args={"check_same_thread": False},
poolclass=StaticPool
)
Base = declarative_base()
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
name = Column(String)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
ed_user = User(name='ed')
ed_user2 = User(name='george')
session.add(ed_user)
session.add(ed_user2)
session.commit()
# Injection without requiring the text() taint-step
session.query(User).filter_by(name="some sql") # $ MISSING: getSql="some sql"
session.scalar("some sql") # $ getSql="some sql"
engine.scalar("some sql") # $ getSql="some sql"
session.execute("some sql") # $ getSql="some sql"
with engine.connect() as connection:
connection.execute("some sql") # $ getSql="some sql"
with engine.begin() as connection:
connection.execute("some sql") # $ getSql="some sql"
# Injection requiring the text() taint-step
t = text("some sql")
session.query(User).filter(t) # $ getSql=t
session.query(User).group_by(User.id).having(t) # $ getSql=User.id MISSING: getSql=t
session.query(User).group_by(t).first() # $ getSql=t
session.query(User).order_by(t).first() # $ getSql=t
query = select(User).where(User.name == t) # $ MISSING: getSql=t
with engine.connect() as conn:
conn.execute(query) # $ getSql=query

View File

@@ -0,0 +1,12 @@
import sqlalchemy
def test_taint():
ts = TAINTED_STRING
ensure_tainted(
ts, # $ tainted
sqlalchemy.text(ts), # $ tainted
sqlalchemy.sql.text(ts),# $ tainted
sqlalchemy.sql.expression.text(ts),# $ tainted
sqlalchemy.sql.expression.TextClause(ts),# $ tainted
)

View File

@@ -93,6 +93,23 @@ class EncodingTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test that reports every input node of a `Logging` instance
 * under the `loggingInput` tag.
 */
class LoggingTest extends InlineExpectationsTest {
  LoggingTest() { this = "LoggingTest" }

  override string getARelevantTag() { result = "loggingInput" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only consider locations whose file has a relative path.
    exists(location.getFile().getRelativePath()) and
    tag = "loggingInput" and
    exists(DataFlow::Node data |
      data = any(Logging logging).getAnInput() and
      value = prettyNodeForInlineTest(data) and
      element = data.toString() and
      location = data.getLocation()
    )
  }
}
class CodeExecutionTest extends InlineExpectationsTest {
CodeExecutionTest() { this = "CodeExecutionTest" }
@@ -129,6 +146,38 @@ class SqlExecutionTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test for `Escaping`: input nodes are reported as `escapeInput`,
 * output nodes as `escapeOutput`, and the kind as `escapeKind`.
 */
class EscapingTest extends InlineExpectationsTest {
  EscapingTest() { this = "EscapingTest" }

  override string getARelevantTag() {
    result = "escapeInput" or result = "escapeOutput" or result = "escapeKind"
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only consider locations whose file has a relative path.
    exists(location.getFile().getRelativePath()) and
    exists(Escaping esc |
      // Input and output nodes are reported at the location of the node itself.
      exists(DataFlow::Node data |
        (
          tag = "escapeInput" and data = esc.getAnInput()
          or
          tag = "escapeOutput" and data = esc.getOutput()
        ) and
        value = prettyNodeForInlineTest(data) and
        element = data.toString() and
        location = data.getLocation()
      )
      or
      // The kind is reported at the location of the escaping; element and value are both the kind.
      tag = "escapeKind" and
      location = esc.getLocation() and
      value = esc.getKind() and
      element = value
    )
  }
}
class HttpServerRouteSetupTest extends InlineExpectationsTest {
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }
@@ -252,6 +301,38 @@ class HttpServerHttpRedirectResponseTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test for `HTTP::Server::CookieWrite`. Every cookie write yields a
 * `CookieWrite` result with an empty value; its header, name and value arguments (when
 * present) yield `CookieRawHeader`, `CookieName` and `CookieValue` results respectively.
 */
class HttpServerCookieWriteTest extends InlineExpectationsTest {
  HttpServerCookieWriteTest() { this = "HttpServerCookieWriteTest" }

  override string getARelevantTag() {
    result = "CookieWrite" or
    result = "CookieRawHeader" or
    result = "CookieName" or
    result = "CookieValue"
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only consider locations whose file has a relative path.
    exists(location.getFile().getRelativePath()) and
    exists(HTTP::Server::CookieWrite cookieWrite |
      // All four result kinds share the location and element of the cookie write.
      location = cookieWrite.getLocation() and
      element = cookieWrite.toString() and
      (
        tag = "CookieWrite" and
        value = ""
        or
        tag = "CookieRawHeader" and
        value = prettyNodeForInlineTest(cookieWrite.getHeaderArg())
        or
        tag = "CookieName" and
        value = prettyNodeForInlineTest(cookieWrite.getNameArg())
        or
        tag = "CookieValue" and
        value = prettyNodeForInlineTest(cookieWrite.getValueArg())
      )
    )
  }
}
class FileSystemAccessTest extends InlineExpectationsTest {
FileSystemAccessTest() { this = "FileSystemAccessTest" }
@@ -269,6 +350,23 @@ class FileSystemAccessTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test that reports every data node of a `FileSystemWriteAccess`
 * under the `fileWriteData` tag.
 */
class FileSystemWriteAccessTest extends InlineExpectationsTest {
  FileSystemWriteAccessTest() { this = "FileSystemWriteAccessTest" }

  override string getARelevantTag() { result in ["fileWriteData"] }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only consider locations whose file has a relative path.
    exists(location.getFile().getRelativePath()) and
    tag = "fileWriteData" and
    exists(DataFlow::Node data |
      data = any(FileSystemWriteAccess write).getADataNode() and
      value = prettyNodeForInlineTest(data) and
      element = data.toString() and
      location = data.getLocation()
    )
  }
}
class PathNormalizationTest extends InlineExpectationsTest {
PathNormalizationTest() { this = "PathNormalizationTest" }

View File

@@ -0,0 +1,10 @@
| auth_bad_2.py:19:5:19:42 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_2.py:33:5:33:44 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_2.py:47:5:47:43 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_2.py:60:5:60:52 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_2.py:73:5:73:39 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_2.py:87:5:87:48 | ControlFlowNode for Attribute() | The following LDAP bind operation is executed without authentication |
| auth_bad_3.py:19:12:19:43 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
| auth_bad_3.py:33:12:33:57 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
| auth_bad_3.py:46:12:46:55 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |
| auth_bad_3.py:60:12:60:42 | ControlFlowNode for Connection() | The following LDAP bind operation is executed without authentication |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-287/ImproperLdapAuth.ql

View File

@@ -0,0 +1,92 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
app = Flask(__name__)
@app.route("/simple_bind_example")
def simple_bind_example():
"""
The bind's password argument is not set
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.simple_bind('cn=root')
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/simple_bind_s_example")
def simple_bind_s_example():
"""
The bind's password argument is not set
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.simple_bind_s('cn=root')
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_s_example")
def bind_s_example():
"""
The bind's password argument is set to None
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind_s('cn=root', None)
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_s_example")
def bind_s_example_kwargs():
"""
The bind's password argument is set to None
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind_s(who='cn=root', cred=None)
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_example")
def bind_example():
"""
The bind's password argument is an empty string
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_example")
def bind_example_kwargs():  # renamed: a second `bind_example` would shadow the first and collide as a Flask endpoint
    """
    The bind's password argument is an empty string (passed by keyword as `cred`)
    """
    dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
    search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
    ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
    ldap_connection.bind(who='cn=root', cred="")
    user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -0,0 +1,65 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
app = Flask(__name__)
@app.route("/passwordNone")
def passwordNone():
"""
The bind's password argument is set to None
"""
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, 'user_dn', None)
status, result, response, _ = conn.search(dn, search_filter)
@app.route("/passwordNone")
def passwordNoneKwargs():
"""
The bind's password argument is set to None
"""
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn', password=None)
status, result, response, _ = conn.search(dn, search_filter)
@app.route("/passwordEmpty")
def passwordEmpty():
"""
The bind's password argument is an empty string
"""
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn', password="")
status, result, response, _ = conn.search(dn, search_filter)
@app.route("/notPassword")
def notPassword():
"""
The bind's password argument is not set
"""
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn')
status, result, response, _ = conn.search(dn, search_filter)
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -0,0 +1,65 @@
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn
app = Flask(__name__)
@app.route("/simple_bind_example")
def simple_bind_example():
"""
The bind's password argument is a non-empty string
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.simple_bind('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/simple_bind_s_example")
def simple_bind_s_example():
"""
The bind's password argument is a non-empty string
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.simple_bind_s('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_s_example")
def bind_s_example():
"""
The bind's password argument is a non-empty string
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind_s('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
@app.route("/bind_example")
def bind_example():
"""
The bind's password argument is a non-empty string
"""
dn = "dc={}".format(ldap.dn.escape_dn_chars(request.args['dc']))
search_filter = "(user={})".format(ldap.filter.escape_filter_chars(request.args['search']))
ldap_connection = ldap.initialize("ldap://127.0.0.1:1337")
ldap_connection.bind('cn=root', "SecurePa$$!")
user = ldap_connection.search_s(dn, ldap.SCOPE_SUBTREE, search_filter)
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -0,0 +1,24 @@
from ldap3 import Server, Connection, ALL
from flask import request, Flask
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars
app = Flask(__name__)
@app.route("/passwordFromEnv")
def passwordFromEnv():
"""
The bind's password argument is a non-empty string
"""
dn = "dc={}".format(escape_rdn(request.args['dc']))
search_filter = "(user={})".format(escape_filter_chars(request.args['search']))
srv = Server('servername', get_info=ALL)
conn = Connection(srv, user='user_dn',
password="SecurePa$$!")
status, result, response, _ = conn.search(dn, search_filter)
# if __name__ == "__main__":
# app.run(debug=True)

View File

@@ -1,5 +0,0 @@
| ClickHouseDriver.py:15:22:15:106 | ControlFlowNode for Attribute() | ClickHouseDriver.py:15:52:15:105 | ControlFlowNode for BinaryExpr |
| ClickHouseDriver.py:18:5:18:87 | ControlFlowNode for Attribute() | ClickHouseDriver.py:18:33:18:86 | ControlFlowNode for BinaryExpr |
| ClickHouseDriver.py:22:5:22:62 | ControlFlowNode for Attribute() | ClickHouseDriver.py:22:33:22:37 | ControlFlowNode for query |
| ClickHouseDriver.py:27:5:27:74 | ControlFlowNode for Attribute() | ClickHouseDriver.py:27:20:27:73 | ControlFlowNode for BinaryExpr |
| ClickHouseDriver.py:30:5:30:89 | ControlFlowNode for Attribute() | ClickHouseDriver.py:30:35:30:88 | ControlFlowNode for BinaryExpr |

View File

@@ -1,32 +0,0 @@
from django.conf.urls import url
from clickhouse_driver import Client
from clickhouse_driver import connect
from aioch import Client as aiochClient
# Dummy Client subclass
class MyClient(Client):
def dummy(self):
return None
def show_user(request, username):
# BAD -- Untrusted user input is directly injected into the sql query using async library 'aioch'
aclient = aiochClient("localhost")
progress = await aclient.execute_with_progress("SELECT * FROM users WHERE username = '%s'" % username)
# BAD -- Untrusted user input is directly injected into the sql query using native client of library 'clickhouse_driver'
Client('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
# GOOD -- query uses prepared statements
query = "SELECT * FROM users WHERE username = %(username)s"
Client('localhost').execute(query, {"username": username})
# BAD -- Untrusted user input is directly injected into the sql query using PEP249 interface
conn = connect('clickhouse://localhost')
cursor = conn.cursor()
cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
# BAD -- Untrusted user input is directly injected into the sql query using MyClient, which is a subclass of Client
MyClient('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
urlpatterns = [url(r'^users/(?P<username>[^/]+)$', show_user)]

View File

@@ -1,6 +0,0 @@
import python
import experimental.semmle.python.frameworks.ClickHouseDriver
import semmle.python.Concepts
from SqlExecution s
select s, s.getSql()

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3

View File

@@ -0,0 +1,30 @@
import aioch
SQL = "SOME SQL"
async def aioch_test():
client = aioch.Client("localhost")
await client.execute(SQL) # $ getSql=SQL
await client.execute(query=SQL) # $ getSql=SQL
await client.execute_with_progress(SQL) # $ getSql=SQL
await client.execute_with_progress(query=SQL) # $ getSql=SQL
await client.execute_iter(SQL) # $ getSql=SQL
await client.execute_iter(query=SQL) # $ getSql=SQL
# Using a custom client (this pattern has been seen for the blocking version in the
# `clickhouse_driver` PyPI package)
class MyClient(aioch.Client):
pass
async def test_custom_client():
client = MyClient("localhost")
await client.execute(SQL) # $ getSql=SQL

View File

@@ -65,6 +65,20 @@ async def redirect_302(request): # $ requestHandler
else:
raise web.HTTPFound(location="/logout") # $ HttpResponse HttpRedirectResponse mimetype=application/octet-stream redirectLocation="/logout"
################################################################################
# Cookies
################################################################################
@routes.get("/setting_cookie") # $ routeSetup="/setting_cookie"
async def setting_cookie(request): # $ requestHandler
resp = web.Response(text="foo") # $ HttpResponse mimetype=text/plain responseBody="foo"
resp.cookies["key"] = "value" # $ CookieWrite CookieName="key" CookieValue="value"
resp.headers["Set-Cookie"] = "key2=value2" # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
resp.set_cookie("key3", "value3") # $ CookieWrite CookieName="key3" CookieValue="value3"
resp.set_cookie(name="key3", value="value3") # $ CookieWrite CookieName="key3" CookieValue="value3"
resp.del_cookie("key4") # $ CookieWrite CookieName="key4"
return resp
if __name__ == "__main__":
app = web.Application()

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,42 @@
# Test input for SQL execution through `clickhouse_driver`. Every annotated call
# is expected to report its query argument under the `getSql` tag.
import clickhouse_driver
SQL = "SOME SQL"
# Normal operation
client = clickhouse_driver.client.Client("localhost")
client.execute(SQL) # $ getSql=SQL
client.execute(query=SQL) # $ getSql=SQL
client.execute_with_progress(SQL) # $ getSql=SQL
client.execute_with_progress(query=SQL) # $ getSql=SQL
client.execute_iter(SQL) # $ getSql=SQL
client.execute_iter(query=SQL) # $ getSql=SQL
# commonly used alias
client = clickhouse_driver.Client("localhost")
client.execute(SQL) # $ getSql=SQL
# Using PEP249 interface
conn = clickhouse_driver.connect('clickhouse://localhost')
cursor = conn.cursor()
cursor.execute(SQL) # $ getSql=SQL
# Using custom client
#
# examples from real world code
# https://github.com/Altinity/clickhouse-mysql-data-reader/blob/3b1b7088751b05e5bbf45890c5949b58208c2343/clickhouse_mysql/dbclient/chclient.py#L10
# https://github.com/Felixoid/clickhouse-plantuml/blob/d8b2ba7d164a836770ec21f5e4035dfb04c41d9c/clickhouse_plantuml/client.py#L9
class MyClient(clickhouse_driver.Client):
    # Subclass without any overrides; `execute` is inherited from the driver's client.
    pass
MyClient("localhost").execute(SQL) # $ getSql=SQL

View File

@@ -103,3 +103,17 @@ class CustomJsonResponse(JsonResponse):
def safe__custom_json_response(request):
return CustomJsonResponse("ACME Responses", {"foo": request.GET.get("foo")}) # $HttpResponse mimetype=application/json MISSING: responseBody=Dict SPURIOUS: responseBody="ACME Responses"
################################################################################
# Cookies
################################################################################
# Cookie-writing examples for a Django `HttpResponse`. The trailing annotations
# are inline test expectations for the statement on the same line.
def setting_cookie(request):
    resp = HttpResponse() # $ HttpResponse mimetype=text/html
    # `set_cookie` with positional and with keyword arguments.
    resp.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
    resp.set_cookie(key="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
    # Raw `Set-Cookie` header write; the expectation is marked MISSING.
    resp.headers["Set-Cookie"] = "key2=value2" # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
    # Direct write through the `cookies` mapping.
    resp.cookies["key3"] = "value3" # $ CookieWrite CookieName="key3" CookieValue="value3"
    # Deletion with positional and with keyword argument.
    resp.delete_cookie("key4") # $ CookieWrite CookieName="key4"
    resp.delete_cookie(key="key4") # $ CookieWrite CookieName="key4"
    return resp

View File

@@ -184,6 +184,20 @@ def redirect_simple(): # $requestHandler
return resp # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=resp
################################################################################
# Cookies
################################################################################
# Cookie-writing examples for a Flask response. The trailing annotations are
# inline test expectations for the statement on the same line.
@app.route("/setting_cookie") # $routeSetup="/setting_cookie"
def setting_cookie(): # $requestHandler
    resp = make_response() # $ HttpResponse mimetype=text/html
    # `set_cookie` with positional and with keyword arguments.
    resp.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
    resp.set_cookie(key="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
    # Raw `Set-Cookie` header added via `headers.add`; the expectation is marked MISSING.
    resp.headers.add("Set-Cookie", "key2=value2") # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
    # Deletion with positional and with keyword argument.
    resp.delete_cookie("key3") # $ CookieWrite CookieName="key3"
    resp.delete_cookie(key="key3") # $ CookieWrite CookieName="key3"
    # Returning the already-created response is annotated as a SPURIOUS second response.
    return resp # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=resp
################################################################################

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,33 @@
import jmespath
def test_taint():
    # Part 1: tainted data searched with a constant expression. All four call
    # shapes (module-level / compiled, positional / keyword) are expected to be tainted.
    untrusted_data = TAINTED_DICT
    safe_expression = jmespath.compile("foo.bar")
    ensure_tainted(
        jmespath.search("foo.bar", untrusted_data), # $ tainted
        jmespath.search("foo.bar", data=untrusted_data), # $ tainted
        safe_expression.search(untrusted_data), # $ tainted
        safe_expression.search(value=untrusted_data) # $ tainted
    )
    # since ```jmespath.search("{wat: `foo`}", {})``` works (and outputs a dictionary),
    # we _could_ add a taint-step from the search expression to the output. However, it
    # seems more likely to lead to FPs than good results, so these have deliberately not
    # been included.
    # Part 2: a tainted expression applied to safe data must not taint the result.
    ts = TAINTED_STRING
    safe_data = {"foo": "bar"}
    unsafe_expression = jmespath.compile(ts)
    ensure_not_tainted(
        jmespath.search(ts, safe_data),
        jmespath.search(expression=ts, data=safe_data),
        unsafe_expression,
        unsafe_expression.search(safe_data),
        unsafe_expression.search(value=safe_data),
    )

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1,13 @@
import experimental.meta.InlineTaintTest
import semmle.python.Concepts
/**
 * A specialization of the inline taint-test configuration that treats the
 * output of every `HtmlEscaping` as a sanitizer.
 */
class HtmlSpecialization extends TestTaintTrackingConfiguration {
  // TODO: For now, since there is no `isSanitizingStep` member-predicate on
  // `TaintTracking::Configuration`, we treat the output as a taint-sanitizer. This
  // is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
  // python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
  //
  // However, it is better than `getAnInput()`. Due to use-use flow, that would remove
  // the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
  override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
}

View File

@@ -0,0 +1,81 @@
from markupsafe import escape, escape_silent, Markup
def ensure_tainted(*args):
    """Print a header line, then every argument on its own line."""
    print("ensure_tainted")
    for value in args:
        print(" ", value)
def ensure_not_tainted(*args):
    """Print a header line, then every argument on its own line."""
    print("ensure_not_tainted")
    for value in args:
        print(" ", value)
# these contain `{}` so we can use .format, and `%s` so we can use %-style formatting
TAINTED_STRING = '<"TAINTED_STRING" {} %s>'
SAFE = "SAFE {} %s"
def test():
    # Exercises taint and HTML-escaping annotations for `markupsafe` (and flask's
    # re-exports of it), using one tainted string and one safe string.
    ts = TAINTED_STRING
    # class `Markup` can be used for things that are already safe.
    # if used with any text in a string operation, that other text will be escaped.
    #
    # see https://markupsafe.palletsprojects.com/en/2.0.x/
    m_unsafe = Markup(TAINTED_STRING)
    m_safe = Markup(SAFE)
    # these 3 tests might look strange, but the purpose is to check that we still treat
    # `ts` as tainted even after it has been escaped in some place. This _might_ not be
    # the case since the data-flow library has taint-steps from adjacent uses...
    ensure_tainted(ts) # $ tainted
    ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
    ensure_tainted(ts) # $ tainted
    ensure_tainted(
        ts, # $ tainted
        m_unsafe, # $ tainted
        m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
        SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
        m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
        m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
        m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
        m_safe.format(m_unsafe), # $ tainted
        m_safe % m_unsafe, # $ tainted
        escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
        escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
    )
    ensure_not_tainted(
        escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
        escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
        Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
        m_safe,
        m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
        ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
        m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
        m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
        escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
        escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
        Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
    )
    # flask re-exports these, as:
    # flask.escape = markupsafe.escape
    # flask.Markup = markupsafe.Markup
    import flask
    ensure_tainted(
        flask.Markup(ts), # $ tainted
    )
    ensure_not_tainted(
        flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
        flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
    )
test()

View File

@@ -6,7 +6,7 @@ private import semmle.python.dataflow.new.TaintTracking
/** A data-flow Node representing an instance of MyClass. */
abstract class MyClass extends DataFlow::Node { }
private DataFlow::LocalSourceNode myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
private DataFlow::TypeTrackingNode myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
t.startInAttr("get_value") and
result = qualifier
or

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,68 @@
# Runnable test input for the `rsa` package: key generation, encrypt/decrypt and
# sign/verify, each in positional and keyword-argument form. The trailing
# annotations are inline test expectations for the call on the same line.
#
# Following examples from https://stuvel.eu/python-rsa-doc/usage.html
import rsa
# using a rather low keysize, since otherwise it takes quite long to run.
(public_key, private_key) = rsa.newkeys(512) # $ PublicKeyGeneration keySize=512
(public_key, private_key) = rsa.newkeys(nbits=512) # $ PublicKeyGeneration keySize=512
# ------------------------------------------------------------------------------
# encrypt/decrypt
# ------------------------------------------------------------------------------
# Note: These are using PKCS#1 v1.5
print("encrypt/decrypt")
secret_message = b"secret message"
encrypted = rsa.encrypt(secret_message, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=secret_message
encrypted = rsa.encrypt(message=secret_message, pub_key=public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=secret_message
print("encrypted={}".format(encrypted))
print()
decrypted = rsa.decrypt(encrypted, private_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=encrypted
decrypted = rsa.decrypt(crypto=encrypted, priv_key=private_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=encrypted
print("decrypted={}".format(decrypted))
assert decrypted == secret_message
print("\n---\n")
# ------------------------------------------------------------------------------
# sign/verify
# ------------------------------------------------------------------------------
# Note: These are using PKCS#1 v1.5
print("sign/verify")
message = b"message"
other_message = b"other message"
hash = rsa.compute_hash(message, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
hash = rsa.compute_hash(message=message, method_name="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
signature_from_hash = rsa.sign_hash(hash, private_key, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=hash
signature_from_hash = rsa.sign_hash(hash_value=hash, priv_key=private_key, hash_method="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=hash
signature = rsa.sign(message, private_key, "SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
signature = rsa.sign(message=message, priv_key=private_key, hash_method="SHA-256") # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationAlgorithm=SHA256 CryptographicOperationInput=message
# Signing the message directly and signing its precomputed hash must agree.
assert signature == signature_from_hash
print("signature={}".format(signature))
print()
rsa.verify(message, signature, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=message CryptographicOperationInput=signature
rsa.verify(message=message, signature=signature, pub_key=public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=message CryptographicOperationInput=signature
print("Signature verified (as expected)")
# Verifying a different message against the same signature is expected to raise
# `rsa.VerificationError`; reaching the `raise Exception` line means it did not.
try:
    rsa.verify(other_message, signature, public_key) # $ CryptographicOperation CryptographicOperationAlgorithm=RSA CryptographicOperationInput=other_message CryptographicOperationInput=signature
    raise Exception("Signature verified (unexpected)")
except rsa.VerificationError:
    print("Signature mismatch (as expected)")

View File

@@ -0,0 +1,23 @@
# Test input for file-system access through `pathlib`. `getAPathArgument` names
# the expression expected as the accessed path, `fileWriteData` the written data.
from pathlib import Path, PosixPath, WindowsPath
p = Path("filepath")
posix = PosixPath("posix/filepath")
windows = WindowsPath("windows/filepath")
p.chmod(0o777) # $ getAPathArgument=p
posix.chmod(0o777) # $ getAPathArgument=posix
windows.chmod(0o777) # $ getAPathArgument=windows
with p.open() as f: # $ getAPathArgument=p
    f.read()
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
# A string derived from a path object, passed to the builtin `open` through an alias.
name = windows.parent.name
o = open
o(name) # $ getAPathArgument=name
# Bound method stored in a variable before being called.
wb = p.write_bytes
wb(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"

View File

@@ -1,39 +1,29 @@
import builtins
import io
open("filepath") # $getAPathArgument="filepath"
open(file="filepath") # $getAPathArgument="filepath"
open("filepath") # $ getAPathArgument="filepath"
open(file="filepath") # $ getAPathArgument="filepath"
o = open
o("filepath") # $getAPathArgument="filepath"
o(file="filepath") # $getAPathArgument="filepath"
o("filepath") # $ getAPathArgument="filepath"
o(file="filepath") # $ getAPathArgument="filepath"
builtins.open("filepath") # $getAPathArgument="filepath"
builtins.open(file="filepath") # $getAPathArgument="filepath"
builtins.open("filepath") # $ getAPathArgument="filepath"
builtins.open(file="filepath") # $ getAPathArgument="filepath"
io.open("filepath") # $getAPathArgument="filepath"
io.open(file="filepath") # $getAPathArgument="filepath"
io.open("filepath") # $ getAPathArgument="filepath"
io.open(file="filepath") # $ getAPathArgument="filepath"
from pathlib import Path, PosixPath, WindowsPath
f = open("path") # $ getAPathArgument="path"
f.write("foo") # $ getAPathArgument="path" fileWriteData="foo"
lines = ["foo"]
f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines
p = Path("filepath")
posix = PosixPath("posix/filepath")
windows = WindowsPath("windows/filepath")
p.chmod(0o777) # $getAPathArgument=p
posix.chmod(0o777) # $getAPathArgument=posix
windows.chmod(0o777) # $getAPathArgument=windows
def through_function(open_file):
open_file.write("foo") # $ fileWriteData="foo" getAPathArgument="path"
with p.open() as f: # $getAPathArgument=p
f.read()
p.write_bytes(b"hello") # $getAPathArgument=p
name = windows.parent.name
o(name) # $getAPathArgument=name
wb = p.write_bytes
wb(b"hello") # $getAPathArgument=p
through_function(f)

View File

@@ -0,0 +1,45 @@
# Test input for the `logging` module. Each `loggingInput` annotation names an
# argument of the call on the same line that is expected to be reported as logged.
import logging
# this bit is just included to make this file runnable
logging.basicConfig(level=logging.DEBUG)
password = "<pass>"
msg = "foo %s"
LOGGER = logging.getLogger("LOGGER")
logging.info(msg, password) # $ loggingInput=msg loggingInput=password
logging.info(msg="hello") # $ loggingInput="hello"
# `log` takes the level first; the level argument itself is not annotated as logged.
logging.log(logging.INFO, msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.log(logging.INFO, msg, password) # $ loggingInput=msg loggingInput=password
logging.root.info(msg, password) # $ loggingInput=msg loggingInput=password
# test of all levels
logging.critical(msg, password) # $ loggingInput=msg loggingInput=password
logging.fatal(msg, password) # $ loggingInput=msg loggingInput=password
logging.error(msg, password) # $ loggingInput=msg loggingInput=password
logging.warning(msg, password) # $ loggingInput=msg loggingInput=password
logging.warn(msg, password) # $ loggingInput=msg loggingInput=password
logging.info(msg, password) # $ loggingInput=msg loggingInput=password
logging.debug(msg, password) # $ loggingInput=msg loggingInput=password
logging.exception(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.critical(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.fatal(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.error(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.warning(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.warn(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.info(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.debug(msg, password) # $ loggingInput=msg loggingInput=password
LOGGER.exception(msg, password) # $ loggingInput=msg loggingInput=password
# not sure how to make these print anything, but just to show that it works
logging.Logger("foo").info("hello") # $ loggingInput="hello"
class MyLogger(logging.Logger):
    # Subclass without overrides; `info` is inherited from `logging.Logger`.
    pass
MyLogger("bar").info("hello") # $ loggingInput="hello"

View File

@@ -58,6 +58,18 @@ class ExampleConnectionWrite(tornado.web.RequestHandler):
stream.write(b"foo stream") # $ MISSING: HttpResponse responseBody=b"foo stream"
stream.close()
################################################################################
# Cookies
################################################################################
class CookieWriting(tornado.web.RequestHandler):
    # Cookie-writing examples for a tornado request handler. The trailing
    # annotations are inline test expectations for the call on the same line.
    def get(self): # $ requestHandler
        self.write("foo") # $ HttpResponse mimetype=text/html responseBody="foo"
        # `set_cookie` with positional and with keyword arguments.
        self.set_cookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
        self.set_cookie(name="key", value="value") # $ CookieWrite CookieName="key" CookieValue="value"
        # Raw `Set-Cookie` header write; the expectation is marked MISSING.
        self.set_header("Set-Cookie", "key2=value2") # $ MISSING: CookieWrite CookieRawHeader="key2=value2"
def make_app():
return tornado.web.Application(
[
@@ -66,6 +78,7 @@ def make_app():
(r"/ExampleRedirect", ExampleRedirect), # $ routeSetup="/ExampleRedirect"
(r"/ExampleConnectionWrite", ExampleConnectionWrite), # $ routeSetup="/ExampleConnectionWrite"
(r"/ExampleConnectionWrite/(stream)", ExampleConnectionWrite), # $ routeSetup="/ExampleConnectionWrite/(stream)"
(r"/CookieWriting", CookieWriting), # $ routeSetup="/CookieWriting"
],
debug=True,
)
@@ -74,6 +87,7 @@ def make_app():
if __name__ == "__main__":
import tornado.ioloop
print("running on http://localhost:8888/")
app = make_app()
app.listen(8888)
tornado.ioloop.IOLoop.current().start()

View File

@@ -0,0 +1,12 @@
import python
import experimental.meta.ConceptsTest
/** The HTTP-response test restricted to the file whose short name is `response_test.py`. */
class DedicatedResponseTest extends HttpServerHttpResponseTest {
  DedicatedResponseTest() { file.getShortName() = "response_test.py" }
}
/**
 * The HTTP-response test for everything that is not a `DedicatedResponseTest`.
 * Only the `HttpResponse` tag is relevant here.
 */
class OtherResponseTest extends HttpServerHttpResponseTest {
  OtherResponseTest() { not this instanceof DedicatedResponseTest }

  override string getARelevantTag() { result = "HttpResponse" }
}

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,80 @@
from twisted.web.server import Site, Request, NOT_DONE_YET
from twisted.web.resource import Resource
from twisted.internet import reactor, endpoints, defer
root = Resource()
class Now(Resource):
    # The response body is the value returned from `render`.
    def render(self, request: Request): # $ requestHandler
        return b"now" # $ HttpResponse mimetype=text/html responseBody=b"now"
class AlsoNow(Resource):
    # The body is written with `request.write`; the returned empty bytes are
    # annotated as a second response body.
    def render(self, request: Request): # $ requestHandler
        request.write(b"also now") # $ HttpResponse mimetype=text/html responseBody=b"also now"
        return b"" # $ HttpResponse mimetype=text/html responseBody=b""
# Callback that `Later.render` schedules with `reactor.callLater`; it writes the
# body and finishes the request. The write's expectation is marked MISSING.
def process_later(request: Request):
    print("process_later called")
    request.write(b"later") # $ MISSING: responseBody=b"later"
    request.finish()
class Later(Resource):
    # Defers the response: `render` schedules `process_later` and returns the
    # `NOT_DONE_YET` marker, which is annotated as a SPURIOUS response body.
    def render(self, request: Request): # $ requestHandler
        # process the request in 1 second
        print("setting up callback for process_later")
        reactor.callLater(1, process_later, request)
        return NOT_DONE_YET # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=NOT_DONE_YET
class PlainText(Resource):
    # Sets an explicit content type before returning the body. The annotation
    # records that `text/html` is reported where `text/plain` is wanted.
    def render(self, request: Request): # $ requestHandler
        request.setHeader(b"content-type", "text/plain")
        return b"this is plain text" # $ HttpResponse responseBody=b"this is plain text" SPURIOUS: mimetype=text/html MISSING: mimetype=text/plain
class Redirect(Resource):
    # Issues a redirect from a method-specific handler (`render_GET`).
    def render_GET(self, request: Request): # $ requestHandler
        request.redirect("/new-location") # $ HttpRedirectResponse redirectLocation="/new-location" HttpResponse mimetype=text/html
        # By default, this `hello` output is not returned... not even when
        # requested with curl.
        return b"hello" # $ SPURIOUS: HttpResponse mimetype=text/html responseBody=b"hello"
################################################################################
# Cookies
################################################################################
class CookieWriting(Resource):
    """Examples of providing values in the response that are not in the body
    """
    def render_GET(self, request: Request): # $ requestHandler
        # `addCookie` with positional and with keyword arguments.
        request.addCookie("key", "value") # $ CookieWrite CookieName="key" CookieValue="value"
        request.addCookie(k="key", v="value") # $ CookieWrite CookieName="key" CookieValue="value"
        # Appending to `request.cookies` is annotated as a raw-header cookie write.
        val = "key2=value"
        request.cookies.append(val) # $ CookieWrite CookieRawHeader=val
        # A non-cookie response header; it carries no annotation.
        request.responseHeaders.addRawHeader("key", "value")
        # Raw `Set-Cookie` header via `setHeader`; the expectation is marked MISSING.
        request.setHeader("Set-Cookie", "key3=value3") # $ MISSING: CookieWrite CookieRawHeader="key3=value3"
        return b"" # $ HttpResponse mimetype=text/html responseBody=b""
root.putChild(b"now", Now())
root.putChild(b"also-now", AlsoNow())
root.putChild(b"later", Later())
root.putChild(b"plain-text", PlainText())
root.putChild(b"redirect", Redirect())
root.putChild(b"setting_cookie", CookieWriting())
if __name__ == "__main__":
factory = Site(root)
endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
endpoint.listen(factory)
print("Will run on http://localhost:8880")
reactor.run()

View File

@@ -0,0 +1,47 @@
from twisted.web.server import Site, Request
from twisted.web.resource import Resource
from twisted.internet import reactor, endpoints
root = Resource()
class Foo(Resource):
    # Prints a few request attributes (useful when running the file manually)
    # and returns a constant body.
    def render(self, request: Request): # $ requestHandler
        print(f"{request.content=}")
        print(f"{request.cookies=}")
        print(f"{request.received_cookies=}")
        return b"I am Foo" # $ HttpResponse
root.putChild(b"foo", Foo())
class Child(Resource):
    # Resource created on the fly by `Parent.getChild`; it remembers the decoded
    # path segment it was created for.
    def __init__(self, name):
        # Initialise the base class so the state `Resource` itself maintains
        # (such as its child registry) exists on this instance as well.
        super().__init__()
        self.name = name.decode("utf-8")
    def render_GET(self, request): # $ requestHandler
        return f"Hi, I'm child '{self.name}'".encode("utf-8") # $ HttpResponse
class Parent(Resource):
    # Dynamic routing: `getChild` builds a `Child` for the requested path segment,
    # while `render_GET` answers requests for the parent itself.
    def getChild(self, path, request): # $ requestHandler
        print(path, type(path))
        return Child(path)
    def render_GET(self, request): # $ requestHandler
        return b"Hi, I'm parent" # $ HttpResponse
root.putChild(b"parent", Parent())
if __name__ == "__main__":
factory = Site(root)
endpoint = endpoints.TCP4ServerEndpoint(reactor, 8880)
endpoint.listen(factory)
print("Will run on http://localhost:8880")
reactor.run()

View File

@@ -0,0 +1,70 @@
from twisted.web.resource import Resource
from twisted.web.server import Request
class MyTaintTest(Resource):
    # Taint expectations for the parameters of twisted request-handling methods
    # and for the attributes and methods of the `request` object.
    def getChild(self, path, request): # $ requestHandler
        ensure_tainted(path, request) # $ tainted
    def render(self, request): # $ requestHandler
        ensure_tainted(request) # $ tainted
    def render_GET(self, request: Request): # $ requestHandler
        # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html
        ensure_tainted(
            request, # $ tainted
            request.uri, # $ tainted
            request.path, # $ tainted
            request.prepath, # $ tainted
            request.postpath, # $ tainted
            # file-like
            request.content, # $ tainted
            request.content.read(), # $ MISSING: tainted
            # Dict[bytes, List[bytes]] (for query args)
            request.args, # $ tainted
            request.args[b"key"], # $ tainted
            request.args[b"key"][0], # $ tainted
            request.args.get(b"key"), # $ tainted
            request.args.get(b"key")[0], # $ tainted
            request.received_cookies, # $ tainted
            request.received_cookies["key"], # $ tainted
            request.received_cookies.get("key"), # $ tainted
            request.getCookie(b"key"), # $ tainted
            # twisted.web.http_headers.Headers
            # see https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http_headers.Headers.html
            request.requestHeaders, # $ tainted
            request.requestHeaders.getRawHeaders("key"), # $ MISSING: tainted
            request.requestHeaders.getRawHeaders("key")[0], # $ MISSING: tainted
            request.requestHeaders.getAllRawHeaders(), # $ MISSING: tainted
            list(request.requestHeaders.getAllRawHeaders()), # $ MISSING: tainted
            request.getHeader("key"), # $ tainted
            request.getAllHeaders(), # $ tainted
            request.getAllHeaders()["key"], # $ tainted
            request.user, # $ tainted
            request.getUser(), # $ tainted
            request.password, # $ tainted
            request.getPassword(), # $ tainted
            request.host, # $ tainted
            request.getHost(), # $ tainted
            request.getRequestHostname(), # $ tainted
        )
        # technically user-controlled, but unlikely to lead to vulnerabilities.
        ensure_not_tainted(
            request.method,
        )
        # not tainted at all
        ensure_not_tainted(
            # outgoing things
            request.cookies,
            request.responseHeaders,
        )

View File

@@ -2,6 +2,8 @@ import python
import semmle.python.regex
from Regex r, int start, int end, int part_start, int part_end
where r.alternationOption(start, end, part_start, part_end)
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.alternationOption(start, end, part_start, part_end)
select r.getText(), start, end, r.getText().substring(start, end), part_start, part_end,
r.getText().substring(part_start, part_end)

View File

@@ -2,6 +2,8 @@ import python
import semmle.python.regex
from Regex r, int start, int end, int part_start, int part_end
where r.groupContents(start, end, part_start, part_end)
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.groupContents(start, end, part_start, part_end)
select r.getText(), start, end, r.getText().substring(start, end), part_start, part_end,
r.getText().substring(part_start, part_end)

View File

@@ -2,4 +2,5 @@ import python
import semmle.python.regex
from Regex r
where r.getLocation().getFile().getBaseName() = "test.py"
select r.getLocation().getStartLine(), r.getAMode()

View File

@@ -1,15 +1,15 @@
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false |
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true |
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true |
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true |
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true |
| (?P<name>[\\w]+)\| | 9 | 14 | false |
| \\A[+-]?\\d+ | 2 | 7 | true |
| \\A[+-]?\\d+ | 7 | 10 | false |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true |
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
| ax{01,3} | 1 | 8 | false |
| ax{3,} | 1 | 6 | false |
| ax{3} | 1 | 5 | false |
| ax{,3} | 1 | 6 | true |
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false | true |
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true | false |
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true | true |
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true | false |
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true | true |
| (?P<name>[\\w]+)\| | 9 | 14 | false | true |
| \\A[+-]?\\d+ | 2 | 7 | true | false |
| \\A[+-]?\\d+ | 7 | 10 | false | true |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true | true |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true | true |
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false | true |
| ax{01,3} | 1 | 8 | false | false |
| ax{3,} | 1 | 6 | false | true |
| ax{3} | 1 | 5 | false | false |
| ax{,3} | 1 | 6 | true | false |

View File

@@ -1,6 +1,8 @@
import python
import semmle.python.regex
from Regex r, int start, int end, boolean maybe_empty
where r.qualifiedItem(start, end, maybe_empty)
select r.getText(), start, end, maybe_empty
from Regex r, int start, int end, boolean maybe_empty, boolean may_repeat_forever
where
r.getLocation().getFile().getBaseName() = "test.py" and
r.qualifiedItem(start, end, maybe_empty, may_repeat_forever)
select r.getText(), start, end, maybe_empty, may_repeat_forever

View File

@@ -16,7 +16,7 @@ predicate part(Regex r, int start, int end, string kind) {
or
r.group(start, end) and not r.zeroWidthMatch(start, end) and kind = "non-empty group"
or
r.qualifiedItem(start, end, _) and kind = "qualified"
r.qualifiedItem(start, end, _, _) and kind = "qualified"
}
from Regex r, int start, int end, string kind

View File

@@ -0,0 +1,75 @@
import python
import TestUtilities.InlineExpectationsTest
private import semmle.python.regex
/**
 * Inline-expectations test for `Regex.charSet`, restricted to `charSetTest.py`.
 * Each result has tag `charSet` and the value `start:end`.
 */
class CharacterSetTest extends InlineExpectationsTest {
  CharacterSetTest() { this = "CharacterSetTest" }

  override string getARelevantTag() { result = "charSet" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "charSet" and
    location.getFile().getBaseName() = "charSetTest.py" and
    exists(location.getFile().getRelativePath()) and
    exists(Regex regex, int setStart, int setEnd |
      location = regex.getLocation() and
      regex.charSet(setStart, setEnd) and
      value = setStart + ":" + setEnd and
      element = regex.getText().substring(setStart, setEnd)
    )
  }
}
/**
 * Inline-expectations test for `Regex.charRange`, restricted to `charRangeTest.py`.
 * Each result has tag `charRange` and the value `start:lower_end-upper_start:end`.
 */
class CharacterRangeTest extends InlineExpectationsTest {
  CharacterRangeTest() { this = "CharacterRangeTest" }

  override string getARelevantTag() { result = "charRange" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "charRange" and
    location.getFile().getBaseName() = "charRangeTest.py" and
    exists(location.getFile().getRelativePath()) and
    exists(Regex regex, int rangeStart, int lowerEnd, int upperStart, int rangeEnd |
      location = regex.getLocation() and
      regex.charRange(_, rangeStart, lowerEnd, upperStart, rangeEnd) and
      value = rangeStart + ":" + lowerEnd + "-" + upperStart + ":" + rangeEnd and
      element = regex.getText().substring(rangeStart, rangeEnd)
    )
  }
}
/**
 * Inline-expectations test for `Regex.escapedCharacter`, restricted to
 * `escapedCharacterTest.py`. Each result has tag `escapedCharacter` and the
 * value `start:end`.
 */
class EscapeTest extends InlineExpectationsTest {
  EscapeTest() { this = "EscapeTest" }

  override string getARelevantTag() { result = "escapedCharacter" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "escapedCharacter" and
    location.getFile().getBaseName() = "escapedCharacterTest.py" and
    exists(location.getFile().getRelativePath()) and
    exists(Regex regex, int escStart, int escEnd |
      location = regex.getLocation() and
      regex.escapedCharacter(escStart, escEnd) and
      value = escStart + ":" + escEnd and
      element = regex.getText().substring(escStart, escEnd)
    )
  }
}
/**
 * Inline-expectations test for `Regex.group`, restricted to `groupTest.py`.
 * Each result has tag `group` and the value `start:end`.
 */
class GroupTest extends InlineExpectationsTest {
  GroupTest() { this = "GroupTest" }

  override string getARelevantTag() { result = "group" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "group" and
    location.getFile().getBaseName() = "groupTest.py" and
    exists(location.getFile().getRelativePath()) and
    exists(Regex regex, int groupStart, int groupEnd |
      location = regex.getLocation() and
      regex.group(groupStart, groupEnd) and
      value = groupStart + ":" + groupEnd and
      element = regex.getText().substring(groupStart, groupEnd)
    )
  }
}

View File

@@ -0,0 +1,45 @@
# Test input for `CharacterRangeTest`. A `charRange=a:b-c:d` annotation gives the
# offsets start:lower_end-upper_start:end of one character range in the pattern.
import re
re.compile(r'[A-Z]') #$ charRange=1:2-3:4
# This pattern is expected to be rejected with `re.error`; reaching the
# `raise Exception` line means it compiled after all.
try:
    re.compile(r'[]-[]') #$ SPURIOUS: charRange=1:2-3:4
    raise Exception("this should not be reached")
except re.error:
    pass
re.compile(r'[---]') #$ charRange=1:2-3:4
re.compile(r'[\---]') #$ charRange=1:3-4:5
re.compile(r'[--\-]') #$ charRange=1:2-3:5
re.compile(r'[\--\-]') #$ charRange=1:3-4:6
re.compile(r'[0-9-A-Z]') #$ charRange=1:2-3:4 charRange=5:6-7:8
re.compile(r'[0\-9-A-Z]') #$ charRange=4:5-6:7
# Also expected to be rejected with `re.error`.
try:
    re.compile(r'[0--9-A-Z]') #$ SPURIOUS: charRange=1:2-3:4 charRange=4:5-6:7
    raise Exception("this should not be reached")
except re.error:
    pass
re.compile(r'[^A-Z]') #$ charRange=2:3-4:5
re.compile(r'[\0-\09]') #$ charRange=1:3-4:7
re.compile(r'[\0123-5]') #$ charRange=5:6-7:8
#Negative lookahead
re.compile(r'(?!not-this)^[A-Z_]+$') #$ charRange=14:15-16:17
#Negative lookbehind
re.compile(r'^[A-Z_]+$(?<!not-this)') #$ charRange=2:3-4:5
#OK -- ODASA-ODASA-3968
re.compile('(?:[^%]|^)?%\((\w*)\)[a-z]') #$ charRange=22:23-24:25
#ODASA-3985
#Half Surrogate pairs
re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') #$ charRange=1:2-3:4 charRange=6:7-8:9
#Outside BMP
re.compile(u'[\U00010000-\U0010ffff]') #$ charRange=1:2-3:4

View File

@@ -0,0 +1,39 @@
# Test input for `CharacterSetTest`. A `charSet=start:end` annotation gives the
# offsets of one character set in the pattern.
import re
re.compile(r'\A[+-]?\d+') #$ charSet=2:6
re.compile(r'(?P<name>[\w]+)|') #$ charSet=9:13
re.compile(r'\|\[\][123]|\{\}') #$ charSet=6:11
re.compile(r'[^A-Z]') #$ charSet=0:6
re.compile("[]]") #$ charSet=0:3
re.compile("[][]") #$ charSet=0:4
re.compile("[^][^]") #$ charSet=0:6
re.compile("[.][.]") #$ charSet=0:3 charSet=3:6
re.compile("[[]]") #$ charSet=0:3
re.compile("[^]]") #$ charSet=0:4
re.compile("[^-]") #$ charSet=0:4
# This pattern is expected to be rejected with `re.error`; reaching the
# `raise Exception` line means it compiled after all.
try:
    re.compile("[]-[]") #$ SPURIOUS: charSet=0:5
    raise Exception("this should not be reached")
except re.error:
    pass
# Also expected to be rejected with `re.error`.
try:
    re.compile("[^]-[]") #$ SPURIOUS: charSet=0:6
    raise Exception("this should not be reached")
except re.error:
    pass
re.compile("]]][[[[]") #$ charSet=3:8
#ODASA-3985
#Half Surrogate pairs
re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') #$ charSet=0:5 charSet=5:10
#Outside BMP
re.compile(u'[\U00010000-\U0010ffff]') #$ charSet=0:5
#Misparsed on LGTM
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)") #$ charSet=10:14 charSet=28:32
# parses wrongly, sees this \|/ as a char set start
re.compile(r'''(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+''') #$ charSet=3:25 charSet=30:35

View File

@@ -0,0 +1,23 @@
import re
re.compile(r'\b') #$ escapedCharacter=0:2
re.compile(r'''\b''') #$ escapedCharacter=0:2
re.compile(r"\b") #$ escapedCharacter=0:2
re.compile(u"\b") # not escape
re.compile("\b") # not escape
re.compile(r'\\\b') #$ escapedCharacter=0:2 escapedCharacter=2:4
re.compile(r'[\---]') #$ escapedCharacter=1:3
re.compile(r'[--\-]') #$ escapedCharacter=3:5
re.compile(r'[\--\-]') #$ escapedCharacter=1:3 escapedCharacter=4:6
re.compile(r'[0\-9-A-Z]') #$ escapedCharacter=2:4
re.compile(r'[\0-\09]') #$ escapedCharacter=1:3 escapedCharacter=4:7
re.compile(r'[\0123-5]') #$ escapedCharacter=1:5
#ODASA-3985
#Half Surrogate pairs
re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]') # not escapes
#Outside BMP
re.compile(u'[\U00010000-\U0010ffff]') # not escapes
#Misparsed on LGTM
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)") #$ escapedCharacter=0:2 escapedCharacter=16:18 escapedCharacter=18:20

View File

@@ -0,0 +1,4 @@
import re
re.compile(r'(?P<first>\w+) (?P<second>\w+)') #$ group=0:14 group=15:30
re.compile(r'([)(])') #$ group=0:6

View File

@@ -0,0 +1,27 @@
edges
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password |
| test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password |
nodes
| test.py:19:16:19:29 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:20:48:20:55 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:22:58:22:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:23:58:23:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:27:40:27:47 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:30:58:30:65 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:34:30:34:39 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:37:11:37:24 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:39:22:39:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test.py:40:22:40:35 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
#select
| test.py:20:48:20:55 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:20:48:20:55 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:22:58:22:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:22:58:22:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:23:58:23:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:23:58:23:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:27:40:27:47 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:27:40:27:47 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:30:58:30:65 | ControlFlowNode for password | test.py:19:16:19:29 | ControlFlowNode for get_password() | test.py:30:58:30:65 | ControlFlowNode for password | $@ is logged here. | test.py:19:16:19:29 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:34:30:34:39 | ControlFlowNode for get_cert() | test.py:34:30:34:39 | ControlFlowNode for get_cert() | test.py:34:30:34:39 | ControlFlowNode for get_cert() | $@ is logged here. | test.py:34:30:34:39 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
| test.py:37:11:37:24 | ControlFlowNode for get_password() | test.py:37:11:37:24 | ControlFlowNode for get_password() | test.py:37:11:37:24 | ControlFlowNode for get_password() | $@ is logged here. | test.py:37:11:37:24 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | test.py:39:22:39:35 | ControlFlowNode for get_password() | $@ is logged here. | test.py:39:22:39:35 | ControlFlowNode for get_password() | Sensitive data (password) |
| test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | test.py:40:22:40:35 | ControlFlowNode for get_password() | $@ is logged here. | test.py:40:22:40:35 | ControlFlowNode for get_password() | Sensitive data (password) |

View File

@@ -0,0 +1,46 @@
import logging
import sys
LOGGER = logging.getLogger("LOGGER")
def get_logger():
return LOGGER
def get_password():
return "<PASSWORD>"
def get_cert():
return "<CERT>"
def log_password():
password = get_password()
logging.info("logging.info Password '%s'", password) # NOT OK
LOGGER.log(logging.INFO, "LOGGER.log Password '%s'", password) # NOT OK
logging.root.info("logging.root.info Password '%s'", password) # NOT OK
# name of logger variable should not matter
foo = LOGGER
foo.info("foo.info Password '%s'", password) # NOT OK
# return value from function
get_logger().info("get_logger().info Password '%s'", password) # NOT OK
def log_cert():
logging.debug("Cert=%s", get_cert()) # NOT OK
def print_password():
print(get_password()) # NOT OK
sys.stdout.write(get_password()) # NOT OK
sys.stderr.write(get_password()) # NOT OK
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
log_password()
log_cert()
print_password()

View File

@@ -0,0 +1,13 @@
edges
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
nodes
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
#select
| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | $@ is stored here. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |

View File

@@ -0,0 +1,15 @@
import pathlib
def get_cert():
return "<CERT>"
def write_password(filename):
cert = get_cert()
path = pathlib.Path(filename)
path.write_text(cert) # NOT OK
path.write_bytes(cert.encode("utf-8")) # NOT OK
path.open("w").write(cert) # NOT OK

View File

@@ -0,0 +1,20 @@
edges
| password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | password_in_cookie.py:9:33:9:40 | ControlFlowNode for password |
| password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | password_in_cookie.py:16:33:16:40 | ControlFlowNode for password |
| test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:8:20:8:23 | ControlFlowNode for cert |
| test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:9:17:9:29 | ControlFlowNode for List |
| test.py:9:17:9:29 | ControlFlowNode for List | test.py:10:25:10:29 | ControlFlowNode for lines |
nodes
| password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | semmle.label | ControlFlowNode for password |
| test.py:6:12:6:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:8:20:8:23 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:9:17:9:29 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
| test.py:10:25:10:29 | ControlFlowNode for lines | semmle.label | ControlFlowNode for lines |
#select
| password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | password_in_cookie.py:9:33:9:40 | ControlFlowNode for password | $@ is stored here. | password_in_cookie.py:7:16:7:43 | ControlFlowNode for Attribute() | Sensitive data (password) |
| password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | password_in_cookie.py:16:33:16:40 | ControlFlowNode for password | $@ is stored here. | password_in_cookie.py:14:16:14:43 | ControlFlowNode for Attribute() | Sensitive data (password) |
| test.py:8:20:8:23 | ControlFlowNode for cert | test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:8:20:8:23 | ControlFlowNode for cert | $@ is stored here. | test.py:6:12:6:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |
| test.py:10:25:10:29 | ControlFlowNode for lines | test.py:6:12:6:21 | ControlFlowNode for get_cert() | test.py:10:25:10:29 | ControlFlowNode for lines | $@ is stored here. | test.py:6:12:6:21 | ControlFlowNode for get_cert() | Sensitive data (certificate) |

View File

@@ -0,0 +1 @@
Security/CWE-312/CleartextStorage.ql

View File

@@ -6,12 +6,12 @@ app = Flask("Leak password")
def index():
password = request.args.get("password")
resp = make_response(render_template(...))
resp.set_cookie("password", password)
resp.set_cookie("password", password) # NOT OK
return resp
@app.route('/')
def index2():
password = request.args.get("password")
resp = Response(...)
resp.set_cookie("password", password)
resp.set_cookie("password", password) # NOT OK
return resp

View File

@@ -0,0 +1,10 @@
def get_cert():
return "<CERT>"
def write_cert(filename):
cert = get_cert()
with open(filename, "w") as file:
file.write(cert) # NOT OK
lines = [cert + "\n"]
file.writelines(lines) # NOT OK

View File

@@ -1,10 +0,0 @@
edges
| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
#select
| test.py:8:35:8:42 | password | test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password | Sensitive data returned by $@ is logged here. | test.py:7:16:7:29 | get_password() | a call returning a password |
| test.py:14:30:14:39 | get_cert() | test.py:14:30:14:39 | a certificate or key | test.py:14:30:14:39 | a certificate or key | Sensitive data returned by $@ is logged here. | test.py:14:30:14:39 | get_cert() | a call returning a certificate or key |
| test.py:17:11:17:24 | get_password() | test.py:17:11:17:24 | a password | test.py:17:11:17:24 | a password | Sensitive data returned by $@ is logged here. | test.py:17:11:17:24 | get_password() | a call returning a password |

View File

@@ -1,12 +0,0 @@
edges
| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
| password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password |
| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
| password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password |
| test.py:7:16:7:29 | a password | test.py:8:35:8:42 | a password |
| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
| test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key |
#select
| password_in_cookie.py:9:33:9:40 | password | password_in_cookie.py:7:16:7:43 | a password | password_in_cookie.py:9:33:9:40 | a password | Sensitive data from $@ is stored here. | password_in_cookie.py:7:16:7:43 | Attribute() | a request parameter containing a password |
| password_in_cookie.py:16:33:16:40 | password | password_in_cookie.py:14:16:14:43 | a password | password_in_cookie.py:16:33:16:40 | a password | Sensitive data from $@ is stored here. | password_in_cookie.py:14:16:14:43 | Attribute() | a request parameter containing a password |
| test.py:22:20:22:23 | cert | test.py:20:12:20:21 | a certificate or key | test.py:22:20:22:23 | a certificate or key | Sensitive data from $@ is stored here. | test.py:20:12:20:21 | get_cert() | a call returning a certificate or key |

View File

@@ -1 +0,0 @@
semmle-extractor-options: -p ../lib/ --max-import-depth=3

View File

@@ -1,22 +0,0 @@
#Don't import logging; it transitively imports a lot of stuff
def get_password():
pass
def log_password():
password = get_password()
logging.info("Password '%s'", password)
def get_cert():
pass
def log_cert():
logging.debug("Cert=%s", get_cert())
def print_password():
print(get_password())
def write_cert(filename):
cert = get_cert()
with open(filename, "w") as file:
file.write(cert)

View File

@@ -1,27 +1,71 @@
edges
| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome | test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate |
| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password |
| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password |
| test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:2:23:2:34 | GSSA Variable get_password |
| test_cryptodome.py:2:23:2:34 | GSSA Variable get_password | test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome |
| test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate |
| test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate | test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome |
| test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous |
| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography | test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate |
| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password |
| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password |
| test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:3:23:3:34 | GSSA Variable get_password |
| test_cryptography.py:3:23:3:34 | GSSA Variable get_password | test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography |
| test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate |
| test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate | test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography |
| test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous |
| test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous |
| test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous |
| test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous |
| test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous |
| test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous |
nodes
| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome | semmle.label | ModuleVariableNode for Global Variable get_certificate in Module test_cryptodome |
| test_cryptodome.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptodome | semmle.label | ModuleVariableNode for Global Variable get_password in Module test_cryptodome |
| test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test_cryptodome.py:2:23:2:34 | GSSA Variable get_password | semmle.label | GSSA Variable get_password |
| test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test_cryptodome.py:2:37:2:51 | GSSA Variable get_certificate | semmle.label | GSSA Variable get_certificate |
| test_cryptodome.py:6:17:6:31 | ControlFlowNode for get_certificate | semmle.label | ControlFlowNode for get_certificate |
| test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptodome.py:13:17:13:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
| test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptodome.py:20:17:20:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
| test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography | semmle.label | ModuleVariableNode for Global Variable get_certificate in Module test_cryptography |
| test_cryptography.py:0:0:0:0 | ModuleVariableNode for Global Variable get_password in Module test_cryptography | semmle.label | ModuleVariableNode for Global Variable get_password in Module test_cryptography |
| test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test_cryptography.py:3:23:3:34 | GSSA Variable get_password | semmle.label | GSSA Variable get_password |
| test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test_cryptography.py:3:37:3:51 | GSSA Variable get_certificate | semmle.label | GSSA Variable get_certificate |
| test_cryptography.py:7:17:7:31 | ControlFlowNode for get_certificate | semmle.label | ControlFlowNode for get_certificate |
| test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:15:17:15:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
| test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:23:17:23:28 | ControlFlowNode for get_password | semmle.label | ControlFlowNode for get_password |
| test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
#select
| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptodome.py:2:37:2:51 | ControlFlowNode for ImportMember | Sensitive data (certificate) |
| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:2:23:2:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptography.py:3:37:3:51 | ControlFlowNode for ImportMember | Sensitive data (certificate) |
| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:3:23:3:34 | ControlFlowNode for ImportMember | Sensitive data (password) |
| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | Sensitive data (password) |

View File

@@ -0,0 +1,12 @@
edges
| test.py:7:12:7:18 | ControlFlowNode for request | test.py:7:12:7:23 | ControlFlowNode for Attribute |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:8:30:8:33 | ControlFlowNode for text |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:9:32:9:35 | ControlFlowNode for text |
nodes
| test.py:7:12:7:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:7:12:7:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:8:30:8:33 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
| test.py:9:32:9:35 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
#select
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:7:12:7:18 | ControlFlowNode for request | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:7:12:7:18 | ControlFlowNode for request | a user-provided value |
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:7:12:7:18 | ControlFlowNode for request | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on $@ may run slow on strings with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:7:12:7:18 | ControlFlowNode for request | a user-provided value |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-730/PolynomialReDoS.ql

View File

@@ -0,0 +1,9 @@
import re
from flask import Flask, request
app = Flask(__name__)
@app.route("/poly-redos")
def code_execution():
text = request.args.get("text")
re.sub(r"^\s+|\s+$", "", text) # NOT OK
re.match(r"^0\.\d+E?\d+$", text) # NOT OK

View File

@@ -0,0 +1,94 @@
import re
# linear
# https://github.com/github/codeql-python-CVE-coverage/issues/439
rex_blame = re.compile(r'\s*(\d+)\s*(\S+) (.*)')
# https://github.com/github/codeql-python-CVE-coverage/issues/402
whitespace = br"[\000\011\012\014\015\040]"
whitespace_optional = whitespace + b"*"
newline_only = br"[\r\n]+"
newline = whitespace_optional + newline_only + whitespace_optional
toFlag = re.compile(newline)
# https://github.com/github/codeql-python-CVE-coverage/issues/400
re.compile(r'[+-]?(\d+)*\.\d+%?')
re.compile(r'"""\s+(?:.|\n)*?\s+"""')
re.compile(r'(\{\s+)(\S+)(\s+[^}]+\s+\}\s)')
re.compile(r'".*``.*``.*"')
re.compile(r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)')
re.compile(r'(%config)(\s*\(\s*)(\w+)(\s*=\s*)(.*?)(\s*\)\s*)')
re.compile(r'(%new)(\s*)(\()(\s*.*?\s*)(\))')
re.compile(r'(\$)(evoque|overlay)(\{(%)?)(\s*[#\w\-"\'.]+[^=,%}]+?)?')
re.compile(r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)')
# linear
# https://github.com/github/codeql-python-CVE-coverage/issues/392
simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
# https://github.com/github/codeql-python-CVE-coverage/issues/249
rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
'realm=(["\']?)([^"\']*)\\2', re.I)
# https://github.com/github/codeql-python-CVE-coverage/issues/248
gauntlet = re.compile(
r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
flags=re.U
)
# https://github.com/github/codeql-python-CVE-coverage/issues/227
# from .compat import tobytes
WS = "[ \t]"
OWS = WS + "{0,}?"
# RFC 7230 Section 3.2.6 "Field Value Components":
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
# / DIGIT / ALPHA
# obs-text = %x80-FF
TCHAR = r"[!#$%&'*+\-.^_`|~0-9A-Za-z]"
OBS_TEXT = r"\x80-\xff"
TOKEN = TCHAR + "{1,}"
# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"
# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text
# Errata from: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
# changes field-content to:
#
# field-content = field-vchar [ 1*( SP / HTAB / field-vchar )
# field-vchar ]
FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
FIELD_CONTENT = FIELD_VCHAR + "([ \t" + VCHAR + OBS_TEXT + "]+" + FIELD_VCHAR + "){,1}"
FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
HEADER_FIELD = re.compile(
# tobytes(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
# )
)
# https://github.com/github/codeql-python-CVE-coverage/issues/224
pattern = re.compile(
r'^(:?(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|' # domain pt.1
r'([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|' # domain pt.2
r'([a-zA-Z0-9][-_a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.)+' # domain pt.3
r'([a-zA-Z]{2,13}|(xn--[a-zA-Z0-9]{2,30}))$' # TLD
)
# https://github.com/github/codeql-python-CVE-coverage/issues/189
URL_REGEX = (
r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|'
r'[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|'
r'(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|'
r'[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))' # "emacs!
)
url = re.compile(URL_REGEX)

View File

@@ -0,0 +1,97 @@
| KnownCVEs.py:15:22:15:24 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
| KnownCVEs.py:30:24:31:25 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
| KnownCVEs.py:35:18:35:81 | ([-/:,#%.'"\\s!\\w]\|\\w-\\w\|'[\\s\\w]+'\\s*\|"[\\s\\w]+"\|\\([\\d,%\\.\\s]+\\))* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"\\t"'. |
| redos.py:6:28:6:42 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
| redos.py:6:52:6:68 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
| redos.py:21:34:21:53 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:57:21:76 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:21:81:21:100 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
| redos.py:33:64:33:65 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
| redos.py:38:33:38:42 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
| redos.py:43:37:43:38 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
| redos.py:49:41:49:43 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '""'. |
| redos.py:49:47:49:49 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with ''' and containing many repetitions of ''''. |
| redos.py:54:47:54:49 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:54:80:54:82 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
| redos.py:60:25:60:30 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:61:25:61:30 | [a-z]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:62:53:62:64 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:63:26:63:33 | ([a-z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| redos.py:68:26:68:41 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\t'. |
| redos.py:68:48:68:50 | .*? | This part of the regular expression may cause exponential backtracking on strings starting with '[' and containing many repetitions of ']['. |
| redos.py:73:29:73:36 | (\\\\?.)*? | This part of the regular expression may cause exponential backtracking on strings starting with '"' and containing many repetitions of '\\\\a'. |
| redos.py:76:24:76:31 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:79:24:79:31 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:91:24:91:31 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:97:25:97:38 | ([\\s\\S]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
| redos.py:103:25:103:33 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
| redos.py:109:25:109:33 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:112:25:112:33 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
| redos.py:115:25:115:37 | ([0-9]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:127:25:127:38 | ([a-z]\|[d-h])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
| redos.py:130:25:130:40 | ([^a-z]\|[^0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
| redos.py:133:25:133:35 | (\\d\|[0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:136:25:136:32 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
| redos.py:160:25:160:32 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:163:25:163:32 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:166:25:166:34 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:169:25:169:37 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
| redos.py:172:25:172:33 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:175:26:175:30 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:187:26:187:31 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
| redos.py:190:27:190:29 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '\\n' and containing many repetitions of '\\n'. |
| redos.py:193:28:193:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:196:78:196:89 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A:' and containing many repetitions of ' A:'. |
| redos.py:196:91:196:92 | ,? | This part of the regular expression may cause exponential backtracking on strings starting with '{[A(A)A: ' and containing many repetitions of ',A: '. |
| redos.py:199:25:199:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:199:28:199:29 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:202:26:202:32 | (a+a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:202:27:202:28 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:205:25:205:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:211:25:211:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:217:25:217:27 | \\n+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
| redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
| redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
| redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
| redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
| redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:49:256:51 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:256:61:256:63 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbazfoobarbazfoobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '"" a='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:286:26:286:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:292:26:292:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:295:35:295:36 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:301:100:301:101 | e+ | This part of the regular expression may cause exponential backtracking on strings starting with ';00000000000000' and containing many repetitions of 'e'. |
| redos.py:304:28:304:29 | c+ | This part of the regular expression may cause exponential backtracking on strings starting with 'ab' and containing many repetitions of 'c'. |
| redos.py:307:28:307:30 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:310:26:310:34 | ([^/]\|X)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'X'. |
| redos.py:313:30:313:34 | [^Y]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Xx'. |
| redos.py:316:25:316:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:319:28:319:33 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
| redos.py:322:25:322:29 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |
| redos.py:325:24:325:30 | (a?a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:334:24:334:32 | (?:a\|a?)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:340:27:340:55 | (([a-c]\|[c-d])T(e?e?e?e?\|X))+ | This part of the regular expression may cause exponential backtracking on strings starting with 'PRE' and containing many repetitions of 'cTX'. |
| redos.py:343:26:343:29 | (a)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'aa'. |
| redos.py:346:26:346:27 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bb'. |
| redos.py:352:25:352:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:353:25:353:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:354:25:354:26 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:355:25:355:26 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| redos.py:362:25:362:40 | ((?:a{\|-)\|\\w\\{)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{'. |
| redos.py:363:25:363:43 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
| redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
| redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-730/ReDoS.ql

View File

@@ -0,0 +1,374 @@
import re
# Cases adapted from real-world libraries. Each pattern is preceded by a
# GOOD / NOT GOOD verdict; attack strings in these comments are written in
# JavaScript notation (`"x".repeat(n)`).
# NOT GOOD; attack: "_" + "__".repeat(100)
# `__` can be consumed by the `__` alternative or as two `[\s\S]` characters.
# Adapted from marked (https://github.com/markedjs/marked), which is licensed
# under the MIT license; see file marked-LICENSE.
bad1 = re.compile(r'''^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)''')
# GOOD - `[^_]` / `[^*]` exclude the delimiter, so the alternatives are disjoint.
# Adapted from marked (https://github.com/markedjs/marked), which is licensed
# under the MIT license; see file marked-LICENSE.
good1 = re.compile(r'^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)')
# GOOD - there is no witness in the end that could cause the regexp to not match
# Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
# which is licensed under the MIT license; see file brace-expansion-LICENSE.
good2 = re.compile(r'(.*,)+.+')
# NOT GOOD; attack: " '" + "\\\\".repeat(100)
# Two backslashes match both the `\\\\` alternative and the `\\.` alternative.
# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
# which is licensed under the MIT license; see file CodeMirror-LICENSE.
bad2 = re.compile(r'''^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?''')
# GOOD
# NOTE: this rebinds the name `good2` assigned a few lines above.
# Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
good2 = re.compile(r'''\(\*(?:[\s\S]*?\(\*[\s\S]*?\*\))*[\s\S]*?\*\)''')
# GOOD
# Adapted from jest (https://github.com/facebook/jest), which is licensed
# under the MIT license; see file jest-LICENSE.
good3 = re.compile(r'''^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*''')
# NOT GOOD, variant of good3; attack: "a|\n:|\n" + "||\n".repeat(100)
# Differs from good3 only in the tail: a required `a` replaces the final `\n*`.
bad4 = re.compile(r'''^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)a''')
# NOT GOOD; attack: "/" + "\\/a".repeat(100)
# Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
# which is licensed under the Apache License 2.0; see file ANodeBlog-LICENSE.
bad5 = re.compile(r'''\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)''')
# NOT GOOD; attack: "##".repeat(100) + "\na"
# Adapted from CodeMirror (https://github.com/codemirror/codemirror),
# which is licensed under the MIT license; see file CodeMirror-LICENSE.
bad6 = re.compile(r'''^([\s\[\{\(]|#.*)*$''')
# GOOD
good4 = re.compile(r'''(\r\n|\r|\n)+''')
# BAD - PoC: `node -e "/((?:[^\"\']|\".*?\"|\'.*?\')*?)([(,)]|$)/.test(\"'''''''''''''''''''''''''''''''''''''''''''''\\\"\");"`. It's complicated though, because the regexp still matches something, it just matches the empty-string after the attack string.
actuallyBad = re.compile(r'''((?:[^"']|".*?"|'.*?')*?)([(,)]|$)''')
# NOT GOOD; attack: "a" + "[]".repeat(100) + ".b\n"
# NOTE: this rebinds the name `bad6` assigned above.
# Adapted from Knockout (https://github.com/knockout/knockout), which is
# licensed under the MIT license; see file knockout-LICENSE
bad6 = re.compile(r'''^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$''')
# GOOD
good6 = re.compile(r'''(a|.)*''')
# Testing the NFA - only some of the below are detected.
# bad7 / bad8: a repetition nested directly inside another repetition.
bad7 = re.compile(r'''^([a-z]+)+$''')
bad8 = re.compile(r'''^([a-z]*)*$''')
bad9 = re.compile(r'''^([a-zA-Z0-9])(([\\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$''')
bad10 = re.compile(r'''^(([a-z])+.)+[A-Z]([a-z])+$''')
# NOT GOOD; attack: "[" + "][".repeat(100) + "]!"
# Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
# is licensed under the MIT license; see file Prototype.js-LICENSE.
bad11 = re.compile(r'''(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)''')
# NOT GOOD; attack: "'" + "\\a".repeat(100) + '"'
# `\\?.` can consume a backslash-plus-character pair in one step or in two.
# Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
# under the MIT license; see file Prism-LICENSE.
bad12 = re.compile(r'''("|')(\\?.)*?\1''')
# NOT GOOD - `b` is matched by both the `b` and the `a?b` alternative.
bad13 = re.compile(r'''(b|a?b)*c''')
# NOT GOOD - `a` is matched by both the `a` and the `aa?` alternative.
bad15 = re.compile(r'''(a|aa?)*b''')
# GOOD - without re.DOTALL, `.` does not match `\n`, so the alternatives are disjoint.
good7 = re.compile(r'''(.|\n)*!''')
# NOT GOOD when `.` also matches `\n` (DOTALL); attack: "\n".repeat(100) + "."
# NOTE(review): no re.DOTALL flag is passed here, so as written this pattern is
# identical to good7 above -- confirm whether the flag was meant to be included.
bad16 = re.compile(r'''(.|\n)*!''')
# GOOD
good8 = re.compile(r'''([\w.]+)*''')
# NOT GOOD - same pattern as bad15.
bad17 = re.compile(r'''(a|aa?)*b''')
# GOOD - not used as regexp
good9 = '(a|aa?)*b'
# Alternations (mostly character classes) inside a starred group that is
# followed by a required `"`. A case is NOT GOOD when some character can be
# matched by more than one alternative of the starred group.
# NOT GOOD
bad18 = re.compile(r'''(([\s\S]|[^a])*)"''')
# GOOD - there is no witness in the end that could cause the regexp to not match
good10 = re.compile(r'''([^"']+)*''')
# NOT GOOD
bad20 = re.compile(r'''((.|[^a])*)"''')
# GOOD - `a` and `[^a]` are disjoint.
# NOTE: this rebinds the name `good10` assigned above.
good10 = re.compile(r'''((a|[^a])*)"''')
# NOT GOOD
bad21 = re.compile(r'''((b|[^a])*)"''')
# NOT GOOD
bad22 = re.compile(r'''((G|[^a])*)"''')
# NOT GOOD
bad23 = re.compile(r'''(([0-9]|[^a])*)"''')
# NOT GOOD
bad24 = re.compile(r'''(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?''')
# NOT GOOD - a backslash starts the first alternative and is also accepted by
# the negated class in the second.
bad25 = re.compile(r'''"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"''')
# GOOD - despite the `bad26` name: the negated class additionally excludes `\\`,
# so the two alternatives are disjoint.
bad26 = re.compile(r'''"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"''')
# NOT GOOD
bad27 = re.compile(r'''(([a-z]|[d-h])*)"''')
# NOT GOOD
# NOTE: this rebinds the name `bad27` assigned above.
bad27 = re.compile(r'''(([^a-z]|[^0-9])*)"''')
# NOT GOOD
bad28 = re.compile(r'''((\d|[0-9])*)"''')
# NOT GOOD
bad29 = re.compile(r'''((\s|\s)*)"''')
# NOT GOOD
bad30 = re.compile(r'''((\w|G)*)"''')
# GOOD - `\s` and `\d` are disjoint.
good11 = re.compile(r'''((\s|\d)*)"''')
# NOT GOOD
bad31 = re.compile(r'''((\d|\w)*)"''')
# NOT GOOD
bad32 = re.compile(r'''((\d|5)*)"''')
# NOT GOOD
bad33 = re.compile(r'''((\s|[\f])*)"''')
# NOT GOOD
bad34 = re.compile(r'''((\s|[\v]|\\v)*)"''')
# NOT GOOD
bad35 = re.compile(r'''((\f|[\f])*)"''')
# NOT GOOD
bad36 = re.compile(r'''((\W|\D)*)"''')
# NOT GOOD
bad37 = re.compile(r'''((\S|\w)*)"''')
# NOT GOOD
bad38 = re.compile(r'''((\S|[\w])*)"''')
# NOT GOOD
bad39 = re.compile(r'''((1s|[\da-z])*)"''')
# NOT GOOD
bad40 = re.compile(r'''((0|[\d])*)"''')
# NOT GOOD
bad41 = re.compile(r'''(([\d]+)*)"''')
# Nested repetitions, with and without a suffix that can force a failed match.
# GOOD - there is no witness in the end that could cause the regexp to not match
good12 = re.compile(r'''(\d+(X\d+)?)+''')
# GOOD - there is no witness in the end that could cause the regexp to not match
good13 = re.compile(r'''([0-9]+(X[0-9]*)?)*''')
# GOOD
good15 = re.compile(r'''^([^>]+)*(>|$)''')
# NOT GOOD - unlike good15, the class also excludes `a`.
bad43 = re.compile(r'''^([^>a]+)*(>|$)''')
# NOT GOOD - `\s` also matches `\n`, so a run of newlines can be split many ways.
bad44 = re.compile(r'''(\n\s*)+$''')
# NOT GOOD
bad45 = re.compile(r'''^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})''')
# NOT GOOD
bad46 = re.compile(r'''\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}''')
# NOT GOOD
bad47 = re.compile(r'''(a+|b+|c+)*c''')
# NOT GOOD
bad48 = re.compile(r'''(((a+a?)*)+b+)''')
# NOT GOOD
bad49 = re.compile(r'''(a+)+bbbb''')
# GOOD
good16 = re.compile(r'''(a+)+aaaaa*a+''')
# NOT GOOD - the trailing `$` anchor is the difference from good16's open-ended tail.
bad50 = re.compile(r'''(a+)+aaaaa$''')
# GOOD
good17 = re.compile(r'''(\n+)+\n\n''')
# NOT GOOD - same as good17 plus a trailing `$` anchor.
bad51 = re.compile(r'''(\n+)+\n\n$''')
# Nested repetitions followed by different suffixes.
# NOT GOOD
bad52 = re.compile(r'''([^X]+)*$''')
# NOT GOOD
bad53 = re.compile(r'''(([^X]b)+)*$''')
# GOOD
good18 = re.compile(r'''(([^X]b)+)*($|[^X]b)''')
# NOT GOOD - differs from good18 only in the suffix alternative (`c` instead of `b`).
bad54 = re.compile(r'''(([^X]b)+)*($|[^X]c)''')
# GOOD
good20 = re.compile(r'''((ab)+)*ababab''')
# GOOD
good21 = re.compile(r'''((ab)+)*abab(ab)*(ab)+''')
# GOOD
good22 = re.compile(r'''((ab)+)*''')
# NOT GOOD - same as good22 plus a trailing `$` anchor.
bad55 = re.compile(r'''((ab)+)*$''')
# GOOD
good23 = re.compile(r'''((ab)+)*[a1][b1][a2][b2][a3][b3]''')
# NOT GOOD
bad56 = re.compile(r'''([\n\s]+)*(.)''')
# GOOD - any witness passes through the accept state.
good24 = re.compile(r'''(A*A*X)*''')
# GOOD
good26 = re.compile(r'''([^\\\]]+)*''')
# Long literal runs inside a repetition, followed by `\u` escape cases.
# NOT GOOD
bad59 = re.compile(r'''(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-''')
# NOT GOOD - `.` and `\s` can both match a whitespace character before the literal.
bad60 = re.compile(r'''(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamnlongstringforstresstestingthequery)*-''')
# NOT GOOD - the long literal is also matched by `this\w+query`.
bad61 = re.compile(r'''(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-''')
# GOOD - the two literals start with different characters.
good27 = re.compile(r'''(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-''')
# GOOD - the two classes contain different code points.
good28 = re.compile(r'''foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo''')
# GOOD - all four escaped code points are distinct.
good29 = re.compile(r'''foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo''')
# Cases that exercise construction of the attack prefix and rejecting suffix.
# NOT GOOD (but cannot currently construct a prefix)
bad62 = re.compile(r'''a{2,3}(b+)+X''')
# NOT GOOD (and a good prefix test)
bad63 = re.compile(r'''^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>''')
# GOOD
good30 = re.compile(r'''(a+)*[\s\S][\s\S][\s\S]?''')
# GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
good31 = re.compile(r'''(a+)*[\s\S]{2,3}''')
# GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
good32 = re.compile(r'''(a+)*([\s\S]{2,}|X)$''')
# GOOD
good33 = re.compile(r'''(a+)*([\s\S]*|X)$''')
# NOT GOOD
bad64 = re.compile(r'''((a+)*$|[\s\S]+)''')
# GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
good34 = re.compile(r'''([\s\S]+|(a+)*$)''')
# GOOD
good35 = re.compile(r'''((;|^)a+)+$''')
# NOT GOOD (a good prefix test)
# Fourteen `(0|1)` groups must be matched before the vulnerable `(e+)+` is reached.
bad65 = re.compile(r'''(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f''')
# NOT GOOD
bad66 = re.compile(r'''^ab(c+)+$''')
# NOT GOOD
bad67 = re.compile(r'''(\d(\s+)*){20}''')
# GOOD - but we spuriously conclude that a rejecting suffix exists.
good36 = re.compile(r'''(([^/]|X)+)(\/[\s\S]*)*$''')
# GOOD - but we spuriously conclude that a rejecting suffix exists.
good37 = re.compile(r'''^((x([^Y]+)?)*(Y|$))''')
# Repetitions whose body can match the empty string or is itself repeated.
# NOT GOOD
bad68 = re.compile(r'''(a*)+b''')
# NOT GOOD
bad69 = re.compile(r'''foo([\w-]*)+bar''')
# NOT GOOD
bad70 = re.compile(r'''((ab)*)+c''')
# NOT GOOD - a single `a` can be taken by either of the two optional `a?`.
bad71 = re.compile(r'''(a?a?)*b''')
# GOOD
good38 = re.compile(r'''(a?)*b''')
# NOT GOOD - but not detected
bad72 = re.compile(r'''(c?a?)*b''')
# NOT GOOD - `a` is matched by both the `a` and the `a?` alternative.
bad73 = re.compile(r'''(?:a|a?)+b''')
# NOT GOOD - but not detected.
bad74 = re.compile(r'''(a?b?)*$''')
# NOT GOOD - `c` belongs to both `[a-c]` and `[c-d]`.
bad76 = re.compile(r'''PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)''')
# NOT GOOD - but not detected
bad77 = re.compile(r'''^((a)+\w)+$''')
# NOT GOOD
bad78 = re.compile(r'''^(b+.)+$''')
# GOOD
good39 = re.compile(r'''a*b''')
# All 4 bad combinations of nested * and +
bad79 = re.compile(r'''(a*)*b''')
bad80 = re.compile(r'''(a+)*b''')
bad81 = re.compile(r'''(a*)+b''')
bad82 = re.compile(r'''(a+)+b''')
# GOOD
good40 = re.compile(r'''(a|b)+''')
good41 = re.compile(r'''(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+''') # parses wrongly, sees column 42 as a char set start
# NOT GOOD
# In bad83-bad86 the `{...` after `a` is never closed into a complete `{m,n}`
# quantifier, so it is literal text and `a{...` is also matched by the
# `\w\{...` alternative.
bad83 = re.compile(r'''^((?:a{|-)|\w\{)+X$''')
bad84 = re.compile(r'''^((?:a{0|-)|\w\{\d)+X$''')
bad85 = re.compile(r'''^((?:a{0,|-)|\w\{\d,)+X$''')
bad86 = re.compile(r'''^((?:a{0,2|-)|\w\{\d,\d)+X$''')
# GOOD:
# Here `a{0,2}` is a complete quantifier (zero to two `a`), so it no longer
# matches the literal-brace text accepted by the second alternative.
good42 = re.compile(r'''^((?:a{0,2}|-)|\w\{\d,\d\})+X$''')
# NOT GOOD - `\u0061` is the escape for `a`, so both alternatives match `a`.
bad87 = re.compile(r'X(\u0061|a)*Y')
# GOOD - `\u0061` (`a`) and `b` are disjoint.
good43 = re.compile(r'X(\u0061|b)+Y')

View File

@@ -0,0 +1,9 @@
import re
# Treatment of escapes
# `[^\.]` excludes the dot and `\.` matches only the dot: disjoint alternatives.
re.compile(r"X([^\.]|\.)*$") # No ReDoS.
# `\Æ` is an escaped literal Æ, i.e. the same character as the first alternative.
re.compile(r"X(Æ|\Æ)+$") # Has ReDoS.
# Treatment of line breaks
# Without re.DOTALL, `.` does not match `\n`, so the alternatives are disjoint.
re.compile(r'(?:.|\n)*b') # No ReDoS.
# With re.DOTALL, `.` also matches `\n`, so a newline fits both alternatives.
re.compile(r'(?:.|\n)*b', re.DOTALL) # Has ReDoS.