Merge branch 'main' of https://github.com/github/codeql into python/add-comprehension-capture-flow

This commit is contained in:
Rasmus Lerchedahl Petersen
2024-10-04 14:53:03 +02:00
733 changed files with 11451 additions and 5180 deletions

View File

@@ -65,7 +65,7 @@ string getCallEdgeValue(CallNode call, Function target) {
else
exists(string fixedRelativePath |
fixedRelativePath =
target.getLocation().getFile().getRelativePath().regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
target.getLocation().getFile().getAbsolutePath().regexpCapture(".*/CallGraph[^/]*/(.*)", 1)
|
// the value needs to be enclosed in quotes to allow special characters
result = "\"" + fixedRelativePath + ":" + betterQualName(target) + "\""

View File

@@ -3,6 +3,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.Concepts
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
private import codeql.threatmodels.ThreatModels
module SystemCommandExecutionTest implements TestSig {
string getARelevantTag() { result = "getCommand" }
@@ -632,6 +633,22 @@ module XmlParsingTest implements TestSig {
}
}
/**
 * Inline-expectations test signature for threat-model sources.
 *
 * Results are tagged `threatModelSource[<model>]`; sources that have the
 * `remote` threat model are not reported by this test.
 */
module ThreatModelSourceTest implements TestSig {
  /** Gets the tag `threatModelSource[<model>]` for each `model` accepted by `knownThreatModel`. */
  string getARelevantTag() {
    exists(string model |
      knownThreatModel(model) and
      result = "threatModelSource" + "[" + model + "]"
    )
  }

  /**
   * Holds if a `ThreatModelSource` at `location` should be reported with the
   * given `element` text, `tag` and pretty-printed `value`.
   */
  predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(ThreatModelSource source |
      // Kept as `not ... = "remote"` (rather than `!=`): this rejects a source
      // as soon as any of its threat models is `remote`.
      not source.getThreatModel() = "remote" and
      tag = "threatModelSource[" + source.getThreatModel() + "]" and
      value = prettyNodeForInlineTest(source) and
      element = source.toString() and
      location = source.getLocation()
    ) and
    // Only locations whose file has a relative path are reported.
    exists(location.getFile().getRelativePath())
  }
}
module CorsMiddlewareTest implements TestSig {
string getARelevantTag() { result = "CorsMiddleware" }
@@ -656,4 +673,4 @@ import MakeTest<MergeTests5<MergeTests5<SystemCommandExecutionTest, DecodingTest
MergeTests5<FileSystemAccessTest, FileSystemWriteAccessTest, PathNormalizationTest,
SafeAccessCheckTest, PublicKeyGenerationTest>,
MergeTests5<CryptographicOperationTest, HttpClientRequestTest, CsrfProtectionSettingTest,
CsrfLocalProtectionSettingTest, XmlParsingTest>>>
CsrfLocalProtectionSettingTest, MergeTests<XmlParsingTest, ThreatModelSourceTest>>>>

View File

@@ -15,6 +15,7 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.Concepts
DataFlow::Node shouldBeTainted() {
exists(DataFlow::CallCfgNode call |
@@ -45,7 +46,7 @@ module Conf {
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
or
source instanceof RemoteFlowSource
source instanceof ThreatModelSource
}
predicate isSink(DataFlow::Node sink) {

View File

@@ -75,6 +75,7 @@ edges
| UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | provenance | |
| UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | UnsafeUnpack.py:161:19:161:21 | ControlFlowNode for tar | provenance | |
| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | Config |
| UnsafeUnpack.py:161:38:161:45 | ControlFlowNode for savepath | UnsafeUnpack.py:161:25:161:46 | ControlFlowNode for Attribute() | provenance | MaD:67 |
| UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | UnsafeUnpack.py:166:37:166:42 | ControlFlowNode for member | provenance | |
| UnsafeUnpack.py:163:33:163:35 | ControlFlowNode for tar | UnsafeUnpack.py:163:23:163:28 | ControlFlowNode for member | provenance | |
| UnsafeUnpack.py:166:23:166:28 | [post] ControlFlowNode for result | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | provenance | |

View File

@@ -1,13 +1,23 @@
edges
| test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | |
| test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:83 |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | |
| test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:83 |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | |
| test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:83 |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | |
| test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:83 |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | |
| test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:83 |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | |
| test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config |
@@ -37,14 +47,19 @@ edges
| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | provenance | |
nodes
| test.py:10:16:10:24 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:11:5:11:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:11:5:11:52 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:11:21:11:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:12:5:12:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:12:5:12:48 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:12:21:12:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:14:10:14:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:14:26:14:34 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:15:14:15:29 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:18:10:18:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:18:26:18:34 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:19:14:19:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:22:5:22:30 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:22:5:22:60 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:22:21:22:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| test.py:24:5:24:52 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |

View File

@@ -142,6 +142,14 @@ def test_dict_from_dict():
SINK(d2["k"]) #$ flow="SOURCE, l:-2 -> d2['k']"
SINK_F(d2["k1"])
# Exercises `dict(...)` called with both a positional list of key/value pairs
# and keyword arguments. Per the inline annotations, flow of SOURCE through the
# keyword argument `k2` is expected, while flow through the positional pair for
# "k" is annotated as MISSING.
# NOTE(review): the `l:-N` parts of the annotations appear to be line offsets
# relative to the sink line, so no lines should be inserted between the
# `dict(...)` call and the sinks -- confirm before editing the body.
@expects(4)
def test_dict_from_multiple_args():
d = dict([("k", SOURCE), ("k1", NONSOURCE)], k2 = SOURCE, k3 = NONSOURCE)
SINK(d["k"]) #$ MISSING: flow="SOURCE, l:-1 -> d['k']"
SINK_F(d["k1"])
SINK(d["k2"]) #$ flow="SOURCE, l:-3 -> d['k2']"
SINK_F(d["k3"])
## Container methods
### List

View File

@@ -6,7 +6,7 @@ import sys
def main():
"""Run administrative tasks."""
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
@@ -15,7 +15,7 @@ def main():
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv
if __name__ == '__main__':

View File

@@ -11,6 +11,6 @@ import os
from django.core.asgi import get_asgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
application = get_asgi_application()

View File

@@ -11,6 +11,6 @@ import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
application = get_wsgi_application()

View File

@@ -12,3 +12,24 @@ with psycopg.connect(...) as conn:
with conn.cursor() as cursor:
cursor.execute("some sql", (42,)) # $ getSql="some sql"
cursor.executemany("some sql", [(42,)]) # $ getSql="some sql"
### test of threat-model sources
# Each fetch call below is annotated as a `database` threat-model source.
row = cursor.fetchone() # $ threatModelSource[database]=cursor.fetchone()
rows_many = cursor.fetchmany(10) # $ threatModelSource[database]=cursor.fetchmany(..)
rows_all = cursor.fetchall() # $ threatModelSource[database]=cursor.fetchall()

# Values read out of the fetched rows, either by position or by column name,
# are expected to be tainted.
ensure_tainted(
    row[0], # $ tainted
    rows_many[0][0], # $ tainted
    rows_all[0][0], # $ tainted

    # pretending we created cursor to return dictionary results
    row["column"], # $ tainted
    rows_many[0]["column"], # $ tainted
    rows_all[0]["column"], # $ tainted
)

# Rows obtained by iterating over the multi-row results are expected to be
# tainted as well.
for row in rows_many:
    ensure_tainted(row[0], row["column"]) # $ tainted

for row in rows_all:
    # Same expectation as the `rows_many` loop; the comment needs the `$`
    # marker to be treated as an inline expectation like the lines above.
    ensure_tainted(row[0], row["column"]) # $ tainted

View File

@@ -6,7 +6,7 @@ import sys
def main():
"""Run administrative tasks."""
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
@@ -15,7 +15,7 @@ def main():
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
) from exc
execute_from_command_line(sys.argv)
execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv
if __name__ == '__main__':

View File

@@ -17,7 +17,7 @@ p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
name = windows.parent.name
o = open
o(name) # $ getAPathArgument=name
o(name) # $ getAPathArgument=name threatModelSource[file]=o(..)
wb = p.write_bytes
wb(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"

View File

@@ -5,25 +5,25 @@ import stat
import tempfile
import shutil
open("file") # $ getAPathArgument="file"
open(file="file") # $ getAPathArgument="file"
open("file") # $ getAPathArgument="file" threatModelSource[file]=open(..)
open(file="file") # $ getAPathArgument="file" threatModelSource[file]=open(..)
o = open
o("file") # $ getAPathArgument="file"
o(file="file") # $ getAPathArgument="file"
o("file") # $ getAPathArgument="file" threatModelSource[file]=o(..)
o(file="file") # $ getAPathArgument="file" threatModelSource[file]=o(..)
builtins.open("file") # $ getAPathArgument="file"
builtins.open(file="file") # $ getAPathArgument="file"
builtins.open("file") # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..)
builtins.open(file="file") # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..)
io.open("file") # $ getAPathArgument="file"
io.open(file="file") # $ getAPathArgument="file"
io.open("file") # $ getAPathArgument="file" threatModelSource[file]=io.open(..)
io.open(file="file") # $ getAPathArgument="file" threatModelSource[file]=io.open(..)
io.open_code("file") # $ getAPathArgument="file"
io.FileIO("file") # $ getAPathArgument="file"
f = open("path") # $ getAPathArgument="path"
f = open("path") # $ getAPathArgument="path" threatModelSource[file]=open(..)
f.write("foo") # $ getAPathArgument="path" fileWriteData="foo"
lines = ["foo"]
f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines

View File

@@ -43,3 +43,12 @@ class MyLogger(logging.Logger):
pass
MyLogger("bar").info("hello") # $ loggingInput="hello"
# Logger subclass whose base class is obtained dynamically from
# `logging.getLoggerClass()` instead of naming `logging.Logger` directly.
class CustomLogger(logging.getLoggerClass()):
pass
# Logging through an instance of the dynamically-derived subclass is expected
# to be reported as a logging input (see the annotation).
CustomLogger("baz").info("hello") # $ loggingInput="hello"
# Logger subclass that logs through `self` from inside one of its own methods.
class LoggerSubClassUsingSelf(logging.Logger):
def foo(self):
self.info("hello") # $ loggingInput="hello"

View File

@@ -0,0 +1,71 @@
# Inline-expectation fixture for threat-model sources. The sections below cover
# environment variables, command-line arguments, stdin, file reads and sockets;
# each trailing comment states the source kind and/or taint expected on its line.
# NOTE(review): `ensure_tainted` / `ensure_not_tainted` are not imported here;
# presumably the test harness recognizes them by name -- confirm.
import os
import sys
import posix
# Environment variables and raw command-line arguments.
ensure_tainted(
os.getenv("foo"), # $ tainted threatModelSource[environment]=os.getenv(..)
os.getenvb("bar"), # $ tainted threatModelSource[environment]=os.getenvb(..)
os.environ["foo"], # $ tainted threatModelSource[environment]=os.environ
os.environ.get("foo"), # $ tainted threatModelSource[environment]=os.environ
os.environb["bar"], # $ tainted threatModelSource[environment]=os.environb
posix.environ[b"foo"], # $ tainted threatModelSource[environment]=posix.environ
sys.argv[1], # $ tainted threatModelSource[commandargs]=sys.argv
sys.orig_argv[1], # $ tainted threatModelSource[commandargs]=sys.orig_argv
)
# Both the keys and the values of the environment mapping are expected to be tainted.
for k,v in os.environ.items(): # $ threatModelSource[environment]=os.environ
ensure_tainted(k) # $ tainted
ensure_tainted(v) # $ tainted
########################################
# argparse
########################################
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("foo")
# With no explicit argument list, the `parse_args()` call itself is the annotated source.
args = parser.parse_args() # $ threatModelSource[commandargs]=parser.parse_args()
ensure_tainted(args.foo) # $ tainted
# With an explicit `sys.argv`, the annotated source is `sys.argv` rather than the call.
explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv
ensure_tainted(explicit_argv_parsing.foo) # $ tainted
# A literal argument list is parsed here, so the result should not be tainted;
# the SPURIOUS annotation records that the analysis reports taint anyway.
fake_args = parser.parse_args(["<foo>"])
ensure_not_tainted(fake_args.foo) # $ SPURIOUS: tainted
########################################
# reading input from stdin
########################################
ensure_tainted(
sys.stdin.readline(), # $ tainted threatModelSource[stdin]=sys.stdin
input(), # $ tainted threatModelSource[stdin]=input()
)
########################################
# reading data from files
########################################
# NOTE(review): `os.open("foo")` omits the `flags` argument that `os.open`
# requires at runtime; presumably acceptable because this fixture is only
# analyzed statically and never executed -- confirm.
ensure_tainted(
open("foo"), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
open("foo").read(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
open("foo").readline(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
open("foo").readlines(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
os.read(os.open("foo"), 1024), # $ tainted threatModelSource[file]=os.read(..) getAPathArgument="foo"
)
########################################
# socket
########################################
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("example.com", 1234))
# The MISSING annotation records that data received from the socket is not
# reported as a source/tainted value at present.
ensure_tainted(s.recv(1024)) # $ MISSING: tainted threatModelSource[socket]

View File

@@ -45,7 +45,7 @@ def func2(environ, start_response): # $ requestHandler
start_response(status, headers) # $ headerWriteBulk=headers headerWriteBulkUnsanitized=name,value
return [b"Hello"] # $ HttpResponse responseBody=List
case = sys.argv[1]
case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv
if case == "1":
server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
server.set_app(func)

View File

@@ -0,0 +1,4 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
testFailures
failures

View File

@@ -0,0 +1,2 @@
import experimental.meta.InlineTaintTest
import MakeInlineTaintTest<TestTaintTrackingConfig>

View File

@@ -0,0 +1,10 @@
import urllib.parse
# Checks taint propagation through `urllib.parse.parse_qs`: the value returned
# for a tainted input string is expected to be tainted (see the annotation).
# NOTE(review): `TAINTED_STRING` and `ensure_tainted` are not defined in this
# file; presumably they are recognized by the inline taint test -- confirm.
def test():
ts = TAINTED_STRING
params = urllib.parse.parse_qs(ts)
ensure_tainted(
params, # $ tainted
)

View File

@@ -0,0 +1,3 @@
| default |
| remote |
| request |

View File

@@ -0,0 +1,7 @@
private import codeql.threatmodels.ThreatModels
// Selects every string `kind` that satisfies both `currentThreatModel` and
// `knownThreatModel`, both taken from the threat-models library imported above.
from string kind
where currentThreatModel(kind) and knownThreatModel(kind)
select kind

View File

@@ -0,0 +1,8 @@
edges
| test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | provenance | Src:MaD:17 |
nodes
| test.py:6:14:6:21 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:6:14:6:24 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
#select
| test.py:6:14:6:24 | ControlFlowNode for Subscript | test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | This SQL query depends on a $@. | test.py:6:14:6:21 | ControlFlowNode for Attribute | user-provided value |

View File

@@ -0,0 +1,6 @@
# Data extension adding one row to the `threatModelConfiguration` extensible
# predicate of the `codeql/threat-models` pack.
# NOTE(review): the row presumably reads (kind = "local", enabled = true,
# priority = 0), i.e. it switches the `local` threat model on -- confirm
# against the predicate's declared signature.
extensions:
- addsTo:
pack: codeql/threat-models
extensible: threatModelConfiguration
data:
- ["local", true, 0]

View File

@@ -0,0 +1 @@
Security/CWE-089/SqlInjection.ql

View File

@@ -0,0 +1,6 @@
# test that enabling local threat-model works end-to-end
import sys
import psycopg
conn = psycopg.connect(...)
conn.execute(sys.argv[1])