mirror of
https://github.com/github/codeql.git
synced 2026-07-03 02:25:29 +02:00
Compare commits
12 Commits
python-two
...
yoff/pytho
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47d2b05bc5 | ||
|
|
41c9d8b80a | ||
|
|
fbfbbd342a | ||
|
|
872c08148e | ||
|
|
3983e4db29 | ||
|
|
3058198c0d | ||
|
|
46382cbc8e | ||
|
|
93439db87b | ||
|
|
70ca7af04c | ||
|
|
664f0125b9 | ||
|
|
1b7f589000 | ||
|
|
eb7f8cc43d |
2
python/ql/consistency-queries/CfgConsistency.ql
Normal file
2
python/ql/consistency-queries/CfgConsistency.ql
Normal file
@@ -0,0 +1,2 @@
|
||||
import semmle.python.controlflow.internal.AstNodeImpl
|
||||
import ControlFlow::Consistency
|
||||
4
python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md
Normal file
4
python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* A new Python control flow graph implementation has been added under `semmle.python.controlflow.internal.Cfg` (backed by `AstNodeImpl.qll`), built on the shared `codeql.controlflow.ControlFlowGraph` library. It is not yet used by the dataflow library or any production query; the legacy CFG in `semmle/python/Flow.qll` remains the default. The new library is exposed for tests and for upcoming migrations.
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The new (shared-CFG-based) Python control flow graph now visits parameter and return type annotations as CFG nodes for function definitions, matching the legacy CFG. This restores annotation-based type tracking through framework models such as FastAPI's `Depends()`, Pydantic request models, Starlette `WebSocket` handlers, and any other models that flow a class reference through `Parameter.getAnnotation()` to identify instances of the annotated class.
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- Temporarily disabled the `instanceFieldStep` disjunct of the internal `TypeTrackingInput::levelStepCall` predicate, which was introduced in 7.2.0 and caused catastrophic query slowdowns on some OOP-heavy Python codebases (e.g. `mypy` and `dask`).
|
||||
42
python/ql/lib/ide-contextual-queries/printCfg.ql
Normal file
42
python/ql/lib/ide-contextual-queries/printCfg.ql
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* @name Print CFG
|
||||
* @description Produces a representation of a file's Control Flow Graph.
|
||||
* This query is used by the VS Code extension.
|
||||
* @id py/print-cfg
|
||||
* @kind graph
|
||||
* @tags ide-contextual-queries/print-cfg
|
||||
*/
|
||||
|
||||
import semmle.python.Files as Files
|
||||
// import semmle.python.Scope
|
||||
import semmle.python.controlflow.internal.AstNodeImpl
|
||||
|
||||
external string selectedSourceFile();
|
||||
|
||||
private predicate selectedSourceFileAlias = selectedSourceFile/0;
|
||||
|
||||
external int selectedSourceLine();
|
||||
|
||||
private predicate selectedSourceLineAlias = selectedSourceLine/0;
|
||||
|
||||
external int selectedSourceColumn();
|
||||
|
||||
private predicate selectedSourceColumnAlias = selectedSourceColumn/0;
|
||||
|
||||
/**
 * Input module for the shared `ControlFlow::ViewCfgQuery` instantiation.
 *
 * It forwards the externally supplied selection (file, line and column) and
 * describes the source span covered by each callable scope.
 */
module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig<Files::File> {
|
||||
// The members below reuse the names of the top-level `external` predicates,
// so they are bound through the `...Alias` predicates declared earlier
// (presumably because the member names shadow the top-level ones here).
predicate selectedSourceFile = selectedSourceFileAlias/0;
|
||||
|
||||
predicate selectedSourceLine = selectedSourceLineAlias/0;
|
||||
|
||||
predicate selectedSourceColumn = selectedSourceColumnAlias/0;
|
||||
|
||||
/**
 * Holds if `scope` is located in `file` and its location spans from
 * `startLine`:`startColumn` to `endLine`:`endColumn`.
 */
predicate cfgScopeSpan(
|
||||
Ast::Callable scope, Files::File file, int startLine, int startColumn, int endLine,
|
||||
int endColumn
|
||||
) {
|
||||
file = scope.getLocation().getFile() and
|
||||
// The first argument of `hasLocationInfo` is ignored; `file` is already bound above.
scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
|
||||
}
|
||||
}
|
||||
|
||||
import ControlFlow::ViewCfgQuery<Files::File, ViewCfgQueryInput>
|
||||
1771
python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll
Normal file
1771
python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll
Normal file
File diff suppressed because it is too large
Load Diff
1025
python/ql/lib/semmle/python/controlflow/internal/Cfg.qll
Normal file
1025
python/ql/lib/semmle/python/controlflow/internal/Cfg.qll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1138,9 +1138,7 @@ predicate clearsContent(Node n, ContentSet cs) {
|
||||
* Holds if the value that is being tracked is expected to be stored inside content `c`
|
||||
* at node `n`.
|
||||
*/
|
||||
predicate expectsContent(Node n, ContentSet c) {
|
||||
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
|
||||
}
|
||||
predicate expectsContent(Node n, ContentSet c) { none() }
|
||||
|
||||
/**
|
||||
* Holds if values stored inside attribute `c` are cleared at node `n`.
|
||||
|
||||
@@ -91,8 +91,6 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
|
||||
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
|
||||
}
|
||||
|
||||
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
|
||||
|
||||
bindingset[token]
|
||||
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
|
||||
// needed to support `Argument[x..y]` ranges
|
||||
|
||||
@@ -170,13 +170,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
||||
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
||||
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) {
|
||||
// HOTFIX: `instanceFieldStep` is temporarily disabled (via `and none()`).
|
||||
// It uses `classInstanceTracker(cls)` -- itself a type-tracker run --
|
||||
// from inside `levelStepCall`, creating a structural mutual recursion
|
||||
// that causes catastrophic query slowdowns on some OOP-heavy Python
|
||||
// codebases (e.g. mypy and dask). The `and none()` should be removed
|
||||
// once that recursion is redesigned.
|
||||
instanceFieldStep(nodeFrom, nodeTo) and none()
|
||||
instanceFieldStep(nodeFrom, nodeTo)
|
||||
or
|
||||
inheritedFieldStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
@@ -4199,9 +4199,11 @@ module StdlibPrivate {
|
||||
// The positional argument contains a mapping.
|
||||
// TODO: these values can be overwritten by keyword arguments
|
||||
// - dict mapping
|
||||
input = "Argument[0].WithAnyDictionaryElement" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[0].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.DictionaryElement[" + key + "]" and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
// - list-of-pairs mapping
|
||||
input = "Argument[0].ListElement.TupleElement[1]" and
|
||||
@@ -4238,7 +4240,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
// Element content is mutated into list element content
|
||||
@@ -4262,9 +4266,11 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[0].WithAnyTupleElement" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]" and
|
||||
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
input = "Argument[0].ListElement" and
|
||||
output = "ReturnValue" and
|
||||
@@ -4288,7 +4294,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.SetElement" and
|
||||
@@ -4334,7 +4342,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement" and
|
||||
@@ -4362,7 +4372,9 @@ module StdlibPrivate {
|
||||
or
|
||||
content = "SetElement"
|
||||
or
|
||||
content = "AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
content = "TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
|
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
input = "Argument[0]." + content and
|
||||
@@ -4392,7 +4404,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement" and
|
||||
@@ -4420,7 +4434,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue" and
|
||||
@@ -4452,7 +4468,9 @@ module StdlibPrivate {
|
||||
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
|
||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
||||
i = 0 and
|
||||
input = "Argument[1].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
||||
input = "Argument[1].TupleElement[" + j.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "Argument[0].Parameter[" + i.toString() + "]" and
|
||||
@@ -4481,7 +4499,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[1].SetElement"
|
||||
or
|
||||
input = "Argument[1].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[1].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
|
||||
@@ -4505,7 +4525,9 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[0].SetElement"
|
||||
or
|
||||
input = "Argument[0].AnyTupleElement"
|
||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
@@ -4530,7 +4552,12 @@ module StdlibPrivate {
|
||||
or
|
||||
input = "Argument[" + i.toString() + "].SetElement"
|
||||
or
|
||||
input = "Argument[" + i.toString() + "].AnyTupleElement"
|
||||
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
|
||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
||||
i in [0 .. 1] and
|
||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
||||
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
|
||||
)
|
||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||
) and
|
||||
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
|
||||
@@ -4553,6 +4580,12 @@ module StdlibPrivate {
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
exists(DataFlow::Content c |
|
||||
input = "Argument[self]." + c.getMaDRepresentation() and
|
||||
output = "ReturnValue." + c.getMaDRepresentation() and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
input = "Argument[self]" and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
@@ -4708,10 +4741,12 @@ module StdlibPrivate {
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4790,9 +4825,11 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
input = "Argument[self]" and
|
||||
output = "ReturnValue" and
|
||||
@@ -4839,9 +4876,11 @@ module StdlibPrivate {
|
||||
}
|
||||
|
||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||
input = "Argument[self].AnyDictionaryElement" and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||
preservesValue = true
|
||||
)
|
||||
or
|
||||
// TODO: Add the keys to output list
|
||||
input = "Argument[self]" and
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
consistencyOverview
|
||||
| deadEnd | 1 |
|
||||
deadEnd
|
||||
| without_loop.py:7:5:7:9 | Break |
|
||||
@@ -0,0 +1,32 @@
|
||||
/**
|
||||
* Phase -1 of the dataflow CFG migration: verifies that every variable
|
||||
* binding visible to the AST (`Name.defines(v)`) corresponds to a CFG node
|
||||
* in the new CFG (`semmle.python.controlflow.internal.AstNodeImpl`).
|
||||
*
|
||||
* The expected tag is `cfgdefines=<name>`. Each binding annotation in the
|
||||
* test sources looks like `# $ cfgdefines=x` for a binding currently
|
||||
* covered by the new CFG, or `# $ MISSING: cfgdefines=x` for a binding
|
||||
* that is known to be uncovered (a "red" test case that should be
|
||||
* green-flipped once the corresponding `cfg-ext-*` extension lands).
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
/**
 * Inline-expectations test configuration that reports a `cfgdefines=<name>`
 * result for every defining `Name` that has a node in the new CFG.
 */
module CfgBindingsTest implements TestSig {
|
||||
/** Gets the only tag this test produces and checks: `cfgdefines`. */
string getARelevantTag() { result = "cfgdefines" }
|
||||
|
||||
/**
 * Holds if a `Name` that defines a variable has a corresponding node in the
 * new CFG. The result is reported at the name's location, with the name's
 * string form as `element`, `cfgdefines` as `tag`, and the variable's
 * identifier as `value`.
 */
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Name n, Variable v, CfgImpl::ControlFlowNode cfg |
|
||||
n.defines(v) and
|
||||
// `cfg` is not used further: its existence is the coverage check itself.
cfg.getAstNode().asExpr() = n and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
tag = "cfgdefines" and
|
||||
value = v.getId()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<CfgBindingsTest>
|
||||
@@ -0,0 +1,13 @@
|
||||
# Annotated assignment (PEP 526). Both with and without an initializer.
|
||||
|
||||
a: int = 1 # $ cfgdefines=a
|
||||
b: str = "hi" # $ cfgdefines=b
|
||||
|
||||
# Annotation without value: the AST records `c` as defined,
|
||||
# and the new CFG now visits it via the AnnAssignStmt wrapper.
|
||||
c: int # $ cfgdefines=c
|
||||
|
||||
class K: # $ cfgdefines=K
|
||||
field: int = 0 # $ cfgdefines=field
|
||||
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
# Compound (tuple/list) assignment targets — actually wired in the new CFG.
|
||||
|
||||
a, b = (1, 2) # $ cfgdefines=a cfgdefines=b
|
||||
[c, d] = [3, 4] # $ cfgdefines=c cfgdefines=d
|
||||
|
||||
# Nested unpacking.
|
||||
(e, (f, g)) = (1, (2, 3)) # $ cfgdefines=e cfgdefines=f cfgdefines=g
|
||||
|
||||
# Star unpacking.
|
||||
h, *i = [1, 2, 3] # $ cfgdefines=h cfgdefines=i
|
||||
|
||||
# Chained assignment with compound target.
|
||||
j = k, l = (5, 6) # $ cfgdefines=j cfgdefines=k cfgdefines=l
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
# Comprehension and `for` loop targets — wired in the new CFG.
|
||||
# Comprehensions are nested function scopes with a synthetic `.0` parameter
|
||||
# bound to the iterable.
|
||||
|
||||
# Bare-name `for` target.
|
||||
for i in range(3): # $ cfgdefines=i
|
||||
pass
|
||||
|
||||
# Compound `for` target.
|
||||
for k, v in [(1, 2)]: # $ cfgdefines=k cfgdefines=v
|
||||
pass
|
||||
|
||||
# Comprehension targets.
|
||||
_ = [x for x in range(3)] # $ cfgdefines=_ cfgdefines=x cfgdefines=.0
|
||||
_ = {y: z for y, z in []} # $ cfgdefines=_ cfgdefines=y cfgdefines=z cfgdefines=.0
|
||||
_ = (a for a in []) # $ cfgdefines=_ cfgdefines=a cfgdefines=.0
|
||||
|
||||
# Nested comprehensions.
|
||||
_ = [b for c in [] for b in c] # $ cfgdefines=_ cfgdefines=c cfgdefines=b cfgdefines=.0
|
||||
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# Reachability of code following a try whose body always returns.
|
||||
#
|
||||
# The new CFG models exception edges for raise-prone expressions when
|
||||
# they appear inside a `try` (or `with`) statement, mirroring Java's
|
||||
# `mayThrow`. This means the body of a `try` has both a normal
|
||||
# completion edge and an exception edge to its handlers, so code
|
||||
# following the try-statement is reachable via the except-handler path
|
||||
# even when the try-body would otherwise always return.
|
||||
#
|
||||
# Code that is not reachable under either normal or exception flow
|
||||
# (for example, the `else` clause of a try whose body unconditionally
|
||||
# raises) remains correctly classified as dead.
|
||||
|
||||
|
||||
def f(obj): # $ cfgdefines=f cfgdefines=obj
|
||||
try:
|
||||
return len(obj)
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
# The try-body always returns, but `len(obj)` can raise (it is
|
||||
# inside the try, so we model its exception edge). The
|
||||
# `except TypeError: pass` handler falls through to here, making
|
||||
# the code below reachable.
|
||||
try:
|
||||
hint = type(obj).__length_hint__ # $ cfgdefines=hint
|
||||
except AttributeError:
|
||||
return None
|
||||
return hint
|
||||
|
||||
|
||||
def g(): # $ cfgdefines=g
|
||||
try:
|
||||
raise Exception("inner")
|
||||
except:
|
||||
raise Exception("outer")
|
||||
else:
|
||||
# Unreachable: the try body always raises (via an explicit
|
||||
# `raise`, which is modelled unconditionally), so the `else:`
|
||||
# clause never runs.
|
||||
hit_inner_else = True
|
||||
|
||||
|
||||
def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key
|
||||
try:
|
||||
return cache[key]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# Same pattern as `f`: reachable via the except-handler fall-through.
|
||||
value = compute(key) # $ cfgdefines=value
|
||||
cache[key] = value
|
||||
return value
|
||||
@@ -0,0 +1,30 @@
|
||||
# Decorated `def`/`class` — wired in the new CFG.
|
||||
|
||||
|
||||
def deco(f): # $ cfgdefines=deco cfgdefines=f
|
||||
return f
|
||||
|
||||
|
||||
@deco
|
||||
def decorated_func(): # $ cfgdefines=decorated_func
|
||||
pass
|
||||
|
||||
|
||||
@deco
|
||||
class DecoratedClass: # $ cfgdefines=DecoratedClass
|
||||
pass
|
||||
|
||||
|
||||
# Stacked decorators.
|
||||
@deco
|
||||
@deco
|
||||
def doubly(): # $ cfgdefines=doubly
|
||||
pass
|
||||
|
||||
|
||||
# Inside a class body.
|
||||
class Outer: # $ cfgdefines=Outer
|
||||
@staticmethod
|
||||
def inner(): # $ cfgdefines=inner
|
||||
pass
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
# Exception-handler name bindings. These are already wired in the new
|
||||
# CFG provided the try body can raise; `raise` statements are reliably
|
||||
# treated as exception sources.
|
||||
|
||||
try:
|
||||
raise ValueError("oops")
|
||||
except ValueError as e: # $ cfgdefines=e
|
||||
pass
|
||||
|
||||
try:
|
||||
raise TypeError("oops")
|
||||
except (TypeError, KeyError) as err: # $ cfgdefines=err
|
||||
pass
|
||||
|
||||
# Exception groups (Python 3.11+).
|
||||
try:
|
||||
raise ValueError("oops")
|
||||
except* ValueError as eg: # $ cfgdefines=eg
|
||||
pass
|
||||
14
python/ql/test/library-tests/ControlFlow/bindings/imports.py
Normal file
14
python/ql/test/library-tests/ControlFlow/bindings/imports.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# Import aliases — all bound names below are now reachable via the new
|
||||
# CFG's `ImportStmt` wrapper.
|
||||
|
||||
import os # $ cfgdefines=os
|
||||
import os.path # $ cfgdefines=os
|
||||
import os as o # $ cfgdefines=o
|
||||
from os import path # $ cfgdefines=path
|
||||
from os import path as p # $ cfgdefines=p
|
||||
from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep
|
||||
from os import (
|
||||
getcwd, # $ cfgdefines=getcwd
|
||||
getcwdb, # $ cfgdefines=getcwdb
|
||||
)
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
# Match-statement pattern bindings — wired in the new CFG.
|
||||
|
||||
def f(subject): # $ cfgdefines=f cfgdefines=subject
|
||||
match subject:
|
||||
case x: # $ cfgdefines=x
|
||||
pass
|
||||
case [a, b]: # $ cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
case {"k": v}: # $ cfgdefines=v
|
||||
pass
|
||||
case Point(p, q): # $ cfgdefines=p cfgdefines=q
|
||||
pass
|
||||
case [_, *rest]: # $ cfgdefines=rest
|
||||
pass
|
||||
case (1 | 2) as n: # $ cfgdefines=n
|
||||
pass
|
||||
|
||||
|
||||
class Point: # $ cfgdefines=Point
|
||||
__match_args__ = ("x", "y") # $ cfgdefines=__match_args__
|
||||
x: int # $ cfgdefines=x
|
||||
y: int # $ cfgdefines=y
|
||||
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
# Function parameters.
|
||||
|
||||
def positional(a, b): # $ cfgdefines=positional cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
|
||||
|
||||
def with_default(x=1, y=2): # $ cfgdefines=with_default cfgdefines=x cfgdefines=y
|
||||
pass
|
||||
|
||||
|
||||
def with_vararg(*args): # $ cfgdefines=with_vararg cfgdefines=args
|
||||
pass
|
||||
|
||||
|
||||
def with_kwarg(**kwargs): # $ cfgdefines=with_kwarg cfgdefines=kwargs
|
||||
pass
|
||||
|
||||
|
||||
def with_kwonly(*, k1, k2=5): # $ cfgdefines=with_kwonly cfgdefines=k1 cfgdefines=k2
|
||||
pass
|
||||
|
||||
|
||||
def kitchen_sink(a, b=2, *args, k1, k2=5, **kw): # $ cfgdefines=kitchen_sink cfgdefines=a cfgdefines=b cfgdefines=args cfgdefines=k1 cfgdefines=k2 cfgdefines=kw
|
||||
pass
|
||||
|
||||
|
||||
# Methods get `self` / `cls`.
|
||||
class C: # $ cfgdefines=C
|
||||
def method(self, x): # $ cfgdefines=method cfgdefines=self cfgdefines=x
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def cmethod(cls, x): # $ cfgdefines=cmethod cfgdefines=cls cfgdefines=x
|
||||
pass
|
||||
|
||||
|
||||
# Lambda parameter.
|
||||
_ = lambda p: p + 1 # $ cfgdefines=_ cfgdefines=p
|
||||
|
||||
# PEP 570 positional-only.
|
||||
def pos_only(a, b, /, c): # $ cfgdefines=pos_only cfgdefines=a cfgdefines=b cfgdefines=c
|
||||
pass
|
||||
14
python/ql/test/library-tests/ControlFlow/bindings/simple.py
Normal file
14
python/ql/test/library-tests/ControlFlow/bindings/simple.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# Simple bindings that should already work in the new CFG.
|
||||
# No MISSING annotations expected.
|
||||
|
||||
x = 1 # $ cfgdefines=x
|
||||
y = x + 1 # $ cfgdefines=y
|
||||
|
||||
def f(): # $ cfgdefines=f
|
||||
pass
|
||||
|
||||
class C: # $ cfgdefines=C
|
||||
pass
|
||||
|
||||
# Re-assignment.
|
||||
x = 2 # $ cfgdefines=x
|
||||
@@ -0,0 +1,21 @@
|
||||
# PEP 695 type parameters (Python 3.12+).
|
||||
|
||||
# PEP 695 type-param names on `def`/`class` bind in an annotation scope
|
||||
# that nests the function/class body — they have no CFG node in the
|
||||
# enclosing scope (matching the legacy CFG).
|
||||
def func[T](x: T) -> T: # $ cfgdefines=func cfgdefines=x
|
||||
return x
|
||||
|
||||
|
||||
class Box[T]: # $ cfgdefines=Box
|
||||
item: T # $ cfgdefines=item
|
||||
|
||||
|
||||
# Multi-parameter, with bound and variadics.
|
||||
def multi[T: int, *Ts, **P](x: T, *args: *Ts, **kwargs: P.kwargs) -> T: # $ cfgdefines=multi cfgdefines=x cfgdefines=args cfgdefines=kwargs
|
||||
return x
|
||||
|
||||
|
||||
# `type` statement (PEP 695).
|
||||
type Alias[T] = list[T] # $ cfgdefines=Alias cfgdefines=T
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
# Walrus and starred-target edge cases — wired in the new CFG.
|
||||
|
||||
# Walrus in expression context.
|
||||
if (y := 5) > 0: # $ cfgdefines=y
|
||||
pass
|
||||
|
||||
# Walrus in a comprehension. The comprehension introduces a synthetic
|
||||
# `.0` parameter bound to the iterable.
|
||||
_ = [w for _ in range(3) if (w := 1)] # $ cfgdefines=_ cfgdefines=w cfgdefines=.0
|
||||
|
||||
# Starred target in a Tuple LHS.
|
||||
*head, tail = [1, 2, 3] # $ cfgdefines=head cfgdefines=tail
|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
# `with cm() as x:` bindings — wired in the new CFG.
|
||||
|
||||
class CM: # $ cfgdefines=CM
|
||||
def __enter__(self): return self # $ cfgdefines=__enter__ cfgdefines=self
|
||||
def __exit__(self, *a): pass # $ cfgdefines=__exit__ cfgdefines=self cfgdefines=a
|
||||
|
||||
with CM() as x: # $ cfgdefines=x
|
||||
pass
|
||||
|
||||
# Multiple items.
|
||||
with CM() as a, CM() as b: # $ cfgdefines=a cfgdefines=b
|
||||
pass
|
||||
|
||||
# Parenthesised form (Python 3.10+).
|
||||
with (CM() as p, CM() as q): # $ cfgdefines=p cfgdefines=q
|
||||
pass
|
||||
|
||||
# Compound target in `with`.
|
||||
with CM() as (m, n): # $ cfgdefines=m cfgdefines=n
|
||||
pass
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
/** New-CFG version of AllLiveReachable. */
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TestFunction f
|
||||
where allLiveReachable(a, f)
|
||||
select a, "Unreachable live annotation; entry of $@ does not reach this node", f, f.getName()
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* New-CFG version of AnnotationHasCfgNode.
|
||||
*
|
||||
* Checks that every timer annotation has a corresponding CFG node.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerAnnotation ann
|
||||
where annotationWithoutCfgNode(ann)
|
||||
select ann, "Annotation in $@ has no CFG node", ann.getTestFunction(),
|
||||
ann.getTestFunction().getName()
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* New-CFG version of BasicBlockAnnotationGap.
|
||||
*
|
||||
* Original:
|
||||
* Checks that within a basic block, if a node is annotated then its
|
||||
* successor is also annotated (or excluded). A gap in annotations
|
||||
* within a basic block indicates a missing annotation, since there
|
||||
* are no branches to justify the gap.
|
||||
*
|
||||
* Nodes with exceptional successors are excluded, as the exception
|
||||
* edge leaves the basic block and the normal successor may be dead.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, CfgNode succ
|
||||
where basicBlockAnnotationGap(a, succ)
|
||||
select a, "Annotated node followed by unannotated $@ in the same basic block", succ,
|
||||
succ.getNode().toString()
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* New-CFG version of BasicBlockOrdering.
|
||||
*
|
||||
* Original:
|
||||
* Checks that within a single basic block, annotations appear in
|
||||
* increasing minimum-timestamp order.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
from TimerCfgNode a, TimerCfgNode b, int minA, int minB
|
||||
where basicBlockOrdering(a, b, minA, minB)
|
||||
select a, "Basic block ordering: $@ appears before $@", a.getTimestampExpr(minA),
|
||||
"timestamp " + minA, b.getTimestampExpr(minB), "timestamp " + minB
|
||||
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
* New-CFG version of BranchTimestamps.
|
||||
*
|
||||
* Checks that when a node has both a true and false successor, the
|
||||
* live timestamps on one branch are marked as dead on the other.
|
||||
* This ensures that boolean branches are fully annotated with dead()
|
||||
* markers for the paths not taken.
|
||||
*
|
||||
* Limitation: the `@ t[ts, ...]` / `dead(ts)` annotation scheme can only
|
||||
* model branch-dead-ness for plain boolean control flow that reconverges
|
||||
* linearly after the split — i.e. `if`-with-else and `if`-expression.
|
||||
* It cannot model:
|
||||
*
|
||||
* * loops (`while` / `for`): body timestamps repeat across iterations,
|
||||
* so the loop-exit annotation can't list them as dead;
|
||||
* * `match` statements: each `case` body is a syntactically distinct
|
||||
* sub-tree, and the branches don't reconverge through a common
|
||||
* annotation point in the timeline;
|
||||
* * `try` / `with` and `raise` / `assert`: exception edges are modelled
|
||||
* as true/false but flow to syntactically distinct handlers, with no
|
||||
* reconvergence in the linear annotation order;
|
||||
* * short-circuit `and` / `or` (`BoolExpr`): the branches reconverge at
|
||||
* the BoolExpr's after-node, so timestamps on one branch are live
|
||||
* downstream of the other rather than dead;
|
||||
* * `if` without an `else` clause, and `if`/`elif` chains: the false
|
||||
* branch reconverges with the true branch at the post-if statement
|
||||
* (no-else) or fans out across multiple elif-test annotations,
|
||||
* neither of which fit the binary annotation scheme.
|
||||
*
|
||||
* Branch nodes inside those constructs are therefore whitelisted out
|
||||
* below. The check still fires (and is useful) for plain `if`/`else`
|
||||
* and conditional-expression branching.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
/**
 * Holds if function `f` is the scope of at least one construct whose
 * branching the linear `@ t[...]` / `dead(...)` timestamp annotations
 * cannot describe: a `while` or `for` loop, a `match`, `try`, `with`,
 * `raise` or `assert` statement, a short-circuit boolean expression, an
 * `if` that has no `else` clause, or an `elif`.
 */
private predicate hasUnmodellableBranching(Function f) {
  exists(AstNode construct | construct.getScope() = f |
    construct instanceof While
    or
    construct instanceof For
    or
    construct instanceof MatchStmt
    or
    construct instanceof Try
    or
    construct instanceof With
    or
    construct instanceof Raise
    or
    construct instanceof Assert
    or
    construct instanceof BoolExpr
    or
    // Only `if` statements that are an `elif` or lack an `else` are excluded;
    // a plain `if`/`else` remains checkable.
    exists(If ifStmt | ifStmt = construct |
      ifStmt.isElif()
      or
      not exists(ifStmt.getAnOrelse())
    )
  )
}
|
||||
|
||||
// Report every (node, timestamp, branch) triple flagged by `missingBranchTimestamp`,
// except in test functions containing constructs that `hasUnmodellableBranching` rejects.
from TimerCfgNode node, int ts, string branch
|
||||
where
|
||||
missingBranchTimestamp(node, ts, branch) and
|
||||
not hasUnmodellableBranching(node.getTestFunction())
|
||||
select node,
|
||||
"Timestamp " + ts + " on true/false branch is missing a dead() annotation on the " + branch +
|
||||
" successor in $@", node.getTestFunction(), node.getTestFunction().getName()
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* New-CFG version of ConsecutivePredecessorTimestamps.
|
||||
*
|
||||
* Checks that each annotated node (except the minimum timestamp) has
|
||||
* a predecessor annotation with timestamp `a - 1`. This is the reverse
|
||||
* of ConsecutiveTimestamps: it catches nodes that are reachable but
|
||||
* arrived at from the wrong place (skipping an intermediate node).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each annotation/timestamp pair flagged by `consecutivePredecessorTimestamps`;
// the message links the timestamp expression and the enclosing test function.
from TimerAnnotation ann, int a
|
||||
where consecutivePredecessorTimestamps(ann, a)
|
||||
select ann, "$@ in $@ has no consecutive predecessor (expected " + (a - 1) + ")",
|
||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* New-CFG version of ConsecutiveTimestamps.
|
||||
*
|
||||
* Original:
|
||||
* Checks that consecutive annotated nodes have consecutive timestamps:
|
||||
* for each annotation with timestamp `a`, some CFG node for that annotation
|
||||
* must have a next annotation containing `a + 1`.
|
||||
*
|
||||
* Handles CFG splitting (e.g., finally blocks duplicated for normal/exceptional
|
||||
* flow) by checking that at least one split has the required successor.
|
||||
*
|
||||
* Only applies to functions where all annotations are in the function's
|
||||
* own scope (excludes tests with generators, async, comprehensions, or
|
||||
* lambdas that have annotations in nested scopes).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each annotation/timestamp pair flagged by `consecutiveTimestamps`;
// the message links the timestamp expression and the enclosing test function.
from TimerAnnotation ann, int a
|
||||
where consecutiveTimestamps(ann, a)
|
||||
select ann, "$@ in $@ has no consecutive successor (expected " + (a + 1) + ")",
|
||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
||||
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Implementation of the evaluation-order CFG signature using the new
|
||||
* shared control flow graph from AstNodeImpl.
|
||||
*/
|
||||
|
||||
private import python as Py
|
||||
import TimerUtils
|
||||
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
private import codeql.controlflow.SuccessorType
|
||||
|
||||
private class NewControlFlowNode = CfgImpl::ControlFlowNode;
|
||||
|
||||
private class NewBasicBlock = CfgImpl::BasicBlock;
|
||||
|
||||
/**
 * Evaluation-order CFG signature implemented on top of the new (shared)
 * control flow graph.
 */
module NewCfg implements EvalOrderCfgSig {
  /**
   * The representative CFG node of an AST node: its "after" node.
   *
   * The shared CFG has several nodes per AST node (before, in-post-order,
   * after, and after-value splits), while the timer test framework keys
   * annotations on `getNode()` and assumes a single CFG node per annotated
   * AST node. If an annotated `f()` mapped to both `f()` and `After f()`:
   *
   * 1. the "no shared reachable" check would fail, because it requires two
   *    distinct nodes with the same timestamp to be mutually unreachable,
   *    yet `Before f()`, `f()` and `After f()` sit on one linear path; and
   * 2. the annotation walk (`nextTimerAnnotation`), which stops at the first
   *    reachable representative, would stall on the same timestamp rather
   *    than advance to the next evaluation event.
   *
   * The "after" node is chosen over the canonical `injects` node because
   * `injects` represents short-circuit and conditional expressions
   * (`and`/`or`/`not`/ternary) by their *before* node, which precedes their
   * operands and is therefore wrong for evaluation order. `isAfter` selects
   * the post-evaluation node instead: the merged before/after node for
   * simple leaves, the `TAfterNode` for post-order expressions, and the
   * `AfterValueNode`(s) for pre-order conditionals. The two value-split
   * nodes of a conditional are distinct evaluation outcomes (exposed through
   * `getATrueSuccessor` / `getAFalseSuccessor`), so they do not break the
   * one-representative assumption.
   */
  class CfgNode instanceof NewControlFlowNode {
    CfgNode() { NewControlFlowNode.super.isAfter(_) }

    /** Gets the textual representation of the underlying CFG node. */
    string toString() { result = NewControlFlowNode.super.toString() }

    /** Gets the location of the underlying CFG node. */
    Py::Location getLocation() { result = NewControlFlowNode.super.getLocation() }

    /** Gets the Python AST node this CFG node stands for. */
    Py::AstNode getNode() {
      result = CfgImpl::astNodeToPyNode(NewControlFlowNode.super.getAstNode())
    }

    /** Gets a nearest representative node that follows this one. */
    CfgNode getASuccessor() { nextCfgNodeFrom(this, result) }

    /** Gets a nearest representative successor of this after-true node. */
    CfgNode getATrueSuccessor() {
      nextCfgNodeFrom(this, result) and
      NewControlFlowNode.super.isAfterTrue(_) and
      // A matching after-false node must exist, i.e. this is a real boolean split.
      exists(NewControlFlowNode falseSide |
        falseSide.isAfterFalse(NewControlFlowNode.super.getAstNode())
      )
    }

    /** Gets a nearest representative successor of this after-false node. */
    CfgNode getAFalseSuccessor() {
      nextCfgNodeFrom(this, result) and
      NewControlFlowNode.super.isAfterFalse(_) and
      // A matching after-true node must exist, i.e. this is a real boolean split.
      exists(NewControlFlowNode trueSide |
        trueSide.isAfterTrue(NewControlFlowNode.super.getAstNode())
      )
    }

    /**
     * Gets a nearest representative node that lies one or more raw edges
     * beyond an exception successor of this node.
     */
    CfgNode getAnExceptionalSuccessor() {
      nextCfgNodeFrom(NewControlFlowNode.super.getAnExceptionSuccessor(), result)
    }

    /** Gets the Python scope of the callable enclosing this node. */
    Py::Scope getScope() { result = NewControlFlowNode.super.getEnclosingCallable().asScope() }

    /** Gets a basic block that has this node at some index. */
    BasicBlock getBasicBlock() {
      exists(NewBasicBlock block | block.getNode(_) = this | result = block)
    }
  }

  /**
   * Holds if `next` is a nearest `CfgNode` reachable from `n` by following
   * one or more raw successor edges, where every node strictly between `n`
   * and `next` is not a `CfgNode`.
   */
  private predicate nextCfgNodeFrom(NewControlFlowNode n, CfgNode next) {
    exists(NewControlFlowNode succ | succ = n.getASuccessor() |
      next = succ
      or
      not succ instanceof CfgNode and
      nextCfgNodeFrom(succ, next)
    )
  }

  /** A basic block of the new CFG, restricted to representative nodes. */
  class BasicBlock instanceof NewBasicBlock {
    /** Gets the textual representation of the underlying basic block. */
    string toString() { result = NewBasicBlock.super.toString() }

    /** Gets the representative node at raw index `n` of this block, if any. */
    CfgNode getNode(int n) { result = NewBasicBlock.super.getNode(n) }

    /** Holds if `bb` is this block or is reachable from it. */
    predicate reaches(BasicBlock bb) { this.strictlyReaches(bb) or bb = this }

    /** Holds if `bb` is reachable from this block via one or more successor edges. */
    predicate strictlyReaches(BasicBlock bb) { bb = NewBasicBlock.super.getASuccessor+() }

    /** Holds if this block strictly dominates `bb`. */
    predicate strictlyDominates(BasicBlock bb) { NewBasicBlock.super.strictlyDominates(bb) }
  }

  /** Gets a nearest representative node following the entry node of scope `s`. */
  CfgNode scopeGetEntryNode(Py::Scope s) {
    exists(CfgImpl::ControlFlow::EntryNode start |
      start.getEnclosingCallable().asScope() = s
    |
      nextCfgNodeFrom(start, result)
    )
  }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* New-CFG version of NeverReachable.
|
||||
*
|
||||
* Original:
|
||||
* Checks that expressions annotated with `t.never` either have no CFG
|
||||
* node, or if they do, that the node is not reachable from its scope's
|
||||
* entry (including within the same basic block).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each annotation flagged by `neverReachable`, linking its enclosing test function.
from TimerAnnotation ann
|
||||
where neverReachable(ann)
|
||||
select ann, "Node annotated with t.never is reachable in $@", ann.getTestFunction(),
|
||||
ann.getTestFunction().getName()
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* New-CFG version of NoBackwardFlow.
|
||||
*
|
||||
* Original:
|
||||
* Checks that time never flows backward between consecutive timer annotations
|
||||
* in the CFG. For each pair of consecutive annotated nodes (A -> B), there must
|
||||
* exist timestamps a in A and b in B with a < b.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each node pair flagged by `noBackwardFlow`; the three `$@` placeholders link
// the `minA` timestamp expression of `a`, the node `b`, and the `maxB` timestamp expression of `b`.
from TimerCfgNode a, TimerCfgNode b, int minA, int maxB
|
||||
where noBackwardFlow(a, b, minA, maxB)
|
||||
select a, "Backward flow: $@ flows to $@ (max timestamp $@)", a.getTimestampExpr(minA),
|
||||
minA.toString(), b, b.getNode().toString(), b.getTimestampExpr(maxB), maxB.toString()
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* New-CFG version of NoBasicBlock.
|
||||
*
|
||||
* Checks that every annotated CFG node belongs to a basic block.
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each CFG node / test function pair flagged by `noBasicBlock`.
from CfgNode n, TestFunction f
|
||||
where noBasicBlock(n, f)
|
||||
select n, "CFG node in $@ does not belong to any basic block", f, f.getName()
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* New-CFG version of NoSharedReachable.
|
||||
*
|
||||
* Original:
|
||||
* Checks that two annotations sharing a timestamp value are on
|
||||
* mutually exclusive CFG paths (neither can reach the other).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each node pair flagged by `noSharedReachable` for timestamp `ts`; the message
// links the shared timestamp expression on `a` and the node `b`.
from TimerCfgNode a, TimerCfgNode b, int ts
|
||||
where noSharedReachable(a, b, ts)
|
||||
select a, "Shared timestamp $@ but this node reaches $@", a.getTimestampExpr(ts), ts.toString(), b,
|
||||
b.getNode().toString()
|
||||
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* New-CFG version of StrictForward.
|
||||
*
|
||||
* Original:
|
||||
* Stronger version of NoBackwardFlow: for consecutive annotated nodes
|
||||
* A -> B that both have a single timestamp (non-loop code) and B does
|
||||
* NOT dominate A (forward edge), requires max(A) < min(B).
|
||||
*/
|
||||
|
||||
import python
|
||||
import TimerUtils
|
||||
import NewCfgImpl
|
||||
|
||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
||||
|
||||
private import Utils
|
||||
private import Utils::CfgTests
|
||||
|
||||
// Report each node pair flagged by `strictForward`, linking the `maxA` timestamp
// expression of `a` and the `minB` timestamp expression of `b`.
from TimerCfgNode a, TimerCfgNode b, int maxA, int minB
|
||||
where strictForward(a, b, maxA, minB)
|
||||
select a, "Strict forward violation: $@ flows to $@", a.getTimestampExpr(maxA), "timestamp " + maxA,
|
||||
b.getTimestampExpr(minB), "timestamp " + minB
|
||||
@@ -3,14 +3,14 @@
|
||||
* Python control flow graph.
|
||||
*/
|
||||
|
||||
private import python as PY
|
||||
private import python as Py
|
||||
import TimerUtils
|
||||
|
||||
/** Existing Python CFG implementation of the evaluation-order signature. */
|
||||
module OldCfg implements EvalOrderCfgSig {
|
||||
class CfgNode = PY::ControlFlowNode;
|
||||
class CfgNode = Py::ControlFlowNode;
|
||||
|
||||
class BasicBlock = PY::BasicBlock;
|
||||
class BasicBlock = Py::BasicBlock;
|
||||
|
||||
CfgNode scopeGetEntryNode(PY::Scope s) { result = s.getEntryNode() }
|
||||
CfgNode scopeGetEntryNode(Py::Scope s) { result = s.getEntryNode() }
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ def test_nested_if_else(t):
|
||||
else:
|
||||
z = 2 @ t[dead(4)]
|
||||
else:
|
||||
z = 3 @ t[dead(4)]
|
||||
z = 3 @ t[dead(3), dead(4)]
|
||||
w = 0 @ t[5]
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Inline-expectations test for the store/load/delete/parameter
|
||||
* classification predicates on the new-CFG facade.
|
||||
*
|
||||
* Each tag fires when the corresponding predicate (`isLoad`,
|
||||
* `isStore`, `isDelete`, `isParameter`, `isAugLoad`, `isAugStore`)
|
||||
* holds on the canonical CFG node wrapping a `Py::Name` with the
|
||||
* given identifier. Subscript and attribute stores are not covered
|
||||
* by these tags — only the `Name`-typed targets/loads they involve.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
/**
 * Inline-expectations test configuration that reports, for each `Cfg::NameNode`,
 * which store/load/delete/parameter classification predicates hold on it.
 */
module StoreLoadTest implements TestSig {
  /** Gets one of the tags this test produces. */
  string getARelevantTag() {
    result in ["load", "store", "delete", "param", "augload", "augstore"]
  }

  /**
   * Holds if the name node at `location`, rendered as `element`, has the
   * classification `tag`; `value` is the node's identifier.
   */
  predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Cfg::NameNode name |
      name.getLocation() = location and
      name.toString() = element and
      name.getId() = value
    |
      // Plain loads and stores exclude the augmented-assignment halves,
      // which are reported under their own tags below.
      tag = "load" and name.isLoad() and not name.isAugLoad()
      or
      tag = "store" and name.isStore() and not name.isAugStore()
      or
      tag = "delete" and name.isDelete()
      or
      tag = "param" and name.isParameter()
      or
      tag = "augload" and name.isAugLoad()
      or
      tag = "augstore" and name.isAugStore()
    )
  }
}
|
||||
|
||||
import MakeTest<StoreLoadTest>
|
||||
56
python/ql/test/library-tests/ControlFlow/store-load/test.py
Normal file
56
python/ql/test/library-tests/ControlFlow/store-load/test.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# Store/load/delete/parameter classification on the new-CFG facade.
|
||||
#
|
||||
# Each annotated location carries the (sorted, deduplicated) set of
|
||||
# kinds the CFG facade reports there. Comparing against the legacy
|
||||
# 'semmle.python.Flow' classification is done by the comparison query
|
||||
# 'StoreLoadParity.ql' — annotations here are only the positive
|
||||
# assertions for the new facade.
|
||||
#
|
||||
# Tags:
|
||||
# load=<id> -- isLoad() fires on the Name
|
||||
# store=<id> -- isStore() fires
|
||||
# delete=<id> -- isDelete() fires
|
||||
# param=<id> -- isParameter() fires
|
||||
# augload=<id> -- isAugLoad() fires (the LHS of x += ... when read)
|
||||
# augstore=<id> -- isAugStore() fires (the LHS of x += ... when written)
|
||||
|
||||
|
||||
# --- plain load / store / delete ---
|
||||
|
||||
x = 1 # $ store=x
|
||||
y = x + 1 # $ store=y load=x
|
||||
print(y) # $ load=print load=y
|
||||
del x # $ delete=x
|
||||
|
||||
|
||||
# --- function definitions (parameters) ---
|
||||
|
||||
def f(a, b=2, *args, c, **kwargs): # $ store=f param=a param=b param=args param=c param=kwargs
|
||||
return a + b + c # $ load=a load=b load=c
|
||||
|
||||
|
||||
# --- augmented assignment splits one Name into load + store halves ---
|
||||
|
||||
def aug(): # $ store=aug
|
||||
n = 0 # $ store=n
|
||||
n += 1 # $ augload=n augstore=n
|
||||
return n # $ load=n
|
||||
|
||||
|
||||
# --- subscript / attribute stores ---
|
||||
|
||||
class C: # $ store=C
|
||||
pass
|
||||
|
||||
|
||||
def stores(obj, container, idx): # $ store=stores param=obj param=container param=idx
|
||||
obj.attr = 1 # $ load=obj
|
||||
container[idx] = 2 # $ load=container load=idx
|
||||
return obj # $ load=obj
|
||||
|
||||
|
||||
# --- tuple unpacking ---
|
||||
|
||||
def unpack(pair): # $ store=unpack param=pair
|
||||
a, b = pair # $ store=a store=b load=pair
|
||||
return a + b # $ load=a load=b
|
||||
@@ -589,11 +589,11 @@ def test_zip_tuple():
|
||||
|
||||
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
|
||||
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
|
||||
SINK_F(z[0][2])
|
||||
SINK_F(z[0][3])
|
||||
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
|
||||
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
||||
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
|
||||
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
|
||||
SINK_F(z[1][3])
|
||||
|
||||
@expects(4)
|
||||
|
||||
@@ -157,7 +157,7 @@ class MyClass2(object):
|
||||
print(self.foo) # $ tracked MISSING: tracked=foo
|
||||
|
||||
instance = MyClass2()
|
||||
print(instance.foo) # $ MISSING: tracked=foo tracked
|
||||
print(instance.foo) # $ tracked MISSING: tracked=foo
|
||||
instance.print_foo() # $ MISSING: tracked=foo
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ class Sub1(Base1):
|
||||
|
||||
sub1 = Sub1()
|
||||
sub1.read_foo()
|
||||
print(sub1.foo) # $ MISSING: tracked=foo tracked
|
||||
print(sub1.foo) # $ tracked MISSING: tracked=foo
|
||||
|
||||
|
||||
# attribute written in a subclass method, read in an inherited base class method
|
||||
@@ -210,7 +210,7 @@ class Sub2(Base2):
|
||||
|
||||
sub2 = Sub2()
|
||||
sub2.read_bar()
|
||||
print(sub2.bar) # $ MISSING: tracked=bar tracked
|
||||
print(sub2.bar) # $ tracked MISSING: tracked=bar
|
||||
|
||||
|
||||
# attribute written in a base class method, read on an instance of the subclass
|
||||
@@ -223,4 +223,4 @@ class Sub3(Base3):
|
||||
pass
|
||||
|
||||
sub3 = Sub3()
|
||||
print(sub3.baz) # $ MISSING: tracked=baz tracked
|
||||
print(sub3.baz) # $ tracked MISSING: tracked=baz
|
||||
|
||||
@@ -362,7 +362,7 @@ def test_load_in_bulk():
|
||||
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
|
||||
d = TestLoad.objects.in_bulk([1])
|
||||
for val in d.values():
|
||||
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
|
||||
SINK(val.text) # $ MISSING: flow
|
||||
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#select
|
||||
| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value |
|
||||
| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value |
|
||||
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value |
|
||||
@@ -24,6 +25,8 @@ edges
|
||||
| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | |
|
||||
| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | |
|
||||
| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | |
|
||||
@@ -51,6 +54,9 @@ nodes
|
||||
| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||
| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||
|
||||
@@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source
|
||||
cursor.close()
|
||||
|
||||
@app.get("/unsafe3/")
|
||||
async def unsafe3(name: str): # $ MISSING: Source
|
||||
async def unsafe3(name: str): # $ Source
|
||||
query = "select * from users where name=" + name
|
||||
cursor = hdb_con3.cursor()
|
||||
cursor.execute(query) # $ MISSING: Alert
|
||||
cursor.execute(query) # $ Alert
|
||||
cursor.close()
|
||||
|
||||
@app.get("/unsafe4/")
|
||||
|
||||
@@ -28,8 +28,6 @@ nodes
|
||||
| string_flow.rb:227:10:227:10 | a | semmle.label | a |
|
||||
subpaths
|
||||
testFailures
|
||||
| string_flow.rb:85:10:85:10 | a | Unexpected result: hasValueFlow=a |
|
||||
| string_flow.rb:227:10:227:10 | a | Unexpected result: hasValueFlow=a |
|
||||
#select
|
||||
| string_flow.rb:3:10:3:22 | call to new | string_flow.rb:2:9:2:18 | call to source | string_flow.rb:3:10:3:22 | call to new | $@ | string_flow.rb:2:9:2:18 | call to source | call to source |
|
||||
| string_flow.rb:85:10:85:10 | a | string_flow.rb:83:9:83:18 | call to source | string_flow.rb:85:10:85:10 | a | $@ | string_flow.rb:83:9:83:18 | call to source | call to source |
|
||||
|
||||
@@ -82,7 +82,7 @@ end
|
||||
def m_clear
|
||||
a = source "a"
|
||||
a.clear
|
||||
sink a
|
||||
sink a # $ SPURIOUS: hasValueFlow=a
|
||||
end
|
||||
|
||||
# concat and prepend omitted because they clash with the summaries for
|
||||
@@ -224,7 +224,7 @@ def m_replace
|
||||
b = source "b"
|
||||
sink a.replace(b) # $ hasTaintFlow=b
|
||||
# TODO: currently we get value flow for a, because we don't clear content
|
||||
sink a # $ hasTaintFlow=b
|
||||
sink a # $ hasTaintFlow=b SPURIOUS: hasValueFlow=a
|
||||
end
|
||||
|
||||
def m_reverse
|
||||
@@ -316,4 +316,4 @@ def m_upto(i)
|
||||
a.upto("b", true) { |x| sink x } # $ hasTaintFlow=a
|
||||
"b".upto(a) { |x| sink x } # $ hasTaintFlow=a
|
||||
"b".upto(a, true) { |x| sink x }
|
||||
end
|
||||
end
|
||||
|
||||
@@ -9,7 +9,7 @@ end
|
||||
class OneController < ActionController::Base
|
||||
before_action :a
|
||||
after_action :c
|
||||
|
||||
|
||||
def a
|
||||
@foo = params[:foo]
|
||||
end
|
||||
@@ -18,14 +18,14 @@ class OneController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo
|
||||
sink @foo # $ hasTaintFlow
|
||||
end
|
||||
end
|
||||
|
||||
class TwoController < ActionController::Base
|
||||
before_action :a
|
||||
after_action :c
|
||||
|
||||
|
||||
def a
|
||||
@foo = params[:foo]
|
||||
end
|
||||
@@ -35,14 +35,14 @@ class TwoController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo
|
||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
||||
end
|
||||
end
|
||||
|
||||
class ThreeController < ActionController::Base
|
||||
before_action :a
|
||||
after_action :c
|
||||
|
||||
|
||||
def a
|
||||
@foo = params[:foo]
|
||||
@foo = "safe"
|
||||
@@ -52,14 +52,14 @@ class ThreeController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo
|
||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
||||
end
|
||||
end
|
||||
|
||||
class FourController < ActionController::Base
|
||||
before_action :a
|
||||
after_action :c
|
||||
|
||||
|
||||
def a
|
||||
@foo.bar = params[:foo]
|
||||
end
|
||||
@@ -68,14 +68,14 @@ class FourController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink(@foo.bar)
|
||||
sink(@foo.bar) # $ hasTaintFlow
|
||||
end
|
||||
end
|
||||
|
||||
class FiveController < ActionController::Base
|
||||
before_action :a
|
||||
after_action :c
|
||||
|
||||
|
||||
def a
|
||||
self.taint_foo
|
||||
end
|
||||
@@ -84,10 +84,10 @@ class FiveController < ActionController::Base
|
||||
end
|
||||
|
||||
def c
|
||||
sink @foo
|
||||
sink @foo # $ hasTaintFlow
|
||||
end
|
||||
|
||||
|
||||
def taint_foo
|
||||
@foo = params[:foo]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -270,11 +270,6 @@ nodes
|
||||
| params_flow.rb:205:10:205:10 | a | semmle.label | a |
|
||||
subpaths
|
||||
testFailures
|
||||
| filter_flow.rb:21:10:21:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:38:10:38:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:55:10:55:13 | @foo | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:71:10:71:17 | call to bar | Unexpected result: hasTaintFlow |
|
||||
| filter_flow.rb:87:11:87:14 | @foo | Unexpected result: hasTaintFlow |
|
||||
#select
|
||||
| filter_flow.rb:21:10:21:13 | @foo | filter_flow.rb:14:12:14:17 | call to params | filter_flow.rb:21:10:21:13 | @foo | $@ | filter_flow.rb:14:12:14:17 | call to params | call to params |
|
||||
| filter_flow.rb:38:10:38:13 | @foo | filter_flow.rb:30:12:30:17 | call to params | filter_flow.rb:38:10:38:13 | @foo | $@ | filter_flow.rb:30:12:30:17 | call to params | call to params |
|
||||
|
||||
@@ -121,37 +121,3 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||
///
|
||||
/// Use `manual_rule!` when the transform needs control over capture
|
||||
/// translation timing — for example, when an outer rule needs to set
|
||||
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||
/// translation reaches inner rules that read that state.
|
||||
///
|
||||
/// ```text
|
||||
/// manual_rule!(
|
||||
/// (query_pattern field: (_) @name)
|
||||
/// {
|
||||
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||
/// let translated = ctx.translate(name)?;
|
||||
/// Ok(translated)
|
||||
/// }
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// Differences from [`rule!`]:
|
||||
/// - Captures are **not** auto-translated before the body runs; they
|
||||
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||
/// tree template, no `Ok(...)` wrap.
|
||||
#[proc_macro]
|
||||
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||
let input2: TokenStream2 = input.into();
|
||||
match parse::parse_manual_rule_top(input2) {
|
||||
Ok(output) => output.into(),
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
|
||||
/// Parse a single query node (possibly with a trailing `@capture`).
|
||||
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
let base = parse_query_atom(tokens)?;
|
||||
// Check for trailing @capture
|
||||
// Check for trailing @capture or @@capture
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume @
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
let name_str = capture_name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
|
||||
} else {
|
||||
let child = if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
let name_str = capture_name.to_string();
|
||||
quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
/// A single capture collected while scanning a query's tokens.
struct CaptureInfo {
|
||||
/// Identifier that follows the capture marker.
name: String,
|
||||
/// Multiplicity recorded for this capture when it was extracted.
multiplicity: CaptureMultiplicity,
|
||||
/// `true` for `@@name` captures: the auto-translate prefix skips them,
|
||||
/// so the bound `NodeRef` refers to the raw (input-schema) node.
|
||||
raw: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
@@ -708,6 +709,14 @@ fn extract_captures_inner(
|
||||
extract_captures_inner(&mut inner, captures, child_mult);
|
||||
}
|
||||
TokenTree::Punct(p) if p.as_char() == '@' => {
|
||||
// `@@name` marks the capture as raw (skip auto-translate).
|
||||
let raw = matches!(
|
||||
tokens.peek(),
|
||||
Some(TokenTree::Punct(p)) if p.as_char() == '@'
|
||||
);
|
||||
if raw {
|
||||
tokens.next(); // consume the second `@`
|
||||
}
|
||||
if let Some(TokenTree::Ident(name)) = tokens.next() {
|
||||
let mult = if parent_mult == CaptureMultiplicity::Repeated
|
||||
|| last_mult == CaptureMultiplicity::Repeated
|
||||
@@ -723,6 +732,7 @@ fn extract_captures_inner(
|
||||
captures.push(CaptureInfo {
|
||||
name: name.to_string(),
|
||||
multiplicity: mult,
|
||||
raw,
|
||||
});
|
||||
}
|
||||
last_mult = CaptureMultiplicity::Single;
|
||||
@@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
// Parse query
|
||||
let query_code = parse_query_top(query_stream.clone())?;
|
||||
|
||||
// Capture names marked `@@name` (raw) — passed to the auto-translate
|
||||
// prefix as a skip list so those captures keep their input-schema ids.
|
||||
let raw_capture_names: Vec<&str> = captures
|
||||
.iter()
|
||||
.filter(|c| c.raw)
|
||||
.map(|c| c.name.as_str())
|
||||
.collect();
|
||||
|
||||
// Generate capture bindings
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
@@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// Auto-translation prefix: recursively translate every
|
||||
// captured node before invoking the user's transform body.
|
||||
// captured node before invoking the user's transform body,
|
||||
// except for `@@name` captures listed in `__skip` which the
|
||||
// body consumes raw.
|
||||
// For OneShot rules this preserves the legacy behaviour
|
||||
// (input-schema captures translated to output-schema
|
||||
// nodes); for Repeating rules it is a no-op.
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||
let __skip: &[&str] = &[#(#raw_capture_names),*];
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
let __result: Vec<usize> = { #transform_body };
|
||||
@@ -905,106 +926,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse `manual_rule!( query { body } )`.
|
||||
///
|
||||
/// Like [`parse_rule_top`] but:
|
||||
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||
/// - Generates code that does NOT auto-translate captures before
|
||||
/// running the body. Capture variables refer to raw (input-schema)
|
||||
/// nodes; the body is responsible for explicit translation via
|
||||
/// `ctx.translate(...)`.
|
||||
/// - The body is included verbatim and must evaluate to
|
||||
/// `Result<Vec<usize>, String>`.
|
||||
///
/// # Errors
///
/// Returns an error when the input ends before a brace-delimited block is
/// found, when `parse_query_top` rejects the query tokens, or when any token
/// follows the body block.
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let mut tokens = input.into_iter().peekable();
|
||||
|
||||
// Collect query tokens up to the body block `{ ... }`.
|
||||
let mut query_tokens = Vec::new();
|
||||
loop {
|
||||
match tokens.peek() {
|
||||
None => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||
))
|
||||
}
|
||||
// The first brace group ends the query; it is only peeked here and
// is consumed further down as the body.
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||
_ => {
|
||||
query_tokens.push(tokens.next().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||
|
||||
// Extract captures from the query (same as in `rule!`).
|
||||
let captures = extract_captures(&query_stream);
|
||||
|
||||
// Parse the query into the QueryNode-building expression.
|
||||
let query_code = parse_query_top(query_stream)?;
|
||||
|
||||
// Generate capture bindings (same as in `rule!`).
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name = Ident::new(&cap.name, Span::call_site());
|
||||
let name_str = &cap.name;
|
||||
match cap.multiplicity {
|
||||
CaptureMultiplicity::Repeated => quote! {
|
||||
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||
.into_iter()
|
||||
.map(yeast::NodeRef)
|
||||
.collect();
|
||||
},
|
||||
CaptureMultiplicity::Optional => quote! {
|
||||
let #name: Option<yeast::NodeRef> =
|
||||
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||
},
|
||||
// The generated `unwrap()` panics when the rule runs if the capture
// is absent; presumably a successful match always binds a Single
// capture — TODO confirm.
CaptureMultiplicity::Single => quote! {
|
||||
let #name: yeast::NodeRef =
|
||||
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||
},
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Consume the body block.
|
||||
let body_group = match tokens.next() {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||
// Defensive: the collection loop above only exits normally when the
// next token is a brace group, so this arm should not be reachable.
other => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
format!(
|
||||
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||
),
|
||||
))
|
||||
}
|
||||
};
|
||||
let body_stream = body_group.stream();
|
||||
|
||||
// No tokens should follow the body.
|
||||
if let Some(tok) = tokens.next() {
|
||||
return Err(syn::Error::new_spanned(
|
||||
tok,
|
||||
"unexpected token after manual_rule! body",
|
||||
));
|
||||
}
|
||||
|
||||
// The expansion is a block expression that builds the query and wraps the
// user's body in the boxed closure passed to `yeast::Rule::new`. The
// bindings are emitted first so the body can use the capture names.
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// No auto-translate prefix for manual rules — the body
|
||||
// is responsible for translating captures explicitly.
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
#body_stream
|
||||
}))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Token utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -1013,6 +934,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
|
||||
}
|
||||
|
||||
/// Consume an `@` or `@@` capture marker and the following name ident.
|
||||
/// Caller has already verified `peek_is_at(tokens)`.
|
||||
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
|
||||
tokens.next(); // consume the first `@`
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume the second `@` of `@@`
|
||||
}
|
||||
expect_ident(tokens, "expected capture name after `@` or `@@`")
|
||||
}
|
||||
|
||||
fn peek_is_literal(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
|
||||
}
|
||||
@@ -1113,8 +1044,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
|
||||
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume @
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
let name_str = name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -1141,13 +1071,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
|
||||
}
|
||||
}
|
||||
|
||||
/// If `@name` follows a Repeated list element, wrap each child SingleNode
|
||||
/// inside the repetition with a Capture. This matches tree-sitter semantics
|
||||
/// where `(_)* @name` captures each matched node.
|
||||
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
|
||||
/// child SingleNode inside the repetition with a Capture. This matches
|
||||
/// tree-sitter semantics where `(_)* @name` captures each matched node.
|
||||
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
let name_str = name.to_string();
|
||||
// Re-parsing the element isn't practical, so we generate a wrapper
|
||||
// that creates a new Repeated with each child wrapped in a capture.
|
||||
|
||||
@@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
|
||||
single capture (`Id`) and `{..name}` splices a repeated capture
|
||||
(`Vec<Id>`).
|
||||
|
||||
### Raw captures (`@@name`)
|
||||
|
||||
The default `@name` capture marker is *auto-translated*: in OneShot
|
||||
phases the macro recursively translates the captured node before
|
||||
binding it, so `{name}` in the output template splices a node that
|
||||
already conforms to the output schema.
|
||||
|
||||
For rules that need the raw (input-schema) capture — typically to read
|
||||
its source text or to translate it explicitly with mutable context
|
||||
state between calls — use `@@name` instead. The body sees the original
|
||||
input-schema `NodeRef`:
|
||||
|
||||
```rust
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
// raw_lhs is untranslated: read its original source text.
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
// rhs is already translated by the auto-translate prefix.
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
|
||||
markers are equivalent (auto-translation is a no-op for repeating
|
||||
rules).
|
||||
|
||||
## Complete example: for-loop desugaring
|
||||
|
||||
This rule rewrites Ruby's `for pat in val do body end` into
|
||||
|
||||
@@ -80,6 +80,28 @@ impl Captures {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like [`try_map_all_captures`] but leaves captures whose name appears
|
||||
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
|
||||
/// (raw) captures alongside the default auto-translated `@name`
|
||||
/// captures.
|
||||
pub fn try_map_captures_except<E>(
|
||||
&mut self,
|
||||
skip: &[&str],
|
||||
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
|
||||
) -> Result<(), E> {
|
||||
for (name, ids) in self.captures.iter_mut() {
|
||||
if skip.contains(name) {
|
||||
continue;
|
||||
}
|
||||
let mut new_ids = Vec::with_capacity(ids.len());
|
||||
for &id in ids.iter() {
|
||||
new_ids.extend(f(id)?);
|
||||
}
|
||||
*ids = new_ids;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
|
||||
if let Some(from_ids) = self.captures.get(from) {
|
||||
let new_values = from_ids.iter().copied().map(f).collect();
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod schema;
|
||||
pub mod tree_builder;
|
||||
mod visitor;
|
||||
|
||||
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||
pub use yeast_macros::{query, rule, tree, trees};
|
||||
|
||||
use captures::Captures;
|
||||
pub use cursor::Cursor;
|
||||
@@ -48,6 +48,12 @@ impl From<NodeRef> for Id {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Id> for NodeRef {
|
||||
fn from(value: Id) -> Self {
|
||||
NodeRef(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`std::fmt::Display`], but the formatting routine is given access to
|
||||
/// the [`Ast`] so that node references can resolve to their source text.
|
||||
///
|
||||
@@ -757,13 +763,14 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
}
|
||||
|
||||
/// Translate every captured node in `captures` in place (OneShot phase
|
||||
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures.
|
||||
/// only), except for captures whose name appears in `skip` — those are
|
||||
/// left as raw (input-schema) ids for the rule body to consume
|
||||
/// directly. In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures regardless of `skip`.
|
||||
///
|
||||
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||
/// pre-existing "auto-translate captures before running the transform
|
||||
/// body" behavior. Manually-written transforms typically translate
|
||||
/// captures selectively via [`translate`] instead.
|
||||
/// Used by the `rule!` macro's generated prefix. `skip` is populated
|
||||
/// from the macro's `@@name` capture markers; for plain `@name`
|
||||
/// captures (and rules with no `@@` markers) it is empty.
|
||||
///
|
||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||
@@ -772,11 +779,12 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
captures: &mut Captures,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
skip: &[&str],
|
||||
) -> Result<(), String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||
let root = *matched_root;
|
||||
captures.try_map_all_captures(|cid| {
|
||||
captures.try_map_captures_except(skip, |cid| {
|
||||
if cid == root {
|
||||
Ok(vec![cid])
|
||||
} else {
|
||||
|
||||
@@ -1058,6 +1058,111 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `@@name` capture markers skip the auto-translate prefix:
|
||||
/// the body sees the *raw* (input-schema) NodeRef and can read its
|
||||
/// source text or call `ctx.translate(...)` explicitly. Compare with
|
||||
/// the bare `@name` form, where the auto-translate prefix runs the
|
||||
/// same translation up front and the body sees the post-translate id.
|
||||
#[test]
|
||||
fn test_raw_capture_marker() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
// `@@raw_lhs` is untranslated: the body reads its source text
|
||||
// ("x") and embeds it directly as the identifier content. `@rhs`
|
||||
// is auto-translated (rhs already points to (integer "INT")).
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
// Leaf rewrites: a translated identifier or integer gets the content
// "ID" / "INT", which makes translation visible in the dump.
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
// A single OneShot phase: the phase kind in which captures are
// auto-translated before the rule body runs.
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
// `method:` uses the raw source text ("x"); if `@@` were broken and
|
||||
// auto-translation ran on `raw_lhs`, it would still produce the
|
||||
// string "x" (source_text inherits the input range), so the dump
|
||||
// wouldn't change here. The companion test
|
||||
// `test_raw_capture_marker_explicit_translate` exercises the
|
||||
// stronger property that `ctx.translate(raw_lhs)?` succeeds and
|
||||
// produces the translated `(identifier "ID")`.
|
||||
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "x"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
/// Companion to `test_raw_capture_marker`: confirms that calling
|
||||
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
|
||||
/// produces the correctly-translated output-schema node. With `@`, the
|
||||
/// translation has already happened, so `ctx.translate(...)` inside the
|
||||
/// body would attempt to re-translate an output node (which has no
|
||||
/// matching rule and would error).
|
||||
#[test]
|
||||
fn test_raw_capture_marker_explicit_translate() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
// Rule under test: `raw_lhs` arrives untranslated and is translated by
// hand in the body, while `rhs` is translated by the generated prefix.
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
let translated_lhs = ctx.translate(raw_lhs)?;
|
||||
tree!((call
|
||||
method: {..translated_lhs}
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
// Leaf rewrites: a translated identifier or integer gets the content
// "ID" / "INT", which makes translation visible in the dump.
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
// A single OneShot phase: the phase kind in which captures are
// auto-translated before the rule body runs.
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
// `method:` must show the translated identifier ("ID"), not the source
// text "x": that is the evidence the explicit `ctx.translate` call ran.
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "ID"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
// ---- Cursor tests ----
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use codeql_extractor::extractor::simple;
|
||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree};
|
||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, rule, tree};
|
||||
|
||||
/// User context propagated from outer rules down to the inner rules that
|
||||
/// emit the corresponding output declarations, so that each emitted node
|
||||
@@ -15,26 +15,26 @@ struct SwiftContext {
|
||||
/// (`computed_getter`/`computed_setter`/`computed_modify`/
|
||||
/// `willset_clause`/`didset_clause`/`getter_specifier`/
|
||||
/// `setter_specifier`).
|
||||
property_name: Option<yeast::Id>,
|
||||
property_name: Option<yeast::NodeRef>,
|
||||
/// Translated type node for the property type. Set by the outer
|
||||
/// `property_binding` rule (computed accessors variant) and
|
||||
/// `protocol_property_declaration` when present; read by the
|
||||
/// accessor inner rules.
|
||||
property_type: Option<yeast::Id>,
|
||||
property_type: Option<yeast::NodeRef>,
|
||||
/// Default-value expression for the next translated `parameter`. Set
|
||||
/// by the outer `function_parameter` rule; read by the `parameter`
|
||||
/// rules.
|
||||
default_value: Option<yeast::Id>,
|
||||
default_value: Option<yeast::NodeRef>,
|
||||
/// Translated outer modifiers (e.g. visibility, attributes) to
|
||||
/// attach to each child of a flattening outer rule. Set by
|
||||
/// `property_declaration`, `enum_entry`, and
|
||||
/// `protocol_property_declaration`.
|
||||
outer_modifiers: Vec<yeast::Id>,
|
||||
outer_modifiers: Vec<yeast::NodeRef>,
|
||||
/// The `let`/`var` binding modifier for a `property_declaration`.
|
||||
/// Set by `property_declaration`; read by the inner declaration
|
||||
/// rules (`property_binding` variants, accessor rules) so they
|
||||
/// emit it as part of the output node's `modifier:` field.
|
||||
binding_modifier: Option<yeast::Id>,
|
||||
binding_modifier: Option<yeast::NodeRef>,
|
||||
/// True when the current child of a flattening outer rule is not
|
||||
/// the first one — its inner rule should emit a
|
||||
/// `chained_declaration` modifier so the original grouping can be
|
||||
@@ -45,10 +45,10 @@ struct SwiftContext {
|
||||
/// Build a freshly-created `chained_declaration` modifier node if
|
||||
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
|
||||
/// emit the chained tag for non-first children of a flattening outer
|
||||
/// rule. Returns `Option<Id>` so it splices via `{..…}` to 0 or 1 ids.
|
||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::Id> {
|
||||
/// rule. Returns `Option<NodeRef>` so it splices via `{..…}` to 0 or 1 ids.
|
||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::NodeRef> {
|
||||
if ctx.is_chained {
|
||||
Some(ctx.literal("modifier", "chained_declaration"))
|
||||
Some(ctx.literal("modifier", "chained_declaration").into())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -192,21 +192,15 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// this whole property_binding is itself a non-first declarator
|
||||
// of a containing property_declaration); subsequent accessors
|
||||
// always emit `chained_declaration`.
|
||||
manual_rule!(
|
||||
rule!(
|
||||
(property_binding
|
||||
name: @pattern
|
||||
type: _? @ty
|
||||
computed_value: (computed_property accessor: _+ @accessors))
|
||||
{
|
||||
// Translate `ty` first so the context holds an
|
||||
// output-schema node id.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
// Build the property-name identifier from the
|
||||
// (untranslated) pattern leaf.
|
||||
let name_id = tree!((identifier #{pattern}));
|
||||
|
||||
ctx.property_name = Some(name_id);
|
||||
ctx.property_type = translated_ty;
|
||||
computed_value: (computed_property accessor: _+ @@accessors))
|
||||
=>
|
||||
{..{
|
||||
ctx.property_name = Some(tree!((identifier #{pattern})).into());
|
||||
ctx.property_type = ty;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
@@ -215,8 +209,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
}
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
result
|
||||
}}
|
||||
),
|
||||
// Computed property: shorthand getter (no explicit get/set, just
|
||||
// statements) → a single accessor_declaration with kind "get".
|
||||
@@ -248,30 +242,26 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// The `variable_declaration` itself inherits the outer rule's
|
||||
// chained state; observers always get `chained_declaration`
|
||||
// because they're subsequent outputs of this flattening rule.
|
||||
manual_rule!(
|
||||
rule!(
|
||||
(property_binding
|
||||
name: (pattern bound_identifier: @name)
|
||||
type: _? @ty
|
||||
value: _? @val
|
||||
observers: (willset_didset_block willset: _? @ws didset: _? @ds))
|
||||
{
|
||||
// Translate ty and val so the variable_declaration
|
||||
// below contains output-schema nodes.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
let translated_val = ctx.translate_opt(val)?;
|
||||
|
||||
observers: (willset_didset_block willset: _? @@ws didset: _? @@ds))
|
||||
=>
|
||||
{..{
|
||||
let var_decl = tree!(
|
||||
(variable_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
type: {..translated_ty}
|
||||
value: {..translated_val})
|
||||
type: {..ty}
|
||||
value: {..val})
|
||||
);
|
||||
|
||||
// Publish the property name for the observer rules.
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
||||
// Observers are subsequent outputs of this flattening
|
||||
// rule, so they always get `chained_declaration`.
|
||||
ctx.is_chained = true;
|
||||
@@ -280,8 +270,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
for obs in ws.into_iter().chain(ds) {
|
||||
result.extend(ctx.translate(obs)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
result
|
||||
}}
|
||||
),
|
||||
// property_binding with any pattern name (identifier or
|
||||
// destructuring). Reads outer modifiers / chained tag from `ctx`.
|
||||
@@ -309,27 +299,24 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// inner declaration rules (`property_binding` variants,
|
||||
// accessor inner rules) read these fields and emit complete
|
||||
// `modifier:` lists from the start.
|
||||
manual_rule!(
|
||||
rule!(
|
||||
(property_declaration
|
||||
binding: (value_binding_pattern mutability: @binding_kind)
|
||||
declarator: _* @decls
|
||||
binding: (value_binding_pattern mutability: @@binding_kind)
|
||||
declarator: _* @@decls
|
||||
(modifiers)* @mods)
|
||||
{
|
||||
let binding_text = ctx.ast.source_text(binding_kind.0);
|
||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text));
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
=>
|
||||
{..{
|
||||
let binding_text = ctx.ast.source_text(binding_kind.into());
|
||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text).into());
|
||||
ctx.outer_modifiers = mods;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, decl) in decls.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(decl)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
result
|
||||
}}
|
||||
),
|
||||
// ---- Enums ----
|
||||
// enum_type_parameter → parameter (with optional name as pattern).
|
||||
@@ -386,22 +373,19 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// into `ctx` and translate each case with `ctx.is_chained`
|
||||
// toggled per iteration so the inner `enum_case_entry` rules
|
||||
// emit complete `modifier:` lists from the start.
|
||||
manual_rule!(
|
||||
(enum_entry case: _+ @cases (modifiers)* @mods)
|
||||
{
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
rule!(
|
||||
(enum_entry case: _+ @@cases (modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
ctx.outer_modifiers = mods;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, case) in cases.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(case)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
result
|
||||
}}
|
||||
),
|
||||
// Plain assignment: `x = expr`
|
||||
rule!(
|
||||
@@ -476,12 +460,13 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// optional default values. Publishes the default value into `ctx`
|
||||
// before translating the inner `parameter` so the `parameter`
|
||||
// rules can include it as a `default:` field directly.
|
||||
manual_rule!(
|
||||
(function_parameter parameter: @p default_value: _? @def)
|
||||
{
|
||||
ctx.default_value = ctx.translate_opt(def)?;
|
||||
ctx.translate(p)
|
||||
}
|
||||
rule!(
|
||||
(function_parameter parameter: @@p default_value: _? @def)
|
||||
=>
|
||||
{..{
|
||||
ctx.default_value = def;
|
||||
ctx.translate(p)?
|
||||
}}
|
||||
),
|
||||
// Parameter with external name and type
|
||||
rule!(
|
||||
@@ -1026,28 +1011,25 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
// inner `getter_specifier`/`setter_specifier` rules emit
|
||||
// complete nodes from the start (including the
|
||||
// `chained_declaration` tag for non-first accessors).
|
||||
manual_rule!(
|
||||
rule!(
|
||||
(protocol_property_declaration
|
||||
name: (pattern bound_identifier: @name)
|
||||
requirements: (protocol_property_requirements accessor: _+ @accessors)
|
||||
requirements: (protocol_property_requirements accessor: _+ @@accessors)
|
||||
type: _? @ty
|
||||
(modifiers)* @mods)
|
||||
{
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
ctx.property_type = ctx.translate_opt(ty)?;
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
=>
|
||||
{..{
|
||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
||||
ctx.property_type = ty;
|
||||
ctx.outer_modifiers = mods;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
result
|
||||
}}
|
||||
),
|
||||
// getter_specifier / setter_specifier → bodyless accessor_declaration
|
||||
// getter_specifier / setter_specifier → bodyless
|
||||
|
||||
Reference in New Issue
Block a user