mirror of
https://github.com/github/codeql.git
synced 2026-07-03 10:35:29 +02:00
Compare commits
3 Commits
yoff/pytho
...
python-two
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdd557f877 | ||
|
|
2bf6031c0f | ||
|
|
a5444b573a |
@@ -1,2 +0,0 @@
|
|||||||
import semmle.python.controlflow.internal.AstNodeImpl
|
|
||||||
import ControlFlow::Consistency
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
---
|
|
||||||
category: minorAnalysis
|
|
||||||
---
|
|
||||||
* A new Python control flow graph implementation has been added under `semmle.python.controlflow.internal.Cfg` (backed by `AstNodeImpl.qll`), built on the shared `codeql.controlflow.ControlFlowGraph` library. It is not yet used by the dataflow library or any production query; the legacy CFG in `semmle/python/Flow.qll` remains the default. The new library is exposed for tests and for upcoming migrations.
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
---
|
|
||||||
category: minorAnalysis
|
|
||||||
---
|
|
||||||
* The new (shared-CFG-based) Python control flow graph now visits parameter and return type annotations as CFG nodes for function definitions, matching the legacy CFG. This restores annotation-based type tracking through framework models such as FastAPI's `Depends()`, Pydantic request models, Starlette `WebSocket` handlers, and any other models that flow a class reference through `Parameter.getAnnotation()` to identify instances of the annotated class.
|
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
category: minorAnalysis
|
||||||
|
---
|
||||||
|
|
||||||
|
- Temporarily disabled the `instanceFieldStep` disjunct of the internal `TypeTrackingInput::levelStepCall` predicate, which was introduced in 7.2.0 and caused catastrophic query slowdowns on some OOP-heavy Python codebases (e.g. `mypy` and `dask`).
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
/**
|
|
||||||
* @name Print CFG
|
|
||||||
* @description Produces a representation of a file's Control Flow Graph.
|
|
||||||
* This query is used by the VS Code extension.
|
|
||||||
* @id py/print-cfg
|
|
||||||
* @kind graph
|
|
||||||
* @tags ide-contextual-queries/print-cfg
|
|
||||||
*/
|
|
||||||
|
|
||||||
import semmle.python.Files as Files
|
|
||||||
// import semmle.python.Scope
|
|
||||||
import semmle.python.controlflow.internal.AstNodeImpl
|
|
||||||
|
|
||||||
external string selectedSourceFile();
|
|
||||||
|
|
||||||
private predicate selectedSourceFileAlias = selectedSourceFile/0;
|
|
||||||
|
|
||||||
external int selectedSourceLine();
|
|
||||||
|
|
||||||
private predicate selectedSourceLineAlias = selectedSourceLine/0;
|
|
||||||
|
|
||||||
external int selectedSourceColumn();
|
|
||||||
|
|
||||||
private predicate selectedSourceColumnAlias = selectedSourceColumn/0;
|
|
||||||
|
|
||||||
module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig<Files::File> {
|
|
||||||
predicate selectedSourceFile = selectedSourceFileAlias/0;
|
|
||||||
|
|
||||||
predicate selectedSourceLine = selectedSourceLineAlias/0;
|
|
||||||
|
|
||||||
predicate selectedSourceColumn = selectedSourceColumnAlias/0;
|
|
||||||
|
|
||||||
predicate cfgScopeSpan(
|
|
||||||
Ast::Callable scope, Files::File file, int startLine, int startColumn, int endLine,
|
|
||||||
int endColumn
|
|
||||||
) {
|
|
||||||
file = scope.getLocation().getFile() and
|
|
||||||
scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
import ControlFlow::ViewCfgQuery<Files::File, ViewCfgQueryInput>
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1138,7 +1138,9 @@ predicate clearsContent(Node n, ContentSet cs) {
|
|||||||
* Holds if the value that is being tracked is expected to be stored inside content `c`
|
* Holds if the value that is being tracked is expected to be stored inside content `c`
|
||||||
* at node `n`.
|
* at node `n`.
|
||||||
*/
|
*/
|
||||||
predicate expectsContent(Node n, ContentSet c) { none() }
|
predicate expectsContent(Node n, ContentSet c) {
|
||||||
|
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if values stored inside attribute `c` are cleared at node `n`.
|
* Holds if values stored inside attribute `c` are cleared at node `n`.
|
||||||
|
|||||||
@@ -91,6 +91,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
|
|||||||
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
|
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
|
||||||
|
|
||||||
bindingset[token]
|
bindingset[token]
|
||||||
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
|
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
|
||||||
// needed to support `Argument[x..y]` ranges
|
// needed to support `Argument[x..y]` ranges
|
||||||
|
|||||||
@@ -170,7 +170,13 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
|
|||||||
|
|
||||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
||||||
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) {
|
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) {
|
||||||
instanceFieldStep(nodeFrom, nodeTo)
|
// HOTFIX: `instanceFieldStep` is temporarily disabled (via `and none()`).
|
||||||
|
// It uses `classInstanceTracker(cls)` -- itself a type-tracker run --
|
||||||
|
// from inside `levelStepCall`, creating a structural mutual recursion
|
||||||
|
// that causes catastrophic query slowdowns on some OOP-heavy Python
|
||||||
|
// codebases (e.g. mypy and dask). The `and none()` should be removed
|
||||||
|
// once that recursion is redesigned.
|
||||||
|
instanceFieldStep(nodeFrom, nodeTo) and none()
|
||||||
or
|
or
|
||||||
inheritedFieldStep(nodeFrom, nodeTo)
|
inheritedFieldStep(nodeFrom, nodeTo)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4199,11 +4199,9 @@ module StdlibPrivate {
|
|||||||
// The positional argument contains a mapping.
|
// The positional argument contains a mapping.
|
||||||
// TODO: these values can be overwritten by keyword arguments
|
// TODO: these values can be overwritten by keyword arguments
|
||||||
// - dict mapping
|
// - dict mapping
|
||||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
input = "Argument[0].WithAnyDictionaryElement" and
|
||||||
input = "Argument[0].DictionaryElement[" + key + "]" and
|
output = "ReturnValue" and
|
||||||
output = "ReturnValue.DictionaryElement[" + key + "]" and
|
preservesValue = true
|
||||||
preservesValue = true
|
|
||||||
)
|
|
||||||
or
|
or
|
||||||
// - list-of-pairs mapping
|
// - list-of-pairs mapping
|
||||||
input = "Argument[0].ListElement.TupleElement[1]" and
|
input = "Argument[0].ListElement.TupleElement[1]" and
|
||||||
@@ -4240,9 +4238,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
// Element content is mutated into list element content
|
// Element content is mutated into list element content
|
||||||
@@ -4266,11 +4262,9 @@ module StdlibPrivate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].WithAnyTupleElement" and
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]" and
|
output = "ReturnValue" and
|
||||||
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
|
preservesValue = true
|
||||||
preservesValue = true
|
|
||||||
)
|
|
||||||
or
|
or
|
||||||
input = "Argument[0].ListElement" and
|
input = "Argument[0].ListElement" and
|
||||||
output = "ReturnValue" and
|
output = "ReturnValue" and
|
||||||
@@ -4294,9 +4288,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue.SetElement" and
|
output = "ReturnValue.SetElement" and
|
||||||
@@ -4342,9 +4334,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue.ListElement" and
|
output = "ReturnValue.ListElement" and
|
||||||
@@ -4372,9 +4362,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
content = "SetElement"
|
content = "SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
content = "AnyTupleElement"
|
||||||
content = "TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
|
|
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
input = "Argument[0]." + content and
|
input = "Argument[0]." + content and
|
||||||
@@ -4404,9 +4392,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue.ListElement" and
|
output = "ReturnValue.ListElement" and
|
||||||
@@ -4434,9 +4420,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue" and
|
output = "ReturnValue" and
|
||||||
@@ -4468,9 +4452,7 @@ module StdlibPrivate {
|
|||||||
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
|
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
|
||||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
// TODO: Once we have TupleElementAny, this generality can be increased.
|
||||||
i = 0 and
|
i = 0 and
|
||||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
input = "Argument[1].AnyTupleElement"
|
||||||
input = "Argument[1].TupleElement[" + j.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "Argument[0].Parameter[" + i.toString() + "]" and
|
output = "Argument[0].Parameter[" + i.toString() + "]" and
|
||||||
@@ -4499,9 +4481,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[1].SetElement"
|
input = "Argument[1].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[1].AnyTupleElement"
|
||||||
input = "Argument[1].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
|
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
|
||||||
@@ -4525,9 +4505,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[0].SetElement"
|
input = "Argument[0].SetElement"
|
||||||
or
|
or
|
||||||
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
|
input = "Argument[0].AnyTupleElement"
|
||||||
input = "Argument[0].TupleElement[" + i.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||||
@@ -4552,12 +4530,7 @@ module StdlibPrivate {
|
|||||||
or
|
or
|
||||||
input = "Argument[" + i.toString() + "].SetElement"
|
input = "Argument[" + i.toString() + "].SetElement"
|
||||||
or
|
or
|
||||||
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
|
input = "Argument[" + i.toString() + "].AnyTupleElement"
|
||||||
// TODO: Once we have TupleElementAny, this generality can be increased.
|
|
||||||
i in [0 .. 1] and
|
|
||||||
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
|
|
||||||
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
|
|
||||||
)
|
|
||||||
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
|
||||||
) and
|
) and
|
||||||
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
|
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
|
||||||
@@ -4580,12 +4553,6 @@ module StdlibPrivate {
|
|||||||
override DataFlow::ArgumentNode getACallback() { none() }
|
override DataFlow::ArgumentNode getACallback() { none() }
|
||||||
|
|
||||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||||
exists(DataFlow::Content c |
|
|
||||||
input = "Argument[self]." + c.getMaDRepresentation() and
|
|
||||||
output = "ReturnValue." + c.getMaDRepresentation() and
|
|
||||||
preservesValue = true
|
|
||||||
)
|
|
||||||
or
|
|
||||||
input = "Argument[self]" and
|
input = "Argument[self]" and
|
||||||
output = "ReturnValue" and
|
output = "ReturnValue" and
|
||||||
preservesValue = true
|
preservesValue = true
|
||||||
@@ -4741,12 +4708,10 @@ module StdlibPrivate {
|
|||||||
override DataFlow::ArgumentNode getACallback() { none() }
|
override DataFlow::ArgumentNode getACallback() { none() }
|
||||||
|
|
||||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
input = "Argument[self].AnyDictionaryElement" and
|
||||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
output = "ReturnValue.TupleElement[1]" and
|
||||||
output = "ReturnValue.TupleElement[1]" and
|
preservesValue = true
|
||||||
preservesValue = true
|
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
||||||
// TODO: put `key` into "ReturnValue.TupleElement[0]"
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4825,11 +4790,9 @@ module StdlibPrivate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
input = "Argument[self].AnyDictionaryElement" and
|
||||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
output = "ReturnValue.ListElement" and
|
||||||
output = "ReturnValue.ListElement" and
|
preservesValue = true
|
||||||
preservesValue = true
|
|
||||||
)
|
|
||||||
or
|
or
|
||||||
input = "Argument[self]" and
|
input = "Argument[self]" and
|
||||||
output = "ReturnValue" and
|
output = "ReturnValue" and
|
||||||
@@ -4876,11 +4839,9 @@ module StdlibPrivate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
|
||||||
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
|
input = "Argument[self].AnyDictionaryElement" and
|
||||||
input = "Argument[self].DictionaryElement[" + key + "]" and
|
output = "ReturnValue.ListElement.TupleElement[1]" and
|
||||||
output = "ReturnValue.ListElement.TupleElement[1]" and
|
preservesValue = true
|
||||||
preservesValue = true
|
|
||||||
)
|
|
||||||
or
|
or
|
||||||
// TODO: Add the keys to output list
|
// TODO: Add the keys to output list
|
||||||
input = "Argument[self]" and
|
input = "Argument[self]" and
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
consistencyOverview
|
|
||||||
| deadEnd | 1 |
|
|
||||||
deadEnd
|
|
||||||
| without_loop.py:7:5:7:9 | Break |
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
/**
|
|
||||||
* Phase -1 of the dataflow CFG migration: verifies that every variable
|
|
||||||
* binding visible to the AST (`Name.defines(v)`) corresponds to a CFG node
|
|
||||||
* in the new CFG (`semmle.python.controlflow.internal.AstNodeImpl`).
|
|
||||||
*
|
|
||||||
* The expected tag is `cfgdefines=<name>`. Each binding annotation in the
|
|
||||||
* test sources looks like `# $ cfgdefines=x` for a binding currently
|
|
||||||
* covered by the new CFG, or `# $ MISSING: cfgdefines=x` for a binding
|
|
||||||
* that is known to be uncovered (a "red" test case that should be
|
|
||||||
* green-flipped once the corresponding `cfg-ext-*` extension lands).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
|
||||||
import utils.test.InlineExpectationsTest
|
|
||||||
|
|
||||||
module CfgBindingsTest implements TestSig {
|
|
||||||
string getARelevantTag() { result = "cfgdefines" }
|
|
||||||
|
|
||||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
|
||||||
exists(Name n, Variable v, CfgImpl::ControlFlowNode cfg |
|
|
||||||
n.defines(v) and
|
|
||||||
cfg.getAstNode().asExpr() = n and
|
|
||||||
location = n.getLocation() and
|
|
||||||
element = n.toString() and
|
|
||||||
tag = "cfgdefines" and
|
|
||||||
value = v.getId()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
import MakeTest<CfgBindingsTest>
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
# Annotated assignment (PEP 526). Both with and without an initializer.
|
|
||||||
|
|
||||||
a: int = 1 # $ cfgdefines=a
|
|
||||||
b: str = "hi" # $ cfgdefines=b
|
|
||||||
|
|
||||||
# Annotation without value: the AST records `c` as defined,
|
|
||||||
# and the new CFG now visits it via the AnnAssignStmt wrapper.
|
|
||||||
c: int # $ cfgdefines=c
|
|
||||||
|
|
||||||
class K: # $ cfgdefines=K
|
|
||||||
field: int = 0 # $ cfgdefines=field
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
# Compound (tuple/list) assignment targets — actually wired in the new CFG.
|
|
||||||
|
|
||||||
a, b = (1, 2) # $ cfgdefines=a cfgdefines=b
|
|
||||||
[c, d] = [3, 4] # $ cfgdefines=c cfgdefines=d
|
|
||||||
|
|
||||||
# Nested unpacking.
|
|
||||||
(e, (f, g)) = (1, (2, 3)) # $ cfgdefines=e cfgdefines=f cfgdefines=g
|
|
||||||
|
|
||||||
# Star unpacking.
|
|
||||||
h, *i = [1, 2, 3] # $ cfgdefines=h cfgdefines=i
|
|
||||||
|
|
||||||
# Chained assignment with compound target.
|
|
||||||
j = k, l = (5, 6) # $ cfgdefines=j cfgdefines=k cfgdefines=l
|
|
||||||
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# Comprehension and `for` loop targets — wired in the new CFG.
|
|
||||||
# Comprehensions are nested function scopes with a synthetic `.0` parameter
|
|
||||||
# bound to the iterable.
|
|
||||||
|
|
||||||
# Bare-name `for` target.
|
|
||||||
for i in range(3): # $ cfgdefines=i
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Compound `for` target.
|
|
||||||
for k, v in [(1, 2)]: # $ cfgdefines=k cfgdefines=v
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Comprehension targets.
|
|
||||||
_ = [x for x in range(3)] # $ cfgdefines=_ cfgdefines=x cfgdefines=.0
|
|
||||||
_ = {y: z for y, z in []} # $ cfgdefines=_ cfgdefines=y cfgdefines=z cfgdefines=.0
|
|
||||||
_ = (a for a in []) # $ cfgdefines=_ cfgdefines=a cfgdefines=.0
|
|
||||||
|
|
||||||
# Nested comprehensions.
|
|
||||||
_ = [b for c in [] for b in c] # $ cfgdefines=_ cfgdefines=c cfgdefines=b cfgdefines=.0
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
# Reachability of code following a try whose body always returns.
|
|
||||||
#
|
|
||||||
# The new CFG models exception edges for raise-prone expressions when
|
|
||||||
# they appear inside a `try` (or `with`) statement, mirroring Java's
|
|
||||||
# `mayThrow`. This means the body of a `try` has both a normal
|
|
||||||
# completion edge and an exception edge to its handlers, so code
|
|
||||||
# following the try-statement is reachable via the except-handler path
|
|
||||||
# even when the try-body would otherwise always return.
|
|
||||||
#
|
|
||||||
# Code that is not reachable under either normal or exception flow
|
|
||||||
# (for example, the `else` clause of a try whose body unconditionally
|
|
||||||
# raises) remains correctly classified as dead.
|
|
||||||
|
|
||||||
|
|
||||||
def f(obj): # $ cfgdefines=f cfgdefines=obj
|
|
||||||
try:
|
|
||||||
return len(obj)
|
|
||||||
except TypeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# The try-body always returns, but `len(obj)` can raise (it is
|
|
||||||
# inside the try, so we model its exception edge). The
|
|
||||||
# `except TypeError: pass` handler falls through to here, making
|
|
||||||
# the code below reachable.
|
|
||||||
try:
|
|
||||||
hint = type(obj).__length_hint__ # $ cfgdefines=hint
|
|
||||||
except AttributeError:
|
|
||||||
return None
|
|
||||||
return hint
|
|
||||||
|
|
||||||
|
|
||||||
def g(): # $ cfgdefines=g
|
|
||||||
try:
|
|
||||||
raise Exception("inner")
|
|
||||||
except:
|
|
||||||
raise Exception("outer")
|
|
||||||
else:
|
|
||||||
# Unreachable: the inner try body always raises (via an explicit
|
|
||||||
# `raise`, which is modelled unconditionally), so the `else:`
|
|
||||||
# clause never runs.
|
|
||||||
hit_inner_else = True
|
|
||||||
|
|
||||||
|
|
||||||
def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key
|
|
||||||
try:
|
|
||||||
return cache[key]
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Same pattern as `f`: reachable via the except-handler fall-through.
|
|
||||||
value = compute(key) # $ cfgdefines=value
|
|
||||||
cache[key] = value
|
|
||||||
return value
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
# Decorated `def`/`class` — wired in the new CFG.
|
|
||||||
|
|
||||||
|
|
||||||
def deco(f): # $ cfgdefines=deco cfgdefines=f
|
|
||||||
return f
|
|
||||||
|
|
||||||
|
|
||||||
@deco
|
|
||||||
def decorated_func(): # $ cfgdefines=decorated_func
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@deco
|
|
||||||
class DecoratedClass: # $ cfgdefines=DecoratedClass
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Stacked decorators.
|
|
||||||
@deco
|
|
||||||
@deco
|
|
||||||
def doubly(): # $ cfgdefines=doubly
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Inside a class body.
|
|
||||||
class Outer: # $ cfgdefines=Outer
|
|
||||||
@staticmethod
|
|
||||||
def inner(): # $ cfgdefines=inner
|
|
||||||
pass
|
|
||||||
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
# Exception-handler name bindings. These are already wired in the new
|
|
||||||
# CFG provided the try body can raise; `raise` statements are reliably
|
|
||||||
# treated as exception sources.
|
|
||||||
|
|
||||||
try:
|
|
||||||
raise ValueError("oops")
|
|
||||||
except ValueError as e: # $ cfgdefines=e
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
raise TypeError("oops")
|
|
||||||
except (TypeError, KeyError) as err: # $ cfgdefines=err
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Exception groups (Python 3.11+).
|
|
||||||
try:
|
|
||||||
raise ValueError("oops")
|
|
||||||
except* ValueError as eg: # $ cfgdefines=eg
|
|
||||||
pass
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
# Import aliases — all bound names below are now reachable via the new
|
|
||||||
# CFG's `ImportStmt` wrapper.
|
|
||||||
|
|
||||||
import os # $ cfgdefines=os
|
|
||||||
import os.path # $ cfgdefines=os
|
|
||||||
import os as o # $ cfgdefines=o
|
|
||||||
from os import path # $ cfgdefines=path
|
|
||||||
from os import path as p # $ cfgdefines=p
|
|
||||||
from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep
|
|
||||||
from os import (
|
|
||||||
getcwd, # $ cfgdefines=getcwd
|
|
||||||
getcwdb, # $ cfgdefines=getcwdb
|
|
||||||
)
|
|
||||||
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
# Match-statement pattern bindings — wired in the new CFG.
|
|
||||||
|
|
||||||
def f(subject): # $ cfgdefines=f cfgdefines=subject
|
|
||||||
match subject:
|
|
||||||
case x: # $ cfgdefines=x
|
|
||||||
pass
|
|
||||||
case [a, b]: # $ cfgdefines=a cfgdefines=b
|
|
||||||
pass
|
|
||||||
case {"k": v}: # $ cfgdefines=v
|
|
||||||
pass
|
|
||||||
case Point(p, q): # $ cfgdefines=p cfgdefines=q
|
|
||||||
pass
|
|
||||||
case [_, *rest]: # $ cfgdefines=rest
|
|
||||||
pass
|
|
||||||
case (1 | 2) as n: # $ cfgdefines=n
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class Point: # $ cfgdefines=Point
|
|
||||||
__match_args__ = ("x", "y") # $ cfgdefines=__match_args__
|
|
||||||
x: int # $ cfgdefines=x
|
|
||||||
y: int # $ cfgdefines=y
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
# Function parameters.
|
|
||||||
|
|
||||||
def positional(a, b): # $ cfgdefines=positional cfgdefines=a cfgdefines=b
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def with_default(x=1, y=2): # $ cfgdefines=with_default cfgdefines=x cfgdefines=y
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def with_vararg(*args): # $ cfgdefines=with_vararg cfgdefines=args
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def with_kwarg(**kwargs): # $ cfgdefines=with_kwarg cfgdefines=kwargs
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def with_kwonly(*, k1, k2=5): # $ cfgdefines=with_kwonly cfgdefines=k1 cfgdefines=k2
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def kitchen_sink(a, b=2, *args, k1, k2=5, **kw): # $ cfgdefines=kitchen_sink cfgdefines=a cfgdefines=b cfgdefines=args cfgdefines=k1 cfgdefines=k2 cfgdefines=kw
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Methods get `self` / `cls`.
|
|
||||||
class C: # $ cfgdefines=C
|
|
||||||
def method(self, x): # $ cfgdefines=method cfgdefines=self cfgdefines=x
|
|
||||||
pass
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def cmethod(cls, x): # $ cfgdefines=cmethod cfgdefines=cls cfgdefines=x
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Lambda parameter.
|
|
||||||
_ = lambda p: p + 1 # $ cfgdefines=_ cfgdefines=p
|
|
||||||
|
|
||||||
# PEP 570 positional-only.
|
|
||||||
def pos_only(a, b, /, c): # $ cfgdefines=pos_only cfgdefines=a cfgdefines=b cfgdefines=c
|
|
||||||
pass
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
# Simple bindings that should already work in the new CFG.
|
|
||||||
# No MISSING annotations expected.
|
|
||||||
|
|
||||||
x = 1 # $ cfgdefines=x
|
|
||||||
y = x + 1 # $ cfgdefines=y
|
|
||||||
|
|
||||||
def f(): # $ cfgdefines=f
|
|
||||||
pass
|
|
||||||
|
|
||||||
class C: # $ cfgdefines=C
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Re-assignment.
|
|
||||||
x = 2 # $ cfgdefines=x
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# PEP 695 type parameters (Python 3.12+).
|
|
||||||
|
|
||||||
# PEP 695 type-param names on `def`/`class` bind in an annotation scope
|
|
||||||
# that nests the function/class body — they have no CFG node in the
|
|
||||||
# enclosing scope (matching the legacy CFG).
|
|
||||||
def func[T](x: T) -> T: # $ cfgdefines=func cfgdefines=x
|
|
||||||
return x
|
|
||||||
|
|
||||||
|
|
||||||
class Box[T]: # $ cfgdefines=Box
|
|
||||||
item: T # $ cfgdefines=item
|
|
||||||
|
|
||||||
|
|
||||||
# Multi-parameter, with bound and variadics.
|
|
||||||
def multi[T: int, *Ts, **P](x: T, *args: *Ts, **kwargs: P.kwargs) -> T: # $ cfgdefines=multi cfgdefines=x cfgdefines=args cfgdefines=kwargs
|
|
||||||
return x
|
|
||||||
|
|
||||||
|
|
||||||
# `type` statement (PEP 695).
|
|
||||||
type Alias[T] = list[T] # $ cfgdefines=Alias cfgdefines=T
|
|
||||||
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
# Walrus and starred-target edge cases — wired in the new CFG.
|
|
||||||
|
|
||||||
# Walrus in expression context.
|
|
||||||
if (y := 5) > 0: # $ cfgdefines=y
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Walrus in a comprehension. The comprehension introduces a synthetic
|
|
||||||
# `.0` parameter bound to the iterable.
|
|
||||||
_ = [w for _ in range(3) if (w := 1)] # $ cfgdefines=_ cfgdefines=w cfgdefines=.0
|
|
||||||
|
|
||||||
# Starred target in a Tuple LHS.
|
|
||||||
*head, tail = [1, 2, 3] # $ cfgdefines=head cfgdefines=tail
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# `with cm() as x:` bindings — wired in the new CFG.
|
|
||||||
|
|
||||||
class CM: # $ cfgdefines=CM
|
|
||||||
def __enter__(self): return self # $ cfgdefines=__enter__ cfgdefines=self
|
|
||||||
def __exit__(self, *a): pass # $ cfgdefines=__exit__ cfgdefines=self cfgdefines=a
|
|
||||||
|
|
||||||
with CM() as x: # $ cfgdefines=x
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Multiple items.
|
|
||||||
with CM() as a, CM() as b: # $ cfgdefines=a cfgdefines=b
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Parenthesised form (Python 3.10+).
|
|
||||||
with (CM() as p, CM() as q): # $ cfgdefines=p cfgdefines=q
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Compound target in `with`.
|
|
||||||
with CM() as (m, n): # $ cfgdefines=m cfgdefines=n
|
|
||||||
pass
|
|
||||||
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
/** New-CFG version of AllLiveReachable. */
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, TestFunction f
|
|
||||||
where allLiveReachable(a, f)
|
|
||||||
select a, "Unreachable live annotation; entry of $@ does not reach this node", f, f.getName()
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of AnnotationHasCfgNode.
|
|
||||||
*
|
|
||||||
* Checks that every timer annotation has a corresponding CFG node.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerAnnotation ann
|
|
||||||
where annotationWithoutCfgNode(ann)
|
|
||||||
select ann, "Annotation in $@ has no CFG node", ann.getTestFunction(),
|
|
||||||
ann.getTestFunction().getName()
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of BasicBlockAnnotationGap.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that within a basic block, if a node is annotated then its
|
|
||||||
* successor is also annotated (or excluded). A gap in annotations
|
|
||||||
* within a basic block indicates a missing annotation, since there
|
|
||||||
* are no branches to justify the gap.
|
|
||||||
*
|
|
||||||
* Nodes with exceptional successors are excluded, as the exception
|
|
||||||
* edge leaves the basic block and the normal successor may be dead.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, CfgNode succ
|
|
||||||
where basicBlockAnnotationGap(a, succ)
|
|
||||||
select a, "Annotated node followed by unannotated $@ in the same basic block", succ,
|
|
||||||
succ.getNode().toString()
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of BasicBlockOrdering.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that within a single basic block, annotations appear in
|
|
||||||
* increasing minimum-timestamp order.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, TimerCfgNode b, int minA, int minB
|
|
||||||
where basicBlockOrdering(a, b, minA, minB)
|
|
||||||
select a, "Basic block ordering: $@ appears before $@", a.getTimestampExpr(minA),
|
|
||||||
"timestamp " + minA, b.getTimestampExpr(minB), "timestamp " + minB
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of BranchTimestamps.
|
|
||||||
*
|
|
||||||
* Checks that when a node has both a true and false successor, the
|
|
||||||
* live timestamps on one branch are marked as dead on the other.
|
|
||||||
* This ensures that boolean branches are fully annotated with dead()
|
|
||||||
* markers for the paths not taken.
|
|
||||||
*
|
|
||||||
* Limitation: the `@ t[ts, ...]` / `dead(ts)` annotation scheme can only
|
|
||||||
* model branch-dead-ness for plain boolean control flow that reconverges
|
|
||||||
* linearly after the split — i.e. `if`-with-else and `if`-expression.
|
|
||||||
* It cannot model:
|
|
||||||
*
|
|
||||||
* * loops (`while` / `for`): body timestamps repeat across iterations,
|
|
||||||
* so the loop-exit annotation can't list them as dead;
|
|
||||||
* * `match` statements: each `case` body is a syntactically distinct
|
|
||||||
* sub-tree, and the branches don't reconverge through a common
|
|
||||||
* annotation point in the timeline;
|
|
||||||
* * `try` / `with` and `raise` / `assert`: exception edges are modelled
|
|
||||||
* as true/false but flow to syntactically distinct handlers, with no
|
|
||||||
* reconvergence in the linear annotation order;
|
|
||||||
* * short-circuit `and` / `or` (`BoolExpr`): the branches reconverge at
|
|
||||||
* the BoolExpr's after-node, so timestamps on one branch are live
|
|
||||||
* downstream of the other rather than dead;
|
|
||||||
* * `if` without an `else` clause, and `if`/`elif` chains: the false
|
|
||||||
* branch reconverges with the true branch at the post-if statement
|
|
||||||
* (no-else) or fans out across multiple elif-test annotations,
|
|
||||||
* neither of which fit the binary annotation scheme.
|
|
||||||
*
|
|
||||||
* Branch nodes inside those constructs are therefore whitelisted out
|
|
||||||
* below. The check still fires (and is useful) for plain `if`/`else`
|
|
||||||
* and conditional-expression branching.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `f` contains a construct whose branches the linear-timestamp
|
|
||||||
* annotation scheme cannot describe (see file-level comment).
|
|
||||||
*/
|
|
||||||
private predicate hasUnmodellableBranching(Function f) {
|
|
||||||
exists(AstNode bad |
|
|
||||||
bad.getScope() = f and
|
|
||||||
(
|
|
||||||
bad instanceof While
|
|
||||||
or
|
|
||||||
bad instanceof For
|
|
||||||
or
|
|
||||||
bad instanceof MatchStmt
|
|
||||||
or
|
|
||||||
bad instanceof Try
|
|
||||||
or
|
|
||||||
bad instanceof With
|
|
||||||
or
|
|
||||||
bad instanceof Raise
|
|
||||||
or
|
|
||||||
bad instanceof Assert
|
|
||||||
or
|
|
||||||
bad instanceof BoolExpr
|
|
||||||
or
|
|
||||||
bad instanceof If and
|
|
||||||
(not exists(bad.(If).getAnOrelse()) or bad.(If).isElif())
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
from TimerCfgNode node, int ts, string branch
|
|
||||||
where
|
|
||||||
missingBranchTimestamp(node, ts, branch) and
|
|
||||||
not hasUnmodellableBranching(node.getTestFunction())
|
|
||||||
select node,
|
|
||||||
"Timestamp " + ts + " on true/false branch is missing a dead() annotation on the " + branch +
|
|
||||||
" successor in $@", node.getTestFunction(), node.getTestFunction().getName()
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of ConsecutivePredecessorTimestamps.
|
|
||||||
*
|
|
||||||
* Checks that each annotated node (except the minimum timestamp) has
|
|
||||||
* a predecessor annotation with timestamp `a - 1`. This is the reverse
|
|
||||||
* of ConsecutiveTimestamps: it catches nodes that are reachable but
|
|
||||||
* arrived at from the wrong place (skipping an intermediate node).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerAnnotation ann, int a
|
|
||||||
where consecutivePredecessorTimestamps(ann, a)
|
|
||||||
select ann, "$@ in $@ has no consecutive predecessor (expected " + (a - 1) + ")",
|
|
||||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of ConsecutiveTimestamps.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that consecutive annotated nodes have consecutive timestamps:
|
|
||||||
* for each annotation with timestamp `a`, some CFG node for that annotation
|
|
||||||
* must have a next annotation containing `a + 1`.
|
|
||||||
*
|
|
||||||
* Handles CFG splitting (e.g., finally blocks duplicated for normal/exceptional
|
|
||||||
* flow) by checking that at least one split has the required successor.
|
|
||||||
*
|
|
||||||
* Only applies to functions where all annotations are in the function's
|
|
||||||
* own scope (excludes tests with generators, async, comprehensions, or
|
|
||||||
* lambdas that have annotations in nested scopes).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerAnnotation ann, int a
|
|
||||||
where consecutiveTimestamps(ann, a)
|
|
||||||
select ann, "$@ in $@ has no consecutive successor (expected " + (a + 1) + ")",
|
|
||||||
ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName()
|
|
||||||
@@ -1,120 +0,0 @@
|
|||||||
/**
|
|
||||||
* Implementation of the evaluation-order CFG signature using the new
|
|
||||||
* shared control flow graph from AstNodeImpl.
|
|
||||||
*/
|
|
||||||
|
|
||||||
private import python as Py
|
|
||||||
import TimerUtils
|
|
||||||
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
|
||||||
private import codeql.controlflow.SuccessorType
|
|
||||||
|
|
||||||
private class NewControlFlowNode = CfgImpl::ControlFlowNode;
|
|
||||||
|
|
||||||
private class NewBasicBlock = CfgImpl::BasicBlock;
|
|
||||||
|
|
||||||
/** New (shared) CFG implementation of the evaluation-order signature. */
|
|
||||||
module NewCfg implements EvalOrderCfgSig {
|
|
||||||
class CfgNode instanceof NewControlFlowNode {
|
|
||||||
// We must pick a *unique* representative CFG node for each AST node. The
|
|
||||||
// shared CFG has several nodes per AST node (before / in-post-order / after
|
|
||||||
// / after-value splits), but the timer test framework keys annotations on
|
|
||||||
// `getNode()` and assumes one CFG node per annotated AST node. Without a
|
|
||||||
// filter, an annotated `f()` would map to both `f()` and `After f()`, which
|
|
||||||
// breaks two framework invariants: (1) the "no shared reachable" check
|
|
||||||
// requires that two distinct nodes sharing a timestamp be mutually
|
|
||||||
// unreachable (true/false branches of a condition), but `Before f()`,
|
|
||||||
// `f()` and `After f()` share the annotation's timestamp *and* lie on one
|
|
||||||
// linear path; and (2) the annotation walk (`nextTimerAnnotation`) halts at
|
|
||||||
// the first reachable representative, so a second node for the same AST
|
|
||||||
// node would stall the walk on the same timestamp instead of advancing to
|
|
||||||
// the next evaluation event.
|
|
||||||
//
|
|
||||||
// We use the "after" node (`isAfter`) rather than the canonical `injects`
|
|
||||||
// node, because `injects` represents short-circuit / conditional
|
|
||||||
// expressions (`and`/`or`/`not`/ternary) by their *before* node, placing
|
|
||||||
// them ahead of their operands — wrong for evaluation order. `isAfter`
|
|
||||||
// instead picks the post-evaluation node: the merged before/after node for
|
|
||||||
// simple leaves, the `TAfterNode` for post-order expressions, and the
|
|
||||||
// `AfterValueNode`(s) for pre-order conditionals, all positioned after the
|
|
||||||
// operands. The two value-split nodes of a conditional are genuinely
|
|
||||||
// distinct evaluation outcomes (handled by `getATrueSuccessor` /
|
|
||||||
// `getAFalseSuccessor`), so they do not violate the uniqueness assumption.
|
|
||||||
CfgNode() { NewControlFlowNode.super.isAfter(_) }
|
|
||||||
|
|
||||||
string toString() { result = NewControlFlowNode.super.toString() }
|
|
||||||
|
|
||||||
Py::Location getLocation() { result = NewControlFlowNode.super.getLocation() }
|
|
||||||
|
|
||||||
Py::AstNode getNode() {
|
|
||||||
result = CfgImpl::astNodeToPyNode(NewControlFlowNode.super.getAstNode())
|
|
||||||
}
|
|
||||||
|
|
||||||
CfgNode getASuccessor() { nextCfgNode(this, result) }
|
|
||||||
|
|
||||||
CfgNode getATrueSuccessor() {
|
|
||||||
NewControlFlowNode.super.isAfterTrue(_) and
|
|
||||||
// Only where there's also a false branch (true boolean split)
|
|
||||||
exists(NewControlFlowNode other | other.isAfterFalse(NewControlFlowNode.super.getAstNode())) and
|
|
||||||
nextCfgNodeFrom(this, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
CfgNode getAFalseSuccessor() {
|
|
||||||
NewControlFlowNode.super.isAfterFalse(_) and
|
|
||||||
// Only where there's also a true branch (true boolean split)
|
|
||||||
exists(NewControlFlowNode other | other.isAfterTrue(NewControlFlowNode.super.getAstNode())) and
|
|
||||||
nextCfgNodeFrom(this, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
CfgNode getAnExceptionalSuccessor() {
|
|
||||||
exists(NewControlFlowNode mid |
|
|
||||||
mid = NewControlFlowNode.super.getAnExceptionSuccessor() and
|
|
||||||
nextCfgNodeFrom(mid, result)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
Py::Scope getScope() { result = NewControlFlowNode.super.getEnclosingCallable().asScope() }
|
|
||||||
|
|
||||||
BasicBlock getBasicBlock() {
|
|
||||||
exists(NewBasicBlock bb, int i | bb.getNode(i) = this and result = bb)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `next` is the nearest CfgNode reachable from `n` via
|
|
||||||
* one or more raw CFG successor edges, skipping non-CfgNode intermediaries.
|
|
||||||
*/
|
|
||||||
private predicate nextCfgNodeFrom(NewControlFlowNode n, CfgNode next) {
|
|
||||||
next = n.getASuccessor()
|
|
||||||
or
|
|
||||||
exists(NewControlFlowNode mid |
|
|
||||||
mid = n.getASuccessor() and
|
|
||||||
not mid instanceof CfgNode and
|
|
||||||
nextCfgNodeFrom(mid, next)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `next` is the nearest CfgNode successor of `n`,
|
|
||||||
* skipping synthetic intermediate nodes.
|
|
||||||
*/
|
|
||||||
private predicate nextCfgNode(CfgNode n, CfgNode next) { nextCfgNodeFrom(n, next) }
|
|
||||||
|
|
||||||
class BasicBlock instanceof NewBasicBlock {
|
|
||||||
string toString() { result = NewBasicBlock.super.toString() }
|
|
||||||
|
|
||||||
CfgNode getNode(int n) { result = NewBasicBlock.super.getNode(n) }
|
|
||||||
|
|
||||||
predicate reaches(BasicBlock bb) { this = bb or this.strictlyReaches(bb) }
|
|
||||||
|
|
||||||
predicate strictlyReaches(BasicBlock bb) { NewBasicBlock.super.getASuccessor+() = bb }
|
|
||||||
|
|
||||||
predicate strictlyDominates(BasicBlock bb) { NewBasicBlock.super.strictlyDominates(bb) }
|
|
||||||
}
|
|
||||||
|
|
||||||
CfgNode scopeGetEntryNode(Py::Scope s) {
|
|
||||||
exists(CfgImpl::ControlFlow::EntryNode entry |
|
|
||||||
entry.getEnclosingCallable().asScope() = s and
|
|
||||||
nextCfgNodeFrom(entry, result)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of NeverReachable.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that expressions annotated with `t.never` either have no CFG
|
|
||||||
* node, or if they do, that the node is not reachable from its scope's
|
|
||||||
* entry (including within the same basic block).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerAnnotation ann
|
|
||||||
where neverReachable(ann)
|
|
||||||
select ann, "Node annotated with t.never is reachable in $@", ann.getTestFunction(),
|
|
||||||
ann.getTestFunction().getName()
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of NoBackwardFlow.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that time never flows backward between consecutive timer annotations
|
|
||||||
* in the CFG. For each pair of consecutive annotated nodes (A -> B), there must
|
|
||||||
* exist timestamps a in A and b in B with a < b.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, TimerCfgNode b, int minA, int maxB
|
|
||||||
where noBackwardFlow(a, b, minA, maxB)
|
|
||||||
select a, "Backward flow: $@ flows to $@ (max timestamp $@)", a.getTimestampExpr(minA),
|
|
||||||
minA.toString(), b, b.getNode().toString(), b.getTimestampExpr(maxB), maxB.toString()
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of NoBasicBlock.
|
|
||||||
*
|
|
||||||
* Checks that every annotated CFG node belongs to a basic block.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from CfgNode n, TestFunction f
|
|
||||||
where noBasicBlock(n, f)
|
|
||||||
select n, "CFG node in $@ does not belong to any basic block", f, f.getName()
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of NoSharedReachable.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Checks that two annotations sharing a timestamp value are on
|
|
||||||
* mutually exclusive CFG paths (neither can reach the other).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, TimerCfgNode b, int ts
|
|
||||||
where noSharedReachable(a, b, ts)
|
|
||||||
select a, "Shared timestamp $@ but this node reaches $@", a.getTimestampExpr(ts), ts.toString(), b,
|
|
||||||
b.getNode().toString()
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
/**
|
|
||||||
* New-CFG version of StrictForward.
|
|
||||||
*
|
|
||||||
* Original:
|
|
||||||
* Stronger version of NoBackwardFlow: for consecutive annotated nodes
|
|
||||||
* A -> B that both have a single timestamp (non-loop code) and B does
|
|
||||||
* NOT dominate A (forward edge), requires max(A) < min(B).
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import TimerUtils
|
|
||||||
import NewCfgImpl
|
|
||||||
|
|
||||||
private module Utils = EvalOrderCfgUtils<NewCfg>;
|
|
||||||
|
|
||||||
private import Utils
|
|
||||||
private import Utils::CfgTests
|
|
||||||
|
|
||||||
from TimerCfgNode a, TimerCfgNode b, int maxA, int minB
|
|
||||||
where strictForward(a, b, maxA, minB)
|
|
||||||
select a, "Strict forward violation: $@ flows to $@", a.getTimestampExpr(maxA), "timestamp " + maxA,
|
|
||||||
b.getTimestampExpr(minB), "timestamp " + minB
|
|
||||||
@@ -3,14 +3,14 @@
|
|||||||
* Python control flow graph.
|
* Python control flow graph.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private import python as Py
|
private import python as PY
|
||||||
import TimerUtils
|
import TimerUtils
|
||||||
|
|
||||||
/** Existing Python CFG implementation of the evaluation-order signature. */
|
/** Existing Python CFG implementation of the evaluation-order signature. */
|
||||||
module OldCfg implements EvalOrderCfgSig {
|
module OldCfg implements EvalOrderCfgSig {
|
||||||
class CfgNode = Py::ControlFlowNode;
|
class CfgNode = PY::ControlFlowNode;
|
||||||
|
|
||||||
class BasicBlock = Py::BasicBlock;
|
class BasicBlock = PY::BasicBlock;
|
||||||
|
|
||||||
CfgNode scopeGetEntryNode(Py::Scope s) { result = s.getEntryNode() }
|
CfgNode scopeGetEntryNode(PY::Scope s) { result = s.getEntryNode() }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ def test_nested_if_else(t):
|
|||||||
else:
|
else:
|
||||||
z = 2 @ t[dead(4)]
|
z = 2 @ t[dead(4)]
|
||||||
else:
|
else:
|
||||||
z = 3 @ t[dead(3), dead(4)]
|
z = 3 @ t[dead(4)]
|
||||||
w = 0 @ t[5]
|
w = 0 @ t[5]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
/**
|
|
||||||
* Inline-expectations test for the store/load/delete/parameter
|
|
||||||
* classification predicates on the new-CFG facade.
|
|
||||||
*
|
|
||||||
* Each tag fires when the corresponding predicate (`isLoad`,
|
|
||||||
* `isStore`, `isDelete`, `isParameter`, `isAugLoad`, `isAugStore`)
|
|
||||||
* holds on the canonical CFG node wrapping a `Py::Name` with the
|
|
||||||
* given identifier. Subscript and attribute stores are not covered
|
|
||||||
* by these tags — only the `Name`-typed targets/loads they involve.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import python
|
|
||||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
|
||||||
import utils.test.InlineExpectationsTest
|
|
||||||
|
|
||||||
module StoreLoadTest implements TestSig {
|
|
||||||
string getARelevantTag() { result = ["load", "store", "delete", "param", "augload", "augstore"] }
|
|
||||||
|
|
||||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
|
||||||
exists(Cfg::NameNode n |
|
|
||||||
location = n.getLocation() and
|
|
||||||
element = n.toString() and
|
|
||||||
value = n.getId() and
|
|
||||||
(
|
|
||||||
n.isLoad() and not n.isAugLoad() and tag = "load"
|
|
||||||
or
|
|
||||||
n.isStore() and not n.isAugStore() and tag = "store"
|
|
||||||
or
|
|
||||||
n.isDelete() and tag = "delete"
|
|
||||||
or
|
|
||||||
n.isParameter() and tag = "param"
|
|
||||||
or
|
|
||||||
n.isAugLoad() and tag = "augload"
|
|
||||||
or
|
|
||||||
n.isAugStore() and tag = "augstore"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
import MakeTest<StoreLoadTest>
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
# Store/load/delete/parameter classification on the new-CFG facade.
|
|
||||||
#
|
|
||||||
# Each annotated location carries the (sorted, deduplicated) set of
|
|
||||||
# kinds the CFG facade reports there. Comparing against the legacy
|
|
||||||
# 'semmle.python.Flow' classification is done by the comparison query
|
|
||||||
# 'StoreLoadParity.ql' — annotations here are only the positive
|
|
||||||
# assertions for the new facade.
|
|
||||||
#
|
|
||||||
# Tags:
|
|
||||||
# load=<id> -- isLoad() fires on the Name
|
|
||||||
# store=<id> -- isStore() fires
|
|
||||||
# delete=<id> -- isDelete() fires
|
|
||||||
# param=<id> -- isParameter() fires
|
|
||||||
# augload=<id> -- isAugLoad() fires (the LHS of x += ... when read)
|
|
||||||
# augstore=<id> -- isAugStore() fires (the LHS of x += ... when written)
|
|
||||||
|
|
||||||
|
|
||||||
# --- plain load / store / delete ---
|
|
||||||
|
|
||||||
x = 1 # $ store=x
|
|
||||||
y = x + 1 # $ store=y load=x
|
|
||||||
print(y) # $ load=print load=y
|
|
||||||
del x # $ delete=x
|
|
||||||
|
|
||||||
|
|
||||||
# --- function definitions (parameters) ---
|
|
||||||
|
|
||||||
def f(a, b=2, *args, c, **kwargs): # $ store=f param=a param=b param=args param=c param=kwargs
|
|
||||||
return a + b + c # $ load=a load=b load=c
|
|
||||||
|
|
||||||
|
|
||||||
# --- augmented assignment splits one Name into load + store halves ---
|
|
||||||
|
|
||||||
def aug(): # $ store=aug
|
|
||||||
n = 0 # $ store=n
|
|
||||||
n += 1 # $ augload=n augstore=n
|
|
||||||
return n # $ load=n
|
|
||||||
|
|
||||||
|
|
||||||
# --- subscript / attribute stores ---
|
|
||||||
|
|
||||||
class C: # $ store=C
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def stores(obj, container, idx): # $ store=stores param=obj param=container param=idx
|
|
||||||
obj.attr = 1 # $ load=obj
|
|
||||||
container[idx] = 2 # $ load=container load=idx
|
|
||||||
return obj # $ load=obj
|
|
||||||
|
|
||||||
|
|
||||||
# --- tuple unpacking ---
|
|
||||||
|
|
||||||
def unpack(pair): # $ store=unpack param=pair
|
|
||||||
a, b = pair # $ store=a store=b load=pair
|
|
||||||
return a + b # $ load=a load=b
|
|
||||||
@@ -589,11 +589,11 @@ def test_zip_tuple():
|
|||||||
|
|
||||||
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
|
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
|
||||||
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
|
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
|
||||||
SINK_F(z[0][2])
|
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
|
||||||
SINK_F(z[0][3])
|
SINK_F(z[0][3])
|
||||||
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
|
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
|
||||||
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
||||||
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
|
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
|
||||||
SINK_F(z[1][3])
|
SINK_F(z[1][3])
|
||||||
|
|
||||||
@expects(4)
|
@expects(4)
|
||||||
|
|||||||
@@ -157,7 +157,7 @@ class MyClass2(object):
|
|||||||
print(self.foo) # $ tracked MISSING: tracked=foo
|
print(self.foo) # $ tracked MISSING: tracked=foo
|
||||||
|
|
||||||
instance = MyClass2()
|
instance = MyClass2()
|
||||||
print(instance.foo) # $ tracked MISSING: tracked=foo
|
print(instance.foo) # $ MISSING: tracked=foo tracked
|
||||||
instance.print_foo() # $ MISSING: tracked=foo
|
instance.print_foo() # $ MISSING: tracked=foo
|
||||||
|
|
||||||
|
|
||||||
@@ -195,7 +195,7 @@ class Sub1(Base1):
|
|||||||
|
|
||||||
sub1 = Sub1()
|
sub1 = Sub1()
|
||||||
sub1.read_foo()
|
sub1.read_foo()
|
||||||
print(sub1.foo) # $ tracked MISSING: tracked=foo
|
print(sub1.foo) # $ MISSING: tracked=foo tracked
|
||||||
|
|
||||||
|
|
||||||
# attribute written in a subclass method, read in an inherited base class method
|
# attribute written in a subclass method, read in an inherited base class method
|
||||||
@@ -210,7 +210,7 @@ class Sub2(Base2):
|
|||||||
|
|
||||||
sub2 = Sub2()
|
sub2 = Sub2()
|
||||||
sub2.read_bar()
|
sub2.read_bar()
|
||||||
print(sub2.bar) # $ tracked MISSING: tracked=bar
|
print(sub2.bar) # $ MISSING: tracked=bar tracked
|
||||||
|
|
||||||
|
|
||||||
# attribute written in a base class method, read on an instance of the subclass
|
# attribute written in a base class method, read on an instance of the subclass
|
||||||
@@ -223,4 +223,4 @@ class Sub3(Base3):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
sub3 = Sub3()
|
sub3 = Sub3()
|
||||||
print(sub3.baz) # $ tracked MISSING: tracked=baz
|
print(sub3.baz) # $ MISSING: tracked=baz tracked
|
||||||
|
|||||||
@@ -362,7 +362,7 @@ def test_load_in_bulk():
|
|||||||
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
|
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
|
||||||
d = TestLoad.objects.in_bulk([1])
|
d = TestLoad.objects.in_bulk([1])
|
||||||
for val in d.values():
|
for val in d.values():
|
||||||
SINK(val.text) # $ MISSING: flow
|
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
|
||||||
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"
|
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
#select
|
#select
|
||||||
| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value |
|
| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value |
|
||||||
| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value |
|
| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value |
|
||||||
| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value |
|
|
||||||
| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value |
|
| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value |
|
||||||
| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value |
|
| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value |
|
||||||
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value |
|
| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value |
|
||||||
@@ -25,8 +24,6 @@ edges
|
|||||||
| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | |
|
| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | |
|
||||||
| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | |
|
| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | |
|
||||||
| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | |
|
| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | |
|
||||||
| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | |
|
|
||||||
| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | |
|
|
||||||
| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | |
|
| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | |
|
||||||
| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | |
|
| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | |
|
||||||
| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | |
|
| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | |
|
||||||
@@ -54,9 +51,6 @@ nodes
|
|||||||
| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||||
| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||||
| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||||
| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
|
||||||
| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
|
||||||
| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
|
||||||
| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
|
||||||
| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||||
| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source
|
|||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
@app.get("/unsafe3/")
|
@app.get("/unsafe3/")
|
||||||
async def unsafe3(name: str): # $ Source
|
async def unsafe3(name: str): # $ MISSING: Source
|
||||||
query = "select * from users where name=" + name
|
query = "select * from users where name=" + name
|
||||||
cursor = hdb_con3.cursor()
|
cursor = hdb_con3.cursor()
|
||||||
cursor.execute(query) # $ Alert
|
cursor.execute(query) # $ MISSING: Alert
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
@app.get("/unsafe4/")
|
@app.get("/unsafe4/")
|
||||||
|
|||||||
@@ -28,6 +28,8 @@ nodes
|
|||||||
| string_flow.rb:227:10:227:10 | a | semmle.label | a |
|
| string_flow.rb:227:10:227:10 | a | semmle.label | a |
|
||||||
subpaths
|
subpaths
|
||||||
testFailures
|
testFailures
|
||||||
|
| string_flow.rb:85:10:85:10 | a | Unexpected result: hasValueFlow=a |
|
||||||
|
| string_flow.rb:227:10:227:10 | a | Unexpected result: hasValueFlow=a |
|
||||||
#select
|
#select
|
||||||
| string_flow.rb:3:10:3:22 | call to new | string_flow.rb:2:9:2:18 | call to source | string_flow.rb:3:10:3:22 | call to new | $@ | string_flow.rb:2:9:2:18 | call to source | call to source |
|
| string_flow.rb:3:10:3:22 | call to new | string_flow.rb:2:9:2:18 | call to source | string_flow.rb:3:10:3:22 | call to new | $@ | string_flow.rb:2:9:2:18 | call to source | call to source |
|
||||||
| string_flow.rb:85:10:85:10 | a | string_flow.rb:83:9:83:18 | call to source | string_flow.rb:85:10:85:10 | a | $@ | string_flow.rb:83:9:83:18 | call to source | call to source |
|
| string_flow.rb:85:10:85:10 | a | string_flow.rb:83:9:83:18 | call to source | string_flow.rb:85:10:85:10 | a | $@ | string_flow.rb:83:9:83:18 | call to source | call to source |
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ end
|
|||||||
def m_clear
|
def m_clear
|
||||||
a = source "a"
|
a = source "a"
|
||||||
a.clear
|
a.clear
|
||||||
sink a # $ SPURIOUS: hasValueFlow=a
|
sink a
|
||||||
end
|
end
|
||||||
|
|
||||||
# concat and prepend omitted because they clash with the summaries for
|
# concat and prepend omitted because they clash with the summaries for
|
||||||
@@ -224,7 +224,7 @@ def m_replace
|
|||||||
b = source "b"
|
b = source "b"
|
||||||
sink a.replace(b) # $ hasTaintFlow=b
|
sink a.replace(b) # $ hasTaintFlow=b
|
||||||
# TODO: currently we get value flow for a, because we don't clear content
|
# TODO: currently we get value flow for a, because we don't clear content
|
||||||
sink a # $ hasTaintFlow=b SPURIOUS: hasValueFlow=a
|
sink a # $ hasTaintFlow=b
|
||||||
end
|
end
|
||||||
|
|
||||||
def m_reverse
|
def m_reverse
|
||||||
@@ -316,4 +316,4 @@ def m_upto(i)
|
|||||||
a.upto("b", true) { |x| sink x } # $ hasTaintFlow=a
|
a.upto("b", true) { |x| sink x } # $ hasTaintFlow=a
|
||||||
"b".upto(a) { |x| sink x } # $ hasTaintFlow=a
|
"b".upto(a) { |x| sink x } # $ hasTaintFlow=a
|
||||||
"b".upto(a, true) { |x| sink x }
|
"b".upto(a, true) { |x| sink x }
|
||||||
end
|
end
|
||||||
@@ -9,7 +9,7 @@ end
|
|||||||
class OneController < ActionController::Base
|
class OneController < ActionController::Base
|
||||||
before_action :a
|
before_action :a
|
||||||
after_action :c
|
after_action :c
|
||||||
|
|
||||||
def a
|
def a
|
||||||
@foo = params[:foo]
|
@foo = params[:foo]
|
||||||
end
|
end
|
||||||
@@ -18,14 +18,14 @@ class OneController < ActionController::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def c
|
def c
|
||||||
sink @foo # $ hasTaintFlow
|
sink @foo
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class TwoController < ActionController::Base
|
class TwoController < ActionController::Base
|
||||||
before_action :a
|
before_action :a
|
||||||
after_action :c
|
after_action :c
|
||||||
|
|
||||||
def a
|
def a
|
||||||
@foo = params[:foo]
|
@foo = params[:foo]
|
||||||
end
|
end
|
||||||
@@ -35,14 +35,14 @@ class TwoController < ActionController::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def c
|
def c
|
||||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
sink @foo
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class ThreeController < ActionController::Base
|
class ThreeController < ActionController::Base
|
||||||
before_action :a
|
before_action :a
|
||||||
after_action :c
|
after_action :c
|
||||||
|
|
||||||
def a
|
def a
|
||||||
@foo = params[:foo]
|
@foo = params[:foo]
|
||||||
@foo = "safe"
|
@foo = "safe"
|
||||||
@@ -52,14 +52,14 @@ class ThreeController < ActionController::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def c
|
def c
|
||||||
sink @foo # $ SPURIOUS: hasTaintFlow
|
sink @foo
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class FourController < ActionController::Base
|
class FourController < ActionController::Base
|
||||||
before_action :a
|
before_action :a
|
||||||
after_action :c
|
after_action :c
|
||||||
|
|
||||||
def a
|
def a
|
||||||
@foo.bar = params[:foo]
|
@foo.bar = params[:foo]
|
||||||
end
|
end
|
||||||
@@ -68,14 +68,14 @@ class FourController < ActionController::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def c
|
def c
|
||||||
sink(@foo.bar) # $ hasTaintFlow
|
sink(@foo.bar)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class FiveController < ActionController::Base
|
class FiveController < ActionController::Base
|
||||||
before_action :a
|
before_action :a
|
||||||
after_action :c
|
after_action :c
|
||||||
|
|
||||||
def a
|
def a
|
||||||
self.taint_foo
|
self.taint_foo
|
||||||
end
|
end
|
||||||
@@ -84,10 +84,10 @@ class FiveController < ActionController::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def c
|
def c
|
||||||
sink @foo # $ hasTaintFlow
|
sink @foo
|
||||||
end
|
end
|
||||||
|
|
||||||
def taint_foo
|
def taint_foo
|
||||||
@foo = params[:foo]
|
@foo = params[:foo]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -270,6 +270,11 @@ nodes
|
|||||||
| params_flow.rb:205:10:205:10 | a | semmle.label | a |
|
| params_flow.rb:205:10:205:10 | a | semmle.label | a |
|
||||||
subpaths
|
subpaths
|
||||||
testFailures
|
testFailures
|
||||||
|
| filter_flow.rb:21:10:21:13 | @foo | Unexpected result: hasTaintFlow |
|
||||||
|
| filter_flow.rb:38:10:38:13 | @foo | Unexpected result: hasTaintFlow |
|
||||||
|
| filter_flow.rb:55:10:55:13 | @foo | Unexpected result: hasTaintFlow |
|
||||||
|
| filter_flow.rb:71:10:71:17 | call to bar | Unexpected result: hasTaintFlow |
|
||||||
|
| filter_flow.rb:87:11:87:14 | @foo | Unexpected result: hasTaintFlow |
|
||||||
#select
|
#select
|
||||||
| filter_flow.rb:21:10:21:13 | @foo | filter_flow.rb:14:12:14:17 | call to params | filter_flow.rb:21:10:21:13 | @foo | $@ | filter_flow.rb:14:12:14:17 | call to params | call to params |
|
| filter_flow.rb:21:10:21:13 | @foo | filter_flow.rb:14:12:14:17 | call to params | filter_flow.rb:21:10:21:13 | @foo | $@ | filter_flow.rb:14:12:14:17 | call to params | call to params |
|
||||||
| filter_flow.rb:38:10:38:13 | @foo | filter_flow.rb:30:12:30:17 | call to params | filter_flow.rb:38:10:38:13 | @foo | $@ | filter_flow.rb:30:12:30:17 | call to params | call to params |
|
| filter_flow.rb:38:10:38:13 | @foo | filter_flow.rb:30:12:30:17 | call to params | filter_flow.rb:38:10:38:13 | @foo | $@ | filter_flow.rb:30:12:30:17 | call to params | call to params |
|
||||||
|
|||||||
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
|||||||
Err(err) => err.to_compile_error().into(),
|
Err(err) => err.to_compile_error().into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||||
|
///
|
||||||
|
/// Use `manual_rule!` when the transform needs control over capture
|
||||||
|
/// translation timing — for example, when an outer rule needs to set
|
||||||
|
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||||
|
/// translation reaches inner rules that read that state.
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// manual_rule!(
|
||||||
|
/// (query_pattern field: (_) @name)
|
||||||
|
/// {
|
||||||
|
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||||
|
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||||
|
/// let translated = ctx.translate(name)?;
|
||||||
|
/// Ok(translated)
|
||||||
|
/// }
|
||||||
|
/// )
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Differences from [`rule!`]:
|
||||||
|
/// - Captures are **not** auto-translated before the body runs; they
|
||||||
|
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||||
|
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||||
|
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||||
|
/// tree template, no `Ok(...)` wrap.
|
||||||
|
#[proc_macro]
|
||||||
|
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||||
|
let input2: TokenStream2 = input.into();
|
||||||
|
match parse::parse_manual_rule_top(input2) {
|
||||||
|
Ok(output) => output.into(),
|
||||||
|
Err(err) => err.to_compile_error().into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -22,9 +22,10 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
|
|||||||
/// Parse a single query node (possibly with a trailing `@capture`).
|
/// Parse a single query node (possibly with a trailing `@capture`).
|
||||||
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
|
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||||
let base = parse_query_atom(tokens)?;
|
let base = parse_query_atom(tokens)?;
|
||||||
// Check for trailing @capture or @@capture
|
// Check for trailing @capture
|
||||||
if peek_is_at(tokens) {
|
if peek_is_at(tokens) {
|
||||||
let capture_name = consume_capture_marker(tokens)?;
|
tokens.next(); // consume @
|
||||||
|
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||||
let name_str = capture_name.to_string();
|
let name_str = capture_name.to_string();
|
||||||
Ok(quote! {
|
Ok(quote! {
|
||||||
yeast::query::QueryNode::Capture {
|
yeast::query::QueryNode::Capture {
|
||||||
@@ -158,7 +159,8 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
|||||||
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
|
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
|
||||||
} else {
|
} else {
|
||||||
let child = if peek_is_at(tokens) {
|
let child = if peek_is_at(tokens) {
|
||||||
let capture_name = consume_capture_marker(tokens)?;
|
tokens.next();
|
||||||
|
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||||
let name_str = capture_name.to_string();
|
let name_str = capture_name.to_string();
|
||||||
quote! {
|
quote! {
|
||||||
yeast::query::QueryNode::Capture {
|
yeast::query::QueryNode::Capture {
|
||||||
@@ -648,9 +650,6 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
|||||||
struct CaptureInfo {
|
struct CaptureInfo {
|
||||||
name: String,
|
name: String,
|
||||||
multiplicity: CaptureMultiplicity,
|
multiplicity: CaptureMultiplicity,
|
||||||
/// `true` for `@@name` captures: the auto-translate prefix skips them,
|
|
||||||
/// so the bound `NodeRef` refers to the raw (input-schema) node.
|
|
||||||
raw: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
#[derive(Clone, Copy, PartialEq)]
|
||||||
@@ -709,14 +708,6 @@ fn extract_captures_inner(
|
|||||||
extract_captures_inner(&mut inner, captures, child_mult);
|
extract_captures_inner(&mut inner, captures, child_mult);
|
||||||
}
|
}
|
||||||
TokenTree::Punct(p) if p.as_char() == '@' => {
|
TokenTree::Punct(p) if p.as_char() == '@' => {
|
||||||
// `@@name` marks the capture as raw (skip auto-translate).
|
|
||||||
let raw = matches!(
|
|
||||||
tokens.peek(),
|
|
||||||
Some(TokenTree::Punct(p)) if p.as_char() == '@'
|
|
||||||
);
|
|
||||||
if raw {
|
|
||||||
tokens.next(); // consume the second `@`
|
|
||||||
}
|
|
||||||
if let Some(TokenTree::Ident(name)) = tokens.next() {
|
if let Some(TokenTree::Ident(name)) = tokens.next() {
|
||||||
let mult = if parent_mult == CaptureMultiplicity::Repeated
|
let mult = if parent_mult == CaptureMultiplicity::Repeated
|
||||||
|| last_mult == CaptureMultiplicity::Repeated
|
|| last_mult == CaptureMultiplicity::Repeated
|
||||||
@@ -732,7 +723,6 @@ fn extract_captures_inner(
|
|||||||
captures.push(CaptureInfo {
|
captures.push(CaptureInfo {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
multiplicity: mult,
|
multiplicity: mult,
|
||||||
raw,
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
last_mult = CaptureMultiplicity::Single;
|
last_mult = CaptureMultiplicity::Single;
|
||||||
@@ -786,14 +776,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
|||||||
// Parse query
|
// Parse query
|
||||||
let query_code = parse_query_top(query_stream.clone())?;
|
let query_code = parse_query_top(query_stream.clone())?;
|
||||||
|
|
||||||
// Capture names marked `@@name` (raw) — passed to the auto-translate
|
|
||||||
// prefix as a skip list so those captures keep their input-schema ids.
|
|
||||||
let raw_capture_names: Vec<&str> = captures
|
|
||||||
.iter()
|
|
||||||
.filter(|c| c.raw)
|
|
||||||
.map(|c| c.name.as_str())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Generate capture bindings
|
// Generate capture bindings
|
||||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||||
let bindings: Vec<TokenStream> = captures
|
let bindings: Vec<TokenStream> = captures
|
||||||
@@ -909,14 +891,11 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
|||||||
let __query = #query_code;
|
let __query = #query_code;
|
||||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||||
// Auto-translation prefix: recursively translate every
|
// Auto-translation prefix: recursively translate every
|
||||||
// captured node before invoking the user's transform body,
|
// captured node before invoking the user's transform body.
|
||||||
// except for `@@name` captures listed in `__skip` which the
|
|
||||||
// body consumes raw.
|
|
||||||
// For OneShot rules this preserves the legacy behaviour
|
// For OneShot rules this preserves the legacy behaviour
|
||||||
// (input-schema captures translated to output-schema
|
// (input-schema captures translated to output-schema
|
||||||
// nodes); for Repeating rules it is a no-op.
|
// nodes); for Repeating rules it is a no-op.
|
||||||
let __skip: &[&str] = &[#(#raw_capture_names),*];
|
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
|
|
||||||
#(#bindings)*
|
#(#bindings)*
|
||||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||||
let __result: Vec<usize> = { #transform_body };
|
let __result: Vec<usize> = { #transform_body };
|
||||||
@@ -926,6 +905,106 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse `manual_rule!( query { body } )`.
|
||||||
|
///
|
||||||
|
/// Like [`parse_rule_top`] but:
|
||||||
|
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||||
|
/// - Generates code that does NOT auto-translate captures before
|
||||||
|
/// running the body. Capture variables refer to raw (input-schema)
|
||||||
|
/// nodes; the body is responsible for explicit translation via
|
||||||
|
/// `ctx.translate(...)`.
|
||||||
|
/// - The body is included verbatim and must evaluate to
|
||||||
|
/// `Result<Vec<usize>, String>`.
|
||||||
|
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||||
|
let mut tokens = input.into_iter().peekable();
|
||||||
|
|
||||||
|
// Collect query tokens up to the body block `{ ... }`.
|
||||||
|
let mut query_tokens = Vec::new();
|
||||||
|
loop {
|
||||||
|
match tokens.peek() {
|
||||||
|
None => {
|
||||||
|
return Err(syn::Error::new(
|
||||||
|
Span::call_site(),
|
||||||
|
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||||
|
_ => {
|
||||||
|
query_tokens.push(tokens.next().unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||||
|
|
||||||
|
// Extract captures from the query (same as in `rule!`).
|
||||||
|
let captures = extract_captures(&query_stream);
|
||||||
|
|
||||||
|
// Parse the query into the QueryNode-building expression.
|
||||||
|
let query_code = parse_query_top(query_stream)?;
|
||||||
|
|
||||||
|
// Generate capture bindings (same as in `rule!`).
|
||||||
|
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||||
|
let bindings: Vec<TokenStream> = captures
|
||||||
|
.iter()
|
||||||
|
.map(|cap| {
|
||||||
|
let name = Ident::new(&cap.name, Span::call_site());
|
||||||
|
let name_str = &cap.name;
|
||||||
|
match cap.multiplicity {
|
||||||
|
CaptureMultiplicity::Repeated => quote! {
|
||||||
|
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||||
|
.into_iter()
|
||||||
|
.map(yeast::NodeRef)
|
||||||
|
.collect();
|
||||||
|
},
|
||||||
|
CaptureMultiplicity::Optional => quote! {
|
||||||
|
let #name: Option<yeast::NodeRef> =
|
||||||
|
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||||
|
},
|
||||||
|
CaptureMultiplicity::Single => quote! {
|
||||||
|
let #name: yeast::NodeRef =
|
||||||
|
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||||
|
},
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Consume the body block.
|
||||||
|
let body_group = match tokens.next() {
|
||||||
|
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||||
|
other => {
|
||||||
|
return Err(syn::Error::new(
|
||||||
|
Span::call_site(),
|
||||||
|
format!(
|
||||||
|
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||||
|
),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let body_stream = body_group.stream();
|
||||||
|
|
||||||
|
// No tokens should follow the body.
|
||||||
|
if let Some(tok) = tokens.next() {
|
||||||
|
return Err(syn::Error::new_spanned(
|
||||||
|
tok,
|
||||||
|
"unexpected token after manual_rule! body",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(quote! {
|
||||||
|
{
|
||||||
|
let __query = #query_code;
|
||||||
|
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||||
|
// No auto-translate prefix for manual rules — the body
|
||||||
|
// is responsible for translating captures explicitly.
|
||||||
|
#(#bindings)*
|
||||||
|
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||||
|
#body_stream
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Token utilities
|
// Token utilities
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -934,16 +1013,6 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
|
|||||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
|
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consume an `@` or `@@` capture marker and the following name ident.
|
|
||||||
/// Caller has already verified `peek_is_at(tokens)`.
|
|
||||||
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
|
|
||||||
tokens.next(); // consume the first `@`
|
|
||||||
if peek_is_at(tokens) {
|
|
||||||
tokens.next(); // consume the second `@` of `@@`
|
|
||||||
}
|
|
||||||
expect_ident(tokens, "expected capture name after `@` or `@@`")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn peek_is_literal(tokens: &mut Tokens) -> bool {
|
fn peek_is_literal(tokens: &mut Tokens) -> bool {
|
||||||
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
|
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
|
||||||
}
|
}
|
||||||
@@ -1044,7 +1113,8 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
|
|||||||
|
|
||||||
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
|
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
|
||||||
if peek_is_at(tokens) {
|
if peek_is_at(tokens) {
|
||||||
let name = consume_capture_marker(tokens)?;
|
tokens.next(); // consume @
|
||||||
|
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||||
let name_str = name.to_string();
|
let name_str = name.to_string();
|
||||||
Ok(quote! {
|
Ok(quote! {
|
||||||
yeast::query::QueryNode::Capture {
|
yeast::query::QueryNode::Capture {
|
||||||
@@ -1071,12 +1141,13 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
|
/// If `@name` follows a Repeated list element, wrap each child SingleNode
|
||||||
/// child SingleNode inside the repetition with a Capture. This matches
|
/// inside the repetition with a Capture. This matches tree-sitter semantics
|
||||||
/// tree-sitter semantics where `(_)* @name` captures each matched node.
|
/// where `(_)* @name` captures each matched node.
|
||||||
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
|
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
|
||||||
if peek_is_at(tokens) {
|
if peek_is_at(tokens) {
|
||||||
let name = consume_capture_marker(tokens)?;
|
tokens.next();
|
||||||
|
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||||
let name_str = name.to_string();
|
let name_str = name.to_string();
|
||||||
// Re-parse the element isn't practical, so we generate a wrapper
|
// Re-parse the element isn't practical, so we generate a wrapper
|
||||||
// that creates a new Repeated with each child wrapped in a capture.
|
// that creates a new Repeated with each child wrapped in a capture.
|
||||||
|
|||||||
@@ -292,37 +292,6 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
|
|||||||
single capture (`Id`) and `{..name}` splices a repeated capture
|
single capture (`Id`) and `{..name}` splices a repeated capture
|
||||||
(`Vec<Id>`).
|
(`Vec<Id>`).
|
||||||
|
|
||||||
### Raw captures (`@@name`)
|
|
||||||
|
|
||||||
The default `@name` capture marker is *auto-translated*: in OneShot
|
|
||||||
phases the macro recursively translates the captured node before
|
|
||||||
binding it, so `{name}` in the output template splices a node that
|
|
||||||
already conforms to the output schema.
|
|
||||||
|
|
||||||
For rules that need the raw (input-schema) capture — typically to read
|
|
||||||
its source text or to translate it explicitly with mutable context
|
|
||||||
state between calls — use `@@name` instead. The body sees the original
|
|
||||||
input-schema `NodeRef`:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
yeast::rule!(
|
|
||||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
|
||||||
=>
|
|
||||||
{
|
|
||||||
// raw_lhs is untranslated: read its original source text.
|
|
||||||
let text = ctx.ast.source_text(raw_lhs.into());
|
|
||||||
// rhs is already translated by the auto-translate prefix.
|
|
||||||
tree!((call
|
|
||||||
method: (identifier #{text.as_str()})
|
|
||||||
receiver: {rhs}))
|
|
||||||
}
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
|
|
||||||
markers are equivalent (auto-translation is a no-op for repeating
|
|
||||||
rules).
|
|
||||||
|
|
||||||
## Complete example: for-loop desugaring
|
## Complete example: for-loop desugaring
|
||||||
|
|
||||||
This rule rewrites Ruby's `for pat in val do body end` into
|
This rule rewrites Ruby's `for pat in val do body end` into
|
||||||
|
|||||||
@@ -80,28 +80,6 @@ impl Captures {
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like [`try_map_all_captures`] but leaves captures whose name appears
|
|
||||||
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
|
|
||||||
/// (raw) captures alongside the default auto-translated `@name`
|
|
||||||
/// captures.
|
|
||||||
pub fn try_map_captures_except<E>(
|
|
||||||
&mut self,
|
|
||||||
skip: &[&str],
|
|
||||||
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
|
|
||||||
) -> Result<(), E> {
|
|
||||||
for (name, ids) in self.captures.iter_mut() {
|
|
||||||
if skip.contains(name) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let mut new_ids = Vec::with_capacity(ids.len());
|
|
||||||
for &id in ids.iter() {
|
|
||||||
new_ids.extend(f(id)?);
|
|
||||||
}
|
|
||||||
*ids = new_ids;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
|
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
|
||||||
if let Some(from_ids) = self.captures.get(from) {
|
if let Some(from_ids) = self.captures.get(from) {
|
||||||
let new_values = from_ids.iter().copied().map(f).collect();
|
let new_values = from_ids.iter().copied().map(f).collect();
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ pub mod schema;
|
|||||||
pub mod tree_builder;
|
pub mod tree_builder;
|
||||||
mod visitor;
|
mod visitor;
|
||||||
|
|
||||||
pub use yeast_macros::{query, rule, tree, trees};
|
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||||
|
|
||||||
use captures::Captures;
|
use captures::Captures;
|
||||||
pub use cursor::Cursor;
|
pub use cursor::Cursor;
|
||||||
@@ -48,12 +48,6 @@ impl From<NodeRef> for Id {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Id> for NodeRef {
|
|
||||||
fn from(value: Id) -> Self {
|
|
||||||
NodeRef(value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Like [`std::fmt::Display`], but the formatting routine is given access to
|
/// Like [`std::fmt::Display`], but the formatting routine is given access to
|
||||||
/// the [`Ast`] so that node references can resolve to their source text.
|
/// the [`Ast`] so that node references can resolve to their source text.
|
||||||
///
|
///
|
||||||
@@ -763,14 +757,13 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Translate every captured node in `captures` in place (OneShot phase
|
/// Translate every captured node in `captures` in place (OneShot phase
|
||||||
/// only), except for captures whose name appears in `skip` — those are
|
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||||
/// left as raw (input-schema) ids for the rule body to consume
|
/// receive raw captures.
|
||||||
/// directly. In a Repeating phase this is a no-op — Repeating rules
|
|
||||||
/// receive raw captures regardless of `skip`.
|
|
||||||
///
|
///
|
||||||
/// Used by the `rule!` macro's generated prefix. `skip` is populated
|
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||||
/// from the macro's `@@name` capture markers; for plain `@name`
|
/// pre-existing "auto-translate captures before running the transform
|
||||||
/// captures (and rules with no `@@` markers) it is empty.
|
/// body" behavior. Manually-written transforms typically translate
|
||||||
|
/// captures selectively via [`translate`] instead.
|
||||||
///
|
///
|
||||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||||
@@ -779,12 +772,11 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
|||||||
captures: &mut Captures,
|
captures: &mut Captures,
|
||||||
ast: &mut Ast,
|
ast: &mut Ast,
|
||||||
user_ctx: &mut C,
|
user_ctx: &mut C,
|
||||||
skip: &[&str],
|
|
||||||
) -> Result<(), String> {
|
) -> Result<(), String> {
|
||||||
match &self.inner {
|
match &self.inner {
|
||||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||||
let root = *matched_root;
|
let root = *matched_root;
|
||||||
captures.try_map_captures_except(skip, |cid| {
|
captures.try_map_all_captures(|cid| {
|
||||||
if cid == root {
|
if cid == root {
|
||||||
Ok(vec![cid])
|
Ok(vec![cid])
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1058,111 +1058,6 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Verify that `@@name` capture markers skip the auto-translate prefix:
|
|
||||||
/// the body sees the *raw* (input-schema) NodeRef and can read its
|
|
||||||
/// source text or call `ctx.translate(...)` explicitly. Compare with
|
|
||||||
/// the bare `@name` form, where the auto-translate prefix runs the
|
|
||||||
/// same translation up front and the body sees the post-translate id.
|
|
||||||
#[test]
|
|
||||||
fn test_raw_capture_marker() {
|
|
||||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
||||||
let schema =
|
|
||||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
||||||
let rules: Vec<Rule> = vec![
|
|
||||||
yeast::rule!(
|
|
||||||
(program (_)* @stmts)
|
|
||||||
=>
|
|
||||||
(program stmt: {..stmts})
|
|
||||||
),
|
|
||||||
// `@@raw_lhs` is untranslated: the body reads its source text
|
|
||||||
// ("x") and embeds it directly as the identifier content. `@rhs`
|
|
||||||
// is auto-translated (rhs already points to (integer "INT")).
|
|
||||||
yeast::rule!(
|
|
||||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
|
||||||
=>
|
|
||||||
{
|
|
||||||
let text = ctx.ast.source_text(raw_lhs.into());
|
|
||||||
tree!((call
|
|
||||||
method: (identifier #{text.as_str()})
|
|
||||||
receiver: {rhs}))
|
|
||||||
}
|
|
||||||
),
|
|
||||||
yeast::rule!((identifier) => (identifier "ID")),
|
|
||||||
yeast::rule!((integer) => (integer "INT")),
|
|
||||||
];
|
|
||||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
|
||||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
|
||||||
|
|
||||||
let input = "x = 1";
|
|
||||||
let ast = runner.run(input).unwrap();
|
|
||||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
|
||||||
// `method:` uses the raw source text ("x"); if `@@` were broken and
|
|
||||||
// auto-translation ran on `raw_lhs`, it would still produce the
|
|
||||||
// string "x" (source_text inherits the input range), so the dump
|
|
||||||
// wouldn't change here. The companion test
|
|
||||||
// `test_raw_capture_marker_explicit_translate` exercises the
|
|
||||||
// stronger property that `ctx.translate(raw_lhs)?` succeeds and
|
|
||||||
// produces the translated `(identifier "ID")`.
|
|
||||||
assert_dump_eq(
|
|
||||||
&dump,
|
|
||||||
r#"
|
|
||||||
program
|
|
||||||
stmt:
|
|
||||||
call
|
|
||||||
method: identifier "x"
|
|
||||||
receiver: integer "INT"
|
|
||||||
"#,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Companion to `test_raw_capture_marker`: confirms that calling
|
|
||||||
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
|
|
||||||
/// produces the correctly-translated output-schema node. With `@`, the
|
|
||||||
/// translation has already happened, so `ctx.translate(...)` inside the
|
|
||||||
/// body would attempt to re-translate an output node (which has no
|
|
||||||
/// matching rule and would error).
|
|
||||||
#[test]
|
|
||||||
fn test_raw_capture_marker_explicit_translate() {
|
|
||||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
||||||
let schema =
|
|
||||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
||||||
let rules: Vec<Rule> = vec![
|
|
||||||
yeast::rule!(
|
|
||||||
(program (_)* @stmts)
|
|
||||||
=>
|
|
||||||
(program stmt: {..stmts})
|
|
||||||
),
|
|
||||||
yeast::rule!(
|
|
||||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
|
||||||
=>
|
|
||||||
{
|
|
||||||
let translated_lhs = ctx.translate(raw_lhs)?;
|
|
||||||
tree!((call
|
|
||||||
method: {..translated_lhs}
|
|
||||||
receiver: {rhs}))
|
|
||||||
}
|
|
||||||
),
|
|
||||||
yeast::rule!((identifier) => (identifier "ID")),
|
|
||||||
yeast::rule!((integer) => (integer "INT")),
|
|
||||||
];
|
|
||||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
|
||||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
|
||||||
|
|
||||||
let input = "x = 1";
|
|
||||||
let ast = runner.run(input).unwrap();
|
|
||||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
|
||||||
assert_dump_eq(
|
|
||||||
&dump,
|
|
||||||
r#"
|
|
||||||
program
|
|
||||||
stmt:
|
|
||||||
call
|
|
||||||
method: identifier "ID"
|
|
||||||
receiver: integer "INT"
|
|
||||||
"#,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---- Cursor tests ----
|
// ---- Cursor tests ----
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use codeql_extractor::extractor::simple;
|
use codeql_extractor::extractor::simple;
|
||||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, rule, tree};
|
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree};
|
||||||
|
|
||||||
/// User context propagated from outer rules down to the inner rules that
|
/// User context propagated from outer rules down to the inner rules that
|
||||||
/// emit the corresponding output declarations, so that each emitted node
|
/// emit the corresponding output declarations, so that each emitted node
|
||||||
@@ -15,26 +15,26 @@ struct SwiftContext {
|
|||||||
/// (`computed_getter`/`computed_setter`/`computed_modify`/
|
/// (`computed_getter`/`computed_setter`/`computed_modify`/
|
||||||
/// `willset_clause`/`didset_clause`/`getter_specifier`/
|
/// `willset_clause`/`didset_clause`/`getter_specifier`/
|
||||||
/// `setter_specifier`).
|
/// `setter_specifier`).
|
||||||
property_name: Option<yeast::NodeRef>,
|
property_name: Option<yeast::Id>,
|
||||||
/// Translated type node for the property type. Set by the outer
|
/// Translated type node for the property type. Set by the outer
|
||||||
/// `property_binding` rule (computed accessors variant) and
|
/// `property_binding` rule (computed accessors variant) and
|
||||||
/// `protocol_property_declaration` when present; read by the
|
/// `protocol_property_declaration` when present; read by the
|
||||||
/// accessor inner rules.
|
/// accessor inner rules.
|
||||||
property_type: Option<yeast::NodeRef>,
|
property_type: Option<yeast::Id>,
|
||||||
/// Default-value expression for the next translated `parameter`. Set
|
/// Default-value expression for the next translated `parameter`. Set
|
||||||
/// by the outer `function_parameter` rule; read by the `parameter`
|
/// by the outer `function_parameter` rule; read by the `parameter`
|
||||||
/// rules.
|
/// rules.
|
||||||
default_value: Option<yeast::NodeRef>,
|
default_value: Option<yeast::Id>,
|
||||||
/// Translated outer modifiers (e.g. visibility, attributes) to
|
/// Translated outer modifiers (e.g. visibility, attributes) to
|
||||||
/// attach to each child of a flattening outer rule. Set by
|
/// attach to each child of a flattening outer rule. Set by
|
||||||
/// `property_declaration`, `enum_entry`, and
|
/// `property_declaration`, `enum_entry`, and
|
||||||
/// `protocol_property_declaration`.
|
/// `protocol_property_declaration`.
|
||||||
outer_modifiers: Vec<yeast::NodeRef>,
|
outer_modifiers: Vec<yeast::Id>,
|
||||||
/// The `let`/`var` binding modifier for a `property_declaration`.
|
/// The `let`/`var` binding modifier for a `property_declaration`.
|
||||||
/// Set by `property_declaration`; read by the inner declaration
|
/// Set by `property_declaration`; read by the inner declaration
|
||||||
/// rules (`property_binding` variants, accessor rules) so they
|
/// rules (`property_binding` variants, accessor rules) so they
|
||||||
/// emit it as part of the output node's `modifier:` field.
|
/// emit it as part of the output node's `modifier:` field.
|
||||||
binding_modifier: Option<yeast::NodeRef>,
|
binding_modifier: Option<yeast::Id>,
|
||||||
/// True when the current child of a flattening outer rule is not
|
/// True when the current child of a flattening outer rule is not
|
||||||
/// the first one — its inner rule should emit a
|
/// the first one — its inner rule should emit a
|
||||||
/// `chained_declaration` modifier so the original grouping can be
|
/// `chained_declaration` modifier so the original grouping can be
|
||||||
@@ -45,10 +45,10 @@ struct SwiftContext {
|
|||||||
/// Build a freshly-created `chained_declaration` modifier node if
|
/// Build a freshly-created `chained_declaration` modifier node if
|
||||||
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
|
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
|
||||||
/// emit the chained tag for non-first children of a flattening outer
|
/// emit the chained tag for non-first children of a flattening outer
|
||||||
/// rule. Returns `Option<NodeRef>` so it splices via `{..…}` to 0 or 1 ids.
|
/// rule. Returns `Option<Id>` so it splices via `{..…}` to 0 or 1 ids.
|
||||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::NodeRef> {
|
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::Id> {
|
||||||
if ctx.is_chained {
|
if ctx.is_chained {
|
||||||
Some(ctx.literal("modifier", "chained_declaration").into())
|
Some(ctx.literal("modifier", "chained_declaration"))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
@@ -192,15 +192,21 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// this whole property_binding is itself a non-first declarator
|
// this whole property_binding is itself a non-first declarator
|
||||||
// of a containing property_declaration); subsequent accessors
|
// of a containing property_declaration); subsequent accessors
|
||||||
// always emit `chained_declaration`.
|
// always emit `chained_declaration`.
|
||||||
rule!(
|
manual_rule!(
|
||||||
(property_binding
|
(property_binding
|
||||||
name: @pattern
|
name: @pattern
|
||||||
type: _? @ty
|
type: _? @ty
|
||||||
computed_value: (computed_property accessor: _+ @@accessors))
|
computed_value: (computed_property accessor: _+ @accessors))
|
||||||
=>
|
{
|
||||||
{..{
|
// Translate `ty` first so the context holds an
|
||||||
ctx.property_name = Some(tree!((identifier #{pattern})).into());
|
// output-schema node id.
|
||||||
ctx.property_type = ty;
|
let translated_ty = ctx.translate_opt(ty)?;
|
||||||
|
// Build the property-name identifier from the
|
||||||
|
// (untranslated) pattern leaf.
|
||||||
|
let name_id = tree!((identifier #{pattern}));
|
||||||
|
|
||||||
|
ctx.property_name = Some(name_id);
|
||||||
|
ctx.property_type = translated_ty;
|
||||||
|
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (i, acc) in accessors.into_iter().enumerate() {
|
for (i, acc) in accessors.into_iter().enumerate() {
|
||||||
@@ -209,8 +215,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
}
|
}
|
||||||
result.extend(ctx.translate(acc)?);
|
result.extend(ctx.translate(acc)?);
|
||||||
}
|
}
|
||||||
result
|
Ok(result)
|
||||||
}}
|
}
|
||||||
),
|
),
|
||||||
// Computed property: shorthand getter (no explicit get/set, just
|
// Computed property: shorthand getter (no explicit get/set, just
|
||||||
// statements) → a single accessor_declaration with kind "get".
|
// statements) → a single accessor_declaration with kind "get".
|
||||||
@@ -242,26 +248,30 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// The `variable_declaration` itself inherits the outer rule's
|
// The `variable_declaration` itself inherits the outer rule's
|
||||||
// chained state; observers always get `chained_declaration`
|
// chained state; observers always get `chained_declaration`
|
||||||
// because they're subsequent outputs of this flattening rule.
|
// because they're subsequent outputs of this flattening rule.
|
||||||
rule!(
|
manual_rule!(
|
||||||
(property_binding
|
(property_binding
|
||||||
name: (pattern bound_identifier: @name)
|
name: (pattern bound_identifier: @name)
|
||||||
type: _? @ty
|
type: _? @ty
|
||||||
value: _? @val
|
value: _? @val
|
||||||
observers: (willset_didset_block willset: _? @@ws didset: _? @@ds))
|
observers: (willset_didset_block willset: _? @ws didset: _? @ds))
|
||||||
=>
|
{
|
||||||
{..{
|
// Translate ty and val so the variable_declaration
|
||||||
|
// below contains output-schema nodes.
|
||||||
|
let translated_ty = ctx.translate_opt(ty)?;
|
||||||
|
let translated_val = ctx.translate_opt(val)?;
|
||||||
|
|
||||||
let var_decl = tree!(
|
let var_decl = tree!(
|
||||||
(variable_declaration
|
(variable_declaration
|
||||||
modifier: {..ctx.binding_modifier}
|
modifier: {..ctx.binding_modifier}
|
||||||
modifier: {..ctx.outer_modifiers.clone()}
|
modifier: {..ctx.outer_modifiers.clone()}
|
||||||
modifier: {..chained_modifier(&mut ctx)}
|
modifier: {..chained_modifier(&mut ctx)}
|
||||||
pattern: (name_pattern identifier: (identifier #{name}))
|
pattern: (name_pattern identifier: (identifier #{name}))
|
||||||
type: {..ty}
|
type: {..translated_ty}
|
||||||
value: {..val})
|
value: {..translated_val})
|
||||||
);
|
);
|
||||||
|
|
||||||
// Publish the property name for the observer rules.
|
// Publish the property name for the observer rules.
|
||||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
ctx.property_name = Some(tree!((identifier #{name})));
|
||||||
// Observers are subsequent outputs of this flattening
|
// Observers are subsequent outputs of this flattening
|
||||||
// rule, so they always get `chained_declaration`.
|
// rule, so they always get `chained_declaration`.
|
||||||
ctx.is_chained = true;
|
ctx.is_chained = true;
|
||||||
@@ -270,8 +280,8 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
for obs in ws.into_iter().chain(ds) {
|
for obs in ws.into_iter().chain(ds) {
|
||||||
result.extend(ctx.translate(obs)?);
|
result.extend(ctx.translate(obs)?);
|
||||||
}
|
}
|
||||||
result
|
Ok(result)
|
||||||
}}
|
}
|
||||||
),
|
),
|
||||||
// property_binding with any pattern name (identifier or
|
// property_binding with any pattern name (identifier or
|
||||||
// destructuring). Reads outer modifiers / chained tag from `ctx`.
|
// destructuring). Reads outer modifiers / chained tag from `ctx`.
|
||||||
@@ -299,24 +309,27 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// inner declaration rules (`property_binding` variants,
|
// inner declaration rules (`property_binding` variants,
|
||||||
// accessor inner rules) read these fields and emit complete
|
// accessor inner rules) read these fields and emit complete
|
||||||
// `modifier:` lists from the start.
|
// `modifier:` lists from the start.
|
||||||
rule!(
|
manual_rule!(
|
||||||
(property_declaration
|
(property_declaration
|
||||||
binding: (value_binding_pattern mutability: @@binding_kind)
|
binding: (value_binding_pattern mutability: @binding_kind)
|
||||||
declarator: _* @@decls
|
declarator: _* @decls
|
||||||
(modifiers)* @mods)
|
(modifiers)* @mods)
|
||||||
=>
|
{
|
||||||
{..{
|
let binding_text = ctx.ast.source_text(binding_kind.0);
|
||||||
let binding_text = ctx.ast.source_text(binding_kind.into());
|
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text));
|
||||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text).into());
|
let mut modifiers = Vec::new();
|
||||||
ctx.outer_modifiers = mods;
|
for m in mods {
|
||||||
|
modifiers.extend(ctx.translate(m)?);
|
||||||
|
}
|
||||||
|
ctx.outer_modifiers = modifiers;
|
||||||
|
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (i, decl) in decls.into_iter().enumerate() {
|
for (i, decl) in decls.into_iter().enumerate() {
|
||||||
ctx.is_chained = i > 0;
|
ctx.is_chained = i > 0;
|
||||||
result.extend(ctx.translate(decl)?);
|
result.extend(ctx.translate(decl)?);
|
||||||
}
|
}
|
||||||
result
|
Ok(result)
|
||||||
}}
|
}
|
||||||
),
|
),
|
||||||
// ---- Enums ----
|
// ---- Enums ----
|
||||||
// enum_type_parameter → parameter (with optional name as pattern).
|
// enum_type_parameter → parameter (with optional name as pattern).
|
||||||
@@ -373,19 +386,22 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// into `ctx` and translate each case with `ctx.is_chained`
|
// into `ctx` and translate each case with `ctx.is_chained`
|
||||||
// toggled per iteration so the inner `enum_case_entry` rules
|
// toggled per iteration so the inner `enum_case_entry` rules
|
||||||
// emit complete `modifier:` lists from the start.
|
// emit complete `modifier:` lists from the start.
|
||||||
rule!(
|
manual_rule!(
|
||||||
(enum_entry case: _+ @@cases (modifiers)* @mods)
|
(enum_entry case: _+ @cases (modifiers)* @mods)
|
||||||
=>
|
{
|
||||||
{..{
|
let mut modifiers = Vec::new();
|
||||||
ctx.outer_modifiers = mods;
|
for m in mods {
|
||||||
|
modifiers.extend(ctx.translate(m)?);
|
||||||
|
}
|
||||||
|
ctx.outer_modifiers = modifiers;
|
||||||
|
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (i, case) in cases.into_iter().enumerate() {
|
for (i, case) in cases.into_iter().enumerate() {
|
||||||
ctx.is_chained = i > 0;
|
ctx.is_chained = i > 0;
|
||||||
result.extend(ctx.translate(case)?);
|
result.extend(ctx.translate(case)?);
|
||||||
}
|
}
|
||||||
result
|
Ok(result)
|
||||||
}}
|
}
|
||||||
),
|
),
|
||||||
// Plain assignment: `x = expr`
|
// Plain assignment: `x = expr`
|
||||||
rule!(
|
rule!(
|
||||||
@@ -460,13 +476,12 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// optional default values. Publishes the default value into `ctx`
|
// optional default values. Publishes the default value into `ctx`
|
||||||
// before translating the inner `parameter` so the `parameter`
|
// before translating the inner `parameter` so the `parameter`
|
||||||
// rules can include it as a `default:` field directly.
|
// rules can include it as a `default:` field directly.
|
||||||
rule!(
|
manual_rule!(
|
||||||
(function_parameter parameter: @@p default_value: _? @def)
|
(function_parameter parameter: @p default_value: _? @def)
|
||||||
=>
|
{
|
||||||
{..{
|
ctx.default_value = ctx.translate_opt(def)?;
|
||||||
ctx.default_value = def;
|
ctx.translate(p)
|
||||||
ctx.translate(p)?
|
}
|
||||||
}}
|
|
||||||
),
|
),
|
||||||
// Parameter with external name and type
|
// Parameter with external name and type
|
||||||
rule!(
|
rule!(
|
||||||
@@ -1011,25 +1026,28 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
|||||||
// inner `getter_specifier`/`setter_specifier` rules emit
|
// inner `getter_specifier`/`setter_specifier` rules emit
|
||||||
// complete nodes from the start (including the
|
// complete nodes from the start (including the
|
||||||
// `chained_declaration` tag for non-first accessors).
|
// `chained_declaration` tag for non-first accessors).
|
||||||
rule!(
|
manual_rule!(
|
||||||
(protocol_property_declaration
|
(protocol_property_declaration
|
||||||
name: (pattern bound_identifier: @name)
|
name: (pattern bound_identifier: @name)
|
||||||
requirements: (protocol_property_requirements accessor: _+ @@accessors)
|
requirements: (protocol_property_requirements accessor: _+ @accessors)
|
||||||
type: _? @ty
|
type: _? @ty
|
||||||
(modifiers)* @mods)
|
(modifiers)* @mods)
|
||||||
=>
|
{
|
||||||
{..{
|
ctx.property_name = Some(tree!((identifier #{name})));
|
||||||
ctx.property_name = Some(tree!((identifier #{name})).into());
|
ctx.property_type = ctx.translate_opt(ty)?;
|
||||||
ctx.property_type = ty;
|
let mut modifiers = Vec::new();
|
||||||
ctx.outer_modifiers = mods;
|
for m in mods {
|
||||||
|
modifiers.extend(ctx.translate(m)?);
|
||||||
|
}
|
||||||
|
ctx.outer_modifiers = modifiers;
|
||||||
|
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for (i, acc) in accessors.into_iter().enumerate() {
|
for (i, acc) in accessors.into_iter().enumerate() {
|
||||||
ctx.is_chained = i > 0;
|
ctx.is_chained = i > 0;
|
||||||
result.extend(ctx.translate(acc)?);
|
result.extend(ctx.translate(acc)?);
|
||||||
}
|
}
|
||||||
result
|
Ok(result)
|
||||||
}}
|
}
|
||||||
),
|
),
|
||||||
// getter_specifier / setter_specifier → bodyless accessor_declaration
|
// getter_specifier / setter_specifier → bodyless accessor_declaration
|
||||||
// getter_specifier / setter_specifier → bodyless
|
// getter_specifier / setter_specifier → bodyless
|
||||||
|
|||||||
Reference in New Issue
Block a user