Compare commits

..

2 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
2e6bc6612c Update inline expectations and relearn affected tests 2026-06-26 08:07:15 +00:00
copilot-swe-agent[bot]
5c2614283d Initial plan 2026-06-26 07:28:39 +00:00
63 changed files with 468 additions and 2797 deletions

View File

@@ -3,13 +3,13 @@ class C
void Problems()
{
// correct expectation comment, but only for `problem-query`
var x = "Alert"; // $ Alert
var x = "Alert"; // $ Alert[problem-query]
// irrelevant expectation comment, will be ignored
x = "Not an alert"; // $ IrrelevantTag
// incorrect expectation comment
x = "Also not an alert"; // $ Alert
x = "Also not an alert"; // $ MISSING: Alert[problem-query]
// missing expectation comment, but only for `problem-query`
x = "Alert";

View File

@@ -13,8 +13,6 @@
| InlineTests.cs:88:13:88:23 | "Alert:0:1" | InlineTests.cs:88:13:88:23 | "Alert:0:1" | InlineTests.cs:87:16:87:21 | "Sink" | This is a problem |
edges
testFailures
| InlineTests.cs:6:26:6:35 | // ... | Missing result: Alert |
| InlineTests.cs:12:34:12:43 | // ... | Missing result: Alert |
| InlineTests.cs:37:28:37:38 | // ... | Missing result: Source |
| InlineTests.cs:38:24:38:32 | // ... | Missing result: Sink |
| InlineTests.cs:39:33:39:42 | // ... | Missing result: Alert |

View File

@@ -3,8 +3,6 @@
| InlineTests.cs:100:13:100:25 | "Alert:3:2:1" | InlineTests.cs:97:18:97:25 | "Source" | InlineTests.cs:98:16:98:21 | "Sink" | This is a problem with $@ | InlineTests.cs:99:19:99:27 | "Related" | a related location |
edges
testFailures
| InlineTests.cs:6:26:6:35 | // ... | Missing result: Alert |
| InlineTests.cs:12:34:12:43 | // ... | Missing result: Alert |
| InlineTests.cs:32:32:32:42 | // ... | Missing result: Source |
| InlineTests.cs:33:28:33:36 | // ... | Missing result: Sink |
| InlineTests.cs:34:30:34:39 | // ... | Missing result: Alert |

View File

@@ -3,7 +3,6 @@
| InlineTests.cs:15:13:15:19 | "Alert" | This is a problem |
| InlineTests.cs:18:13:18:19 | "Alert" | This is a problem |
testFailures
| InlineTests.cs:12:34:12:43 | // ... | Missing result: Alert |
| InlineTests.cs:15:13:15:19 | This is a problem | Unexpected result: Alert |
| InlineTests.cs:34:30:34:39 | // ... | Missing result: Alert |
| InlineTests.cs:39:33:39:42 | // ... | Missing result: Alert |

View File

@@ -2,8 +2,6 @@
| InlineTests.cs:22:13:22:21 | "Alert:1" | This is a problem with $@ | InlineTests.cs:21:23:21:31 | "Related" | a related location |
| InlineTests.cs:26:13:26:21 | "Alert:1" | This is a problem with $@ | InlineTests.cs:25:19:25:27 | "Related" | a related location |
testFailures
| InlineTests.cs:6:26:6:35 | // ... | Missing result: Alert |
| InlineTests.cs:12:34:12:43 | // ... | Missing result: Alert |
| InlineTests.cs:25:19:25:27 | "Related" | Unexpected result: RelatedLocation |
| InlineTests.cs:34:30:34:39 | // ... | Missing result: Alert |
| InlineTests.cs:39:33:39:42 | // ... | Missing result: Alert |

View File

@@ -33,11 +33,9 @@ module StoredXss {
walkFn.getACall().getArgument(1) = f.getASuccessor*()
)
or
// The return value of a call to `os.DirEntry.Name`, `os.FileInfo.Name`
// or `os.File.ReadDirNames`.
exists(DataFlow::CallNode cn, Method m | m = cn.getTarget() and this = cn.getResult(0) |
m.implements("io/fs", ["DirEntry", "FileInfo"], "Name") or
m.hasQualifiedName("os", "File", "ReadDirNames")
// A call to os.FileInfo.Name
exists(Method m | m.implements("io/fs", "FileInfo", "Name") |
m = this.(DataFlow::CallNode).getTarget()
)
}
}

View File

@@ -156,3 +156,12 @@ nodes
| websocketXss.go:54:3:54:38 | ... := ...[1] | semmle.label | ... := ...[1] |
| websocketXss.go:55:24:55:31 | gorilla3 | semmle.label | gorilla3 |
subpaths
testFailures
| websocketXss.go:30:32:30:60 | comment | Missing result: Source[go/reflected-xss] |
| websocketXss.go:31:11:31:14 | xnet [postupdate] | Unexpected result: Source |
| websocketXss.go:34:30:34:58 | comment | Missing result: Source[go/reflected-xss] |
| websocketXss.go:35:21:35:25 | xnet2 [postupdate] | Unexpected result: Source |
| websocketXss.go:46:38:46:66 | comment | Missing result: Source[go/reflected-xss] |
| websocketXss.go:47:26:47:35 | gorillaMsg [postupdate] | Unexpected result: Source |
| websocketXss.go:50:33:50:61 | comment | Missing result: Source[go/reflected-xss] |
| websocketXss.go:51:17:51:24 | gorilla2 [postupdate] | Unexpected result: Source |

View File

@@ -1,9 +1,7 @@
#select
| StoredXss.go:13:21:13:36 | ...+... | StoredXss.go:13:21:13:31 | call to Name | StoredXss.go:13:21:13:36 | ...+... | Stored cross-site scripting vulnerability due to $@. | StoredXss.go:13:21:13:31 | call to Name | stored value |
| stored.go:30:22:30:25 | name | stored.go:18:3:18:28 | ... := ...[0] | stored.go:30:22:30:25 | name | Stored cross-site scripting vulnerability due to $@. | stored.go:18:3:18:28 | ... := ...[0] | stored value |
| stored.go:61:22:61:25 | path | stored.go:59:30:59:33 | SSA def(path) | stored.go:61:22:61:25 | path | Stored cross-site scripting vulnerability due to $@. | stored.go:59:30:59:33 | SSA def(path) | stored value |
edges
| StoredXss.go:13:21:13:31 | call to Name | StoredXss.go:13:21:13:36 | ...+... | provenance | |
| stored.go:18:3:18:28 | ... := ...[0] | stored.go:25:14:25:17 | rows | provenance | Src:MaD:1 |
| stored.go:25:14:25:17 | rows | stored.go:25:29:25:33 | &... [postupdate] | provenance | FunctionModel |
| stored.go:25:29:25:33 | &... [postupdate] | stored.go:30:22:30:25 | name | provenance | |
@@ -11,8 +9,6 @@ edges
models
| 1 | Source: database/sql; DB; true; Query; ; ; ReturnValue[0]; database; manual |
nodes
| StoredXss.go:13:21:13:31 | call to Name | semmle.label | call to Name |
| StoredXss.go:13:21:13:36 | ...+... | semmle.label | ...+... |
| stored.go:18:3:18:28 | ... := ...[0] | semmle.label | ... := ...[0] |
| stored.go:25:14:25:17 | rows | semmle.label | rows |
| stored.go:25:29:25:33 | &... [postupdate] | semmle.label | &... [postupdate] |
@@ -20,3 +16,5 @@ nodes
| stored.go:59:30:59:33 | SSA def(path) | semmle.label | SSA def(path) |
| stored.go:61:22:61:25 | path | semmle.label | path |
subpaths
testFailures
| StoredXss.go:13:39:13:63 | comment | Missing result: Alert[go/stored-xss] |

View File

@@ -27,12 +27,12 @@ func xss(w http.ResponseWriter, r *http.Request) {
origin := "test"
{
ws, _ := websocket.Dial(uri, "", origin)
var xnet = make([]byte, 512)
ws.Read(xnet) // $ Source[go/reflected-xss]
var xnet = make([]byte, 512) // $ Source[go/reflected-xss]
ws.Read(xnet)
fmt.Fprintf(w, "%v", xnet) // $ Alert[go/reflected-xss]
codec := &websocket.Codec{Marshal: marshal, Unmarshal: unmarshal}
xnet2 := make([]byte, 512)
codec.Receive(ws, xnet2) // $ Source[go/reflected-xss]
xnet2 := make([]byte, 512) // $ Source[go/reflected-xss]
codec.Receive(ws, xnet2)
fmt.Fprintf(w, "%v", xnet2) // $ Alert[go/reflected-xss]
}
{
@@ -43,12 +43,12 @@ func xss(w http.ResponseWriter, r *http.Request) {
{
dialer := gorilla.Dialer{}
conn, _, _ := dialer.Dial(uri, nil)
var gorillaMsg = make([]byte, 512)
gorilla.ReadJSON(conn, gorillaMsg) // $ Source[go/reflected-xss]
fmt.Fprintf(w, "%v", gorillaMsg) // $ Alert[go/reflected-xss]
var gorillaMsg = make([]byte, 512) // $ Source[go/reflected-xss]
gorilla.ReadJSON(conn, gorillaMsg)
fmt.Fprintf(w, "%v", gorillaMsg) // $ Alert[go/reflected-xss]
gorilla2 := make([]byte, 512)
conn.ReadJSON(gorilla2) // $ Source[go/reflected-xss]
gorilla2 := make([]byte, 512) // $ Source[go/reflected-xss]
conn.ReadJSON(gorilla2)
fmt.Fprintf(w, "%v", gorilla2) // $ Alert[go/reflected-xss]
_, gorilla3, _ := conn.ReadMessage() // $ Source[go/reflected-xss]

View File

@@ -30,7 +30,5 @@ nodes
| BadMacUse.java:152:42:152:51 | ciphertext | semmle.label | ciphertext |
subpaths
testFailures
| BadMacUse.java:50:56:50:66 | // $ Source | Missing result: Source |
| BadMacUse.java:63:118:63:128 | // $ Source | Missing result: Source |
| BadMacUse.java:92:31:92:35 | bytes : byte[] | Unexpected result: Source |
| BadMacUse.java:146:95:146:105 | // $ Source | Missing result: Source |

View File

@@ -31,7 +31,7 @@ nodes
| BadMacUse.java:124:42:124:51 | ciphertext | semmle.label | ciphertext |
subpaths
testFailures
| BadMacUse.java:63:118:63:128 | // $ Source | Missing result: Source |
| BadMacUse.java:50:28:50:53 | doFinal(...) : byte[] | Fixed missing result: Source |
| BadMacUse.java:92:16:92:36 | doFinal(...) : byte[] | Unexpected result: Source |
| BadMacUse.java:124:42:124:51 | ciphertext | Unexpected result: Alert |
| BadMacUse.java:146:95:146:105 | // $ Source | Missing result: Source |

View File

@@ -45,7 +45,7 @@ nodes
| BadMacUse.java:152:42:152:51 | ciphertext | semmle.label | ciphertext |
subpaths
testFailures
| BadMacUse.java:50:56:50:66 | // $ Source | Missing result: Source |
| BadMacUse.java:63:82:63:97 | plaintext : byte[] | Fixed missing result: Source |
| BadMacUse.java:139:79:139:90 | input : byte[] | Unexpected result: Source |
| BadMacUse.java:146:95:146:105 | // $ Source | Missing result: Source |
| BadMacUse.java:152:42:152:51 | ciphertext | Unexpected result: Alert |

View File

@@ -47,7 +47,7 @@ class BadMacUse {
SecretKey encryptionKey = new SecretKeySpec(encryptionKeyBytes, "AES");
Cipher cipher = Cipher.getInstance("AES/GCM/NoPadding");
cipher.init(Cipher.DECRYPT_MODE, encryptionKey, new SecureRandom());
byte[] plaintext = cipher.doFinal(ciphertext); // $ Source
byte[] plaintext = cipher.doFinal(ciphertext); // $ MISSING: Source
// Now verify MAC (too late)
SecretKey macKey = new SecretKeySpec(macKeyBytes, "HmacSHA256");
@@ -60,7 +60,7 @@ class BadMacUse {
}
}
public void BadMacOnPlaintext(byte[] encryptionKeyBytes, byte[] macKeyBytes, byte[] plaintext) throws Exception {// $ Source
public void BadMacOnPlaintext(byte[] encryptionKeyBytes, byte[] macKeyBytes, byte[] plaintext) throws Exception {// $ MISSING: Source
// Create keys directly from provided byte arrays
SecretKey encryptionKey = new SecretKeySpec(encryptionKeyBytes, "AES");
SecretKey macKey = new SecretKeySpec(macKeyBytes, "HmacSHA256");

View File

@@ -126,5 +126,3 @@ nodes
| InsecureIVorNonceSource.java:202:54:202:55 | iv : byte[] | semmle.label | iv : byte[] |
| InsecureIVorNonceSource.java:206:51:206:56 | ivSpec | semmle.label | ivSpec |
subpaths
testFailures
| InsecureIVorNonceSource.java:42:21:42:21 | 1 : Number | Unexpected result: Source |

View File

@@ -39,7 +39,7 @@ public class InsecureIVorNonceSource {
public byte[] encryptWithStaticIvByteArray(byte[] key, byte[] plaintext) throws Exception {
byte[] iv = new byte[16];
for (byte i = 0; i < iv.length; i++) {
iv[i] = 1;
iv[i] = 1; // $ Source
}
IvParameterSpec ivSpec = new IvParameterSpec(iv);

View File

@@ -40,11 +40,11 @@ public class Test {
* SAST/CBOM: - Parent: PBKDF2. - Iteration count is only 10, which is far
* below acceptable security standards. - Flagged as insecure.
*/
public void pbkdf2LowIteration(String password, int iterationCount) throws Exception { // $ Source
public void pbkdf2LowIteration(String password, int iterationCount) throws Exception { // $ MISSING: Source
byte[] salt = generateSalt(16);
PBEKeySpec spec = new PBEKeySpec(password.toCharArray(), salt, iterationCount, 256); // $ Alert[java/quantum/examples/unknown-kdf-iteration-count]
PBEKeySpec spec = new PBEKeySpec(password.toCharArray(), salt, iterationCount, 256);
SecretKeyFactory factory = SecretKeyFactory.getInstance("PBKDF2WithHmacSHA256");
byte[] key = factory.generateSecret(spec).getEncoded();
byte[] key = factory.generateSecret(spec).getEncoded(); // $ Alert[java/quantum/examples/unknown-kdf-iteration-count]
}
/**

View File

@@ -1,5 +1 @@
#select
| Test.java:47:22:47:49 | KeyDerivation | Key derivation operation with unknown iteration: $@ | Test.java:43:53:43:70 | iterationCount | iterationCount |
testFailures
| Test.java:45:94:45:154 | // $ Alert[java/quantum/examples/unknown-kdf-iteration-count] | Missing result: Alert[java/quantum/examples/unknown-kdf-iteration-count] |
| Test.java:47:22:47:49 | Key derivation operation with unknown iteration: $@ | Unexpected result: Alert |

View File

@@ -12,5 +12,3 @@ nodes
| Test.java:58:30:58:38 | 1_000_000 : Number | semmle.label | 1_000_000 : Number |
| Test.java:59:72:59:85 | iterationCount | semmle.label | iterationCount |
subpaths
testFailures
| Test.java:43:92:43:102 | // $ Source | Missing result: Source |

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added support for Angular's `@HostListener('window:message', ...)` and `@HostListener('document:message', ...)` decorators as `postMessage` event handlers. The decorated method's event parameter is now recognized as a client-side remote flow source, and is considered by the `js/missing-origin-check` query.

View File

@@ -195,18 +195,6 @@ class PostMessageEventHandler extends Function {
rhs = DataFlow::globalObjectRef().getAPropertyWrite("onmessage").getRhs() and
rhs.getABoundFunctionValue(paramIndex).getFunction() = this
)
or
// Angular's `@HostListener('window:message', ['$event'])` decorator registers
// a method as a `message` event handler on the global `window` or `document`
// target. The decorated method receives the `MessageEvent` as its first
// parameter, so it is equivalent to `window.addEventListener('message', ...)`.
exists(MethodDefinition method, DataFlow::CallNode decorator |
decorator = DataFlow::moduleMember("@angular/core", "HostListener").getACall() and
decorator = method.getADecorator().getExpression().flow() and
decorator.getArgument(0).mayHaveStringValue(["window:message", "document:message"]) and
method.getBody() = this and
paramIndex = 0
)
}
/**

View File

@@ -1,29 +0,0 @@
import { Component, HostListener } from '@angular/core';
@Component({ selector: 'app-root' })
class AngularComponent {
// Angular registers this as a `window` message handler via the decorator,
// equivalent to `window.addEventListener('message', ...)`.
@HostListener('window:message', ['$event'])
onWindowMessage(event: MessageEvent): void { // $ Alert - no origin check
eval(event.data);
}
@HostListener('document:message', ['$event'])
onDocumentMessage(event: MessageEvent): void { // $ Alert - no origin check
eval(event.data);
}
@HostListener('window:message', ['$event'])
onCheckedMessage(event: MessageEvent): void { // OK - has an origin check
if (event.origin === 'https://www.example.com') {
eval(event.data);
}
}
// Not a message event, so it is not a postMessage handler.
@HostListener('window:resize', ['$event'])
onResize(event: MessageEvent): void { // OK - not a message handler
eval(event.data);
}
}

View File

@@ -1,5 +1,3 @@
| Angular.ts:8:19:8:23 | event | Postmessage handler has no origin check. |
| Angular.ts:13:21:13:25 | event | Postmessage handler has no origin check. |
| tst.js:11:20:11:24 | event | Postmessage handler has no origin check. |
| tst.js:24:27:24:27 | e | Postmessage handler has no origin check. |
| tst.js:40:27:40:27 | e | Postmessage handler has no origin check. |

View File

@@ -3,5 +3,5 @@ argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
| taint_test.py:32:9:32:25 | taint_test.py:32 | ERROR, you should add `# $ MISSING: tainted` annotation | should_be_tainted |
| taint_test.py:37:24:37:40 | taint_test.py:37 | ERROR, you should add `# $ MISSING: tainted` annotation | should_be_tainted |
| taint_test.py:41:24:41:40 | taint_test.py:41 | ERROR, you should add `# $ MISSING: tainted` annotation | should_be_tainted |
testFailures
| taint_test.py:41:20:41:21 | ts | Fixed missing result: tainted |

View File

@@ -38,7 +38,7 @@ def bad_usage():
# if you try to get around it by adding BOTH annotations, that results in a problem
# from the default set of inline-test-expectation rules
ensure_tainted(ts, should_be_tainted) # $ tainted MISSING: tainted
ensure_tainted(ts, should_be_tainted) # $ tainted
# simulating handling something we _want_ to treat at untainted, but we currently treat as tainted
should_not_be_tainted = "pretend this is now safe" + ts

View File

@@ -1312,244 +1312,6 @@ module QL {
/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { ql_variable_def(this, result) }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(AddExpr).getLeft() and i = -1 and name = "getLeft"
or
result = node.(AddExpr).getRight() and i = -1 and name = "getRight"
or
result = node.(AddExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(Aggregate).getChild(i) and name = "getChild"
or
result = node.(AnnotArg).getChild() and i = -1 and name = "getChild"
or
result = node.(Annotation).getArgs(i) and name = "getArgs"
or
result = node.(Annotation).getName() and i = -1 and name = "getName"
or
result = node.(AritylessPredicateExpr).getName() and i = -1 and name = "getName"
or
result = node.(AritylessPredicateExpr).getQualifier() and i = -1 and name = "getQualifier"
or
result = node.(AsExpr).getChild(i) and name = "getChild"
or
result = node.(AsExprs).getChild(i) and name = "getChild"
or
result = node.(Body).getChild() and i = -1 and name = "getChild"
or
result = node.(Bool).getChild() and i = -1 and name = "getChild"
or
result = node.(CallBody).getChild(i) and name = "getChild"
or
result = node.(CallOrUnqualAggExpr).getChild(i) and name = "getChild"
or
result = node.(Charpred).getBody() and i = -1 and name = "getBody"
or
result = node.(Charpred).getChild() and i = -1 and name = "getChild"
or
result = node.(ClassMember).getChild(i) and name = "getChild"
or
result = node.(ClasslessPredicate).getName() and i = -1 and name = "getName"
or
result = node.(ClasslessPredicate).getReturnType() and i = -1 and name = "getReturnType"
or
result = node.(ClasslessPredicate).getChild(i) and name = "getChild"
or
result = node.(CompTerm).getLeft() and i = -1 and name = "getLeft"
or
result = node.(CompTerm).getRight() and i = -1 and name = "getRight"
or
result = node.(CompTerm).getChild() and i = -1 and name = "getChild"
or
result = node.(Conjunction).getLeft() and i = -1 and name = "getLeft"
or
result = node.(Conjunction).getRight() and i = -1 and name = "getRight"
or
result = node.(Dataclass).getExtends(i) and name = "getExtends"
or
result = node.(Dataclass).getInstanceof(i) and name = "getInstanceof"
or
result = node.(Dataclass).getName() and i = -1 and name = "getName"
or
result = node.(Dataclass).getChild(i) and name = "getChild"
or
result = node.(Datatype).getName() and i = -1 and name = "getName"
or
result = node.(Datatype).getChild() and i = -1 and name = "getChild"
or
result = node.(DatatypeBranch).getName() and i = -1 and name = "getName"
or
result = node.(DatatypeBranch).getChild(i) and name = "getChild"
or
result = node.(DatatypeBranches).getChild(i) and name = "getChild"
or
result = node.(Disjunction).getLeft() and i = -1 and name = "getLeft"
or
result = node.(Disjunction).getRight() and i = -1 and name = "getRight"
or
result = node.(ExprAggregateBody).getAsExprs() and i = -1 and name = "getAsExprs"
or
result = node.(ExprAggregateBody).getOrderBys() and i = -1 and name = "getOrderBys"
or
result = node.(ExprAnnotation).getAnnotArg() and i = -1 and name = "getAnnotArg"
or
result = node.(ExprAnnotation).getName() and i = -1 and name = "getName"
or
result = node.(ExprAnnotation).getChild() and i = -1 and name = "getChild"
or
result = node.(Field).getChild() and i = -1 and name = "getChild"
or
result = node.(FullAggregateBody).getAsExprs() and i = -1 and name = "getAsExprs"
or
result = node.(FullAggregateBody).getGuard() and i = -1 and name = "getGuard"
or
result = node.(FullAggregateBody).getOrderBys() and i = -1 and name = "getOrderBys"
or
result = node.(FullAggregateBody).getChild(i) and name = "getChild"
or
result = node.(HigherOrderTerm).getName() and i = -1 and name = "getName"
or
result = node.(HigherOrderTerm).getChild(i) and name = "getChild"
or
result = node.(IfTerm).getCond() and i = -1 and name = "getCond"
or
result = node.(IfTerm).getFirst() and i = -1 and name = "getFirst"
or
result = node.(IfTerm).getSecond() and i = -1 and name = "getSecond"
or
result = node.(Implication).getLeft() and i = -1 and name = "getLeft"
or
result = node.(Implication).getRight() and i = -1 and name = "getRight"
or
result = node.(ImportDirective).getChild(i) and name = "getChild"
or
result = node.(ImportModuleExpr).getQualName(i) and name = "getQualName"
or
result = node.(ImportModuleExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(InExpr).getLeft() and i = -1 and name = "getLeft"
or
result = node.(InExpr).getRight() and i = -1 and name = "getRight"
or
result = node.(InstanceOf).getChild(i) and name = "getChild"
or
result = node.(Literal).getChild() and i = -1 and name = "getChild"
or
result = node.(MemberPredicate).getName() and i = -1 and name = "getName"
or
result = node.(MemberPredicate).getReturnType() and i = -1 and name = "getReturnType"
or
result = node.(MemberPredicate).getChild(i) and name = "getChild"
or
result = node.(Module).getImplements(i) and name = "getImplements"
or
result = node.(Module).getName() and i = -1 and name = "getName"
or
result = node.(Module).getParameter(i) and name = "getParameter"
or
result = node.(Module).getChild(i) and name = "getChild"
or
result = node.(ModuleAliasBody).getChild() and i = -1 and name = "getChild"
or
result = node.(ModuleExpr).getName() and i = -1 and name = "getName"
or
result = node.(ModuleExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(ModuleInstantiation).getName() and i = -1 and name = "getName"
or
result = node.(ModuleInstantiation).getChild(i) and name = "getChild"
or
result = node.(ModuleMember).getChild(i) and name = "getChild"
or
result = node.(ModuleName).getChild() and i = -1 and name = "getChild"
or
result = node.(ModuleParam).getParameter() and i = -1 and name = "getParameter"
or
result = node.(ModuleParam).getSignature() and i = -1 and name = "getSignature"
or
result = node.(MulExpr).getLeft() and i = -1 and name = "getLeft"
or
result = node.(MulExpr).getRight() and i = -1 and name = "getRight"
or
result = node.(MulExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(Negation).getChild() and i = -1 and name = "getChild"
or
result = node.(OrderBy).getChild(i) and name = "getChild"
or
result = node.(OrderBys).getChild(i) and name = "getChild"
or
result = node.(ParExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(PredicateAliasBody).getChild() and i = -1 and name = "getChild"
or
result = node.(PredicateExpr).getChild(i) and name = "getChild"
or
result = node.(PrefixCast).getChild(i) and name = "getChild"
or
result = node.(Ql).getChild(i) and name = "getChild"
or
result = node.(QualifiedRhs).getName() and i = -1 and name = "getName"
or
result = node.(QualifiedRhs).getChild(i) and name = "getChild"
or
result = node.(QualifiedExpr).getChild(i) and name = "getChild"
or
result = node.(Quantified).getExpr() and i = -1 and name = "getExpr"
or
result = node.(Quantified).getFormula() and i = -1 and name = "getFormula"
or
result = node.(Quantified).getRange() and i = -1 and name = "getRange"
or
result = node.(Quantified).getChild(i) and name = "getChild"
or
result = node.(Range).getLower() and i = -1 and name = "getLower"
or
result = node.(Range).getUpper() and i = -1 and name = "getUpper"
or
result = node.(Select).getChild(i) and name = "getChild"
or
result = node.(SetLiteral).getChild(i) and name = "getChild"
or
result = node.(SignatureExpr).getModExpr() and i = -1 and name = "getModExpr"
or
result = node.(SignatureExpr).getPredicate() and i = -1 and name = "getPredicate"
or
result = node.(SignatureExpr).getTypeExpr() and i = -1 and name = "getTypeExpr"
or
result = node.(SpecialCall).getChild() and i = -1 and name = "getChild"
or
result = node.(SuperRef).getChild(i) and name = "getChild"
or
result = node.(TypeAliasBody).getChild() and i = -1 and name = "getChild"
or
result = node.(TypeExpr).getName() and i = -1 and name = "getName"
or
result = node.(TypeExpr).getQualifier() and i = -1 and name = "getQualifier"
or
result = node.(TypeExpr).getChild() and i = -1 and name = "getChild"
or
result = node.(TypeUnionBody).getChild(i) and name = "getChild"
or
result = node.(UnaryExpr).getChild(i) and name = "getChild"
or
result = node.(UnqualAggBody).getAsExprs(i) and name = "getAsExprs"
or
result = node.(UnqualAggBody).getGuard() and i = -1 and name = "getGuard"
or
result = node.(UnqualAggBody).getChild(i) and name = "getChild"
or
result = node.(VarDecl).getChild(i) and name = "getChild"
or
result = node.(VarName).getChild() and i = -1 and name = "getChild"
or
result = node.(Variable).getChild() and i = -1 and name = "getChild"
}
}
}
overlay[local]
@@ -1907,60 +1669,6 @@ module Dbscheme {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Varchar" }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(Annotation).getArgsAnnotation() and i = -1 and name = "getArgsAnnotation"
or
result = node.(Annotation).getSimpleAnnotation() and i = -1 and name = "getSimpleAnnotation"
or
result = node.(ArgsAnnotation).getName() and i = -1 and name = "getName"
or
result = node.(ArgsAnnotation).getChild(i) and name = "getChild"
or
result = node.(Branch).getQldoc() and i = -1 and name = "getQldoc"
or
result = node.(Branch).getChild(i) and name = "getChild"
or
result = node.(CaseDecl).getBase() and i = -1 and name = "getBase"
or
result = node.(CaseDecl).getDiscriminator() and i = -1 and name = "getDiscriminator"
or
result = node.(CaseDecl).getChild(i) and name = "getChild"
or
result = node.(ColType).getChild() and i = -1 and name = "getChild"
or
result = node.(Column).getColName() and i = -1 and name = "getColName"
or
result = node.(Column).getColType() and i = -1 and name = "getColType"
or
result = node.(Column).getIsRef() and i = -1 and name = "getIsRef"
or
result = node.(Column).getIsUnique() and i = -1 and name = "getIsUnique"
or
result = node.(Column).getQldoc() and i = -1 and name = "getQldoc"
or
result = node.(Column).getReprType() and i = -1 and name = "getReprType"
or
result = node.(Dbscheme).getChild(i) and name = "getChild"
or
result = node.(Entry).getChild() and i = -1 and name = "getChild"
or
result = node.(ReprType).getChild(i) and name = "getChild"
or
result = node.(Table).getTableName() and i = -1 and name = "getTableName"
or
result = node.(Table).getChild(i) and name = "getChild"
or
result = node.(TableName).getChild() and i = -1 and name = "getChild"
or
result = node.(UnionDecl).getBase() and i = -1 and name = "getBase"
or
result = node.(UnionDecl).getChild(i) and name = "getChild"
}
}
}
overlay[local]
@@ -2095,24 +1803,6 @@ module Blame {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Number" }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(BlameEntry).getDate() and i = -1 and name = "getDate"
or
result = node.(BlameEntry).getLine(i) and name = "getLine"
or
result = node.(BlameInfo).getFileEntry(i) and name = "getFileEntry"
or
result = node.(BlameInfo).getToday() and i = -1 and name = "getToday"
or
result = node.(FileEntry).getBlameEntry(i) and name = "getBlameEntry"
or
result = node.(FileEntry).getFileName() and i = -1 and name = "getFileName"
}
}
}
overlay[local]
@@ -2287,22 +1977,4 @@ module JSON {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "True" }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(Array).getChild(i) and name = "getChild"
or
result = node.(Document).getChild(i) and name = "getChild"
or
result = node.(Object).getChild(i) and name = "getChild"
or
result = node.(Pair).getKey() and i = -1 and name = "getKey"
or
result = node.(Pair).getValue() and i = -1 and name = "getValue"
or
result = node.(String).getChild(i) and name = "getChild"
}
}
}

View File

@@ -1964,340 +1964,6 @@ module Ruby {
/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { ruby_yield_child(this, result) }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(Alias).getAlias() and i = -1 and name = "getAlias"
or
result = node.(Alias).getName() and i = -1 and name = "getName"
or
result = node.(AlternativePattern).getAlternatives(i) and name = "getAlternatives"
or
result = node.(ArgumentList).getChild(i) and name = "getChild"
or
result = node.(Array).getChild(i) and name = "getChild"
or
result = node.(ArrayPattern).getClass() and i = -1 and name = "getClass"
or
result = node.(ArrayPattern).getChild(i) and name = "getChild"
or
result = node.(AsPattern).getName() and i = -1 and name = "getName"
or
result = node.(AsPattern).getValue() and i = -1 and name = "getValue"
or
result = node.(Assignment).getLeft() and i = -1 and name = "getLeft"
or
result = node.(Assignment).getRight() and i = -1 and name = "getRight"
or
result = node.(BareString).getChild(i) and name = "getChild"
or
result = node.(BareSymbol).getChild(i) and name = "getChild"
or
result = node.(Begin).getChild(i) and name = "getChild"
or
result = node.(BeginBlock).getChild(i) and name = "getChild"
or
result = node.(Binary).getLeft() and i = -1 and name = "getLeft"
or
result = node.(Binary).getRight() and i = -1 and name = "getRight"
or
result = node.(Block).getBody() and i = -1 and name = "getBody"
or
result = node.(Block).getParameters() and i = -1 and name = "getParameters"
or
result = node.(BlockArgument).getChild() and i = -1 and name = "getChild"
or
result = node.(BlockBody).getChild(i) and name = "getChild"
or
result = node.(BlockParameter).getName() and i = -1 and name = "getName"
or
result = node.(BlockParameters).getLocals(i) and name = "getLocals"
or
result = node.(BlockParameters).getChild(i) and name = "getChild"
or
result = node.(BodyStatement).getChild(i) and name = "getChild"
or
result = node.(Break).getChild() and i = -1 and name = "getChild"
or
result = node.(Call).getArguments() and i = -1 and name = "getArguments"
or
result = node.(Call).getBlock() and i = -1 and name = "getBlock"
or
result = node.(Call).getMethod() and i = -1 and name = "getMethod"
or
result = node.(Call).getOperator() and i = -1 and name = "getOperator"
or
result = node.(Call).getReceiver() and i = -1 and name = "getReceiver"
or
result = node.(Case).getValue() and i = -1 and name = "getValue"
or
result = node.(Case).getChild(i) and name = "getChild"
or
result = node.(CaseMatch).getClauses(i) and name = "getClauses"
or
result = node.(CaseMatch).getElse() and i = -1 and name = "getElse"
or
result = node.(CaseMatch).getValue() and i = -1 and name = "getValue"
or
result = node.(ChainedString).getChild(i) and name = "getChild"
or
result = node.(Class).getBody() and i = -1 and name = "getBody"
or
result = node.(Class).getName() and i = -1 and name = "getName"
or
result = node.(Class).getSuperclass() and i = -1 and name = "getSuperclass"
or
result = node.(Complex).getChild() and i = -1 and name = "getChild"
or
result = node.(Conditional).getAlternative() and i = -1 and name = "getAlternative"
or
result = node.(Conditional).getCondition() and i = -1 and name = "getCondition"
or
result = node.(Conditional).getConsequence() and i = -1 and name = "getConsequence"
or
result = node.(DelimitedSymbol).getChild(i) and name = "getChild"
or
result = node.(DestructuredLeftAssignment).getChild(i) and name = "getChild"
or
result = node.(DestructuredParameter).getChild(i) and name = "getChild"
or
result = node.(Do).getChild(i) and name = "getChild"
or
result = node.(DoBlock).getBody() and i = -1 and name = "getBody"
or
result = node.(DoBlock).getParameters() and i = -1 and name = "getParameters"
or
result = node.(ElementReference).getBlock() and i = -1 and name = "getBlock"
or
result = node.(ElementReference).getObject() and i = -1 and name = "getObject"
or
result = node.(ElementReference).getChild(i) and name = "getChild"
or
result = node.(Else).getChild(i) and name = "getChild"
or
result = node.(Elsif).getAlternative() and i = -1 and name = "getAlternative"
or
result = node.(Elsif).getCondition() and i = -1 and name = "getCondition"
or
result = node.(Elsif).getConsequence() and i = -1 and name = "getConsequence"
or
result = node.(EndBlock).getChild(i) and name = "getChild"
or
result = node.(Ensure).getChild(i) and name = "getChild"
or
result = node.(ExceptionVariable).getChild() and i = -1 and name = "getChild"
or
result = node.(Exceptions).getChild(i) and name = "getChild"
or
result = node.(ExpressionReferencePattern).getValue() and i = -1 and name = "getValue"
or
result = node.(FindPattern).getClass() and i = -1 and name = "getClass"
or
result = node.(FindPattern).getChild(i) and name = "getChild"
or
result = node.(For).getBody() and i = -1 and name = "getBody"
or
result = node.(For).getPattern() and i = -1 and name = "getPattern"
or
result = node.(For).getValue() and i = -1 and name = "getValue"
or
result = node.(Hash).getChild(i) and name = "getChild"
or
result = node.(HashPattern).getClass() and i = -1 and name = "getClass"
or
result = node.(HashPattern).getChild(i) and name = "getChild"
or
result = node.(HashSplatArgument).getChild() and i = -1 and name = "getChild"
or
result = node.(HashSplatParameter).getName() and i = -1 and name = "getName"
or
result = node.(HeredocBody).getChild(i) and name = "getChild"
or
result = node.(If).getAlternative() and i = -1 and name = "getAlternative"
or
result = node.(If).getCondition() and i = -1 and name = "getCondition"
or
result = node.(If).getConsequence() and i = -1 and name = "getConsequence"
or
result = node.(IfGuard).getCondition() and i = -1 and name = "getCondition"
or
result = node.(IfModifier).getBody() and i = -1 and name = "getBody"
or
result = node.(IfModifier).getCondition() and i = -1 and name = "getCondition"
or
result = node.(In).getChild() and i = -1 and name = "getChild"
or
result = node.(InClause).getBody() and i = -1 and name = "getBody"
or
result = node.(InClause).getGuard() and i = -1 and name = "getGuard"
or
result = node.(InClause).getPattern() and i = -1 and name = "getPattern"
or
result = node.(Interpolation).getChild(i) and name = "getChild"
or
result = node.(KeywordParameter).getName() and i = -1 and name = "getName"
or
result = node.(KeywordParameter).getValue() and i = -1 and name = "getValue"
or
result = node.(KeywordPattern).getKey() and i = -1 and name = "getKey"
or
result = node.(KeywordPattern).getValue() and i = -1 and name = "getValue"
or
result = node.(Lambda).getBody() and i = -1 and name = "getBody"
or
result = node.(Lambda).getParameters() and i = -1 and name = "getParameters"
or
result = node.(LambdaParameters).getChild(i) and name = "getChild"
or
result = node.(LeftAssignmentList).getChild(i) and name = "getChild"
or
result = node.(MatchPattern).getPattern() and i = -1 and name = "getPattern"
or
result = node.(MatchPattern).getValue() and i = -1 and name = "getValue"
or
result = node.(Method).getBody() and i = -1 and name = "getBody"
or
result = node.(Method).getName() and i = -1 and name = "getName"
or
result = node.(Method).getParameters() and i = -1 and name = "getParameters"
or
result = node.(MethodParameters).getChild(i) and name = "getChild"
or
result = node.(Module).getBody() and i = -1 and name = "getBody"
or
result = node.(Module).getName() and i = -1 and name = "getName"
or
result = node.(Next).getChild() and i = -1 and name = "getChild"
or
result = node.(OperatorAssignment).getLeft() and i = -1 and name = "getLeft"
or
result = node.(OperatorAssignment).getRight() and i = -1 and name = "getRight"
or
result = node.(OptionalParameter).getName() and i = -1 and name = "getName"
or
result = node.(OptionalParameter).getValue() and i = -1 and name = "getValue"
or
result = node.(Pair).getKey() and i = -1 and name = "getKey"
or
result = node.(Pair).getValue() and i = -1 and name = "getValue"
or
result = node.(ParenthesizedPattern).getChild() and i = -1 and name = "getChild"
or
result = node.(ParenthesizedStatements).getChild(i) and name = "getChild"
or
result = node.(Pattern).getChild() and i = -1 and name = "getChild"
or
result = node.(Program).getChild(i) and name = "getChild"
or
result = node.(Range).getBegin() and i = -1 and name = "getBegin"
or
result = node.(Range).getEnd() and i = -1 and name = "getEnd"
or
result = node.(Rational).getChild() and i = -1 and name = "getChild"
or
result = node.(Redo).getChild() and i = -1 and name = "getChild"
or
result = node.(Regex).getChild(i) and name = "getChild"
or
result = node.(Rescue).getBody() and i = -1 and name = "getBody"
or
result = node.(Rescue).getExceptions() and i = -1 and name = "getExceptions"
or
result = node.(Rescue).getVariable() and i = -1 and name = "getVariable"
or
result = node.(RescueModifier).getBody() and i = -1 and name = "getBody"
or
result = node.(RescueModifier).getHandler() and i = -1 and name = "getHandler"
or
result = node.(RestAssignment).getChild() and i = -1 and name = "getChild"
or
result = node.(Retry).getChild() and i = -1 and name = "getChild"
or
result = node.(Return).getChild() and i = -1 and name = "getChild"
or
result = node.(RightAssignmentList).getChild(i) and name = "getChild"
or
result = node.(ScopeResolution).getName() and i = -1 and name = "getName"
or
result = node.(ScopeResolution).getScope() and i = -1 and name = "getScope"
or
result = node.(Setter).getName() and i = -1 and name = "getName"
or
result = node.(SingletonClass).getBody() and i = -1 and name = "getBody"
or
result = node.(SingletonClass).getValue() and i = -1 and name = "getValue"
or
result = node.(SingletonMethod).getBody() and i = -1 and name = "getBody"
or
result = node.(SingletonMethod).getName() and i = -1 and name = "getName"
or
result = node.(SingletonMethod).getObject() and i = -1 and name = "getObject"
or
result = node.(SingletonMethod).getParameters() and i = -1 and name = "getParameters"
or
result = node.(SplatArgument).getChild() and i = -1 and name = "getChild"
or
result = node.(SplatParameter).getName() and i = -1 and name = "getName"
or
result = node.(String).getChild(i) and name = "getChild"
or
result = node.(StringArray).getChild(i) and name = "getChild"
or
result = node.(Subshell).getChild(i) and name = "getChild"
or
result = node.(Superclass).getChild() and i = -1 and name = "getChild"
or
result = node.(SymbolArray).getChild(i) and name = "getChild"
or
result = node.(TestPattern).getPattern() and i = -1 and name = "getPattern"
or
result = node.(TestPattern).getValue() and i = -1 and name = "getValue"
or
result = node.(Then).getChild(i) and name = "getChild"
or
result = node.(Unary).getOperand() and i = -1 and name = "getOperand"
or
result = node.(Undef).getChild(i) and name = "getChild"
or
result = node.(Unless).getAlternative() and i = -1 and name = "getAlternative"
or
result = node.(Unless).getCondition() and i = -1 and name = "getCondition"
or
result = node.(Unless).getConsequence() and i = -1 and name = "getConsequence"
or
result = node.(UnlessGuard).getCondition() and i = -1 and name = "getCondition"
or
result = node.(UnlessModifier).getBody() and i = -1 and name = "getBody"
or
result = node.(UnlessModifier).getCondition() and i = -1 and name = "getCondition"
or
result = node.(Until).getBody() and i = -1 and name = "getBody"
or
result = node.(Until).getCondition() and i = -1 and name = "getCondition"
or
result = node.(UntilModifier).getBody() and i = -1 and name = "getBody"
or
result = node.(UntilModifier).getCondition() and i = -1 and name = "getCondition"
or
result = node.(VariableReferencePattern).getName() and i = -1 and name = "getName"
or
result = node.(When).getBody() and i = -1 and name = "getBody"
or
result = node.(When).getPattern(i) and name = "getPattern"
or
result = node.(While).getBody() and i = -1 and name = "getBody"
or
result = node.(While).getCondition() and i = -1 and name = "getCondition"
or
result = node.(WhileModifier).getBody() and i = -1 and name = "getBody"
or
result = node.(WhileModifier).getCondition() and i = -1 and name = "getCondition"
or
result = node.(Yield).getChild() and i = -1 and name = "getChild"
}
}
}
overlay[local]
@@ -2441,20 +2107,4 @@ module Erb {
/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { erb_template_child(this, _, result) }
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(CommentDirective).getChild() and i = -1 and name = "getChild"
or
result = node.(Directive).getChild() and i = -1 and name = "getChild"
or
result = node.(GraphqlDirective).getChild() and i = -1 and name = "getChild"
or
result = node.(OutputDirective).getChild() and i = -1 and name = "getChild"
or
result = node.(Template).getChild(i) and name = "getChild"
}
}
}

View File

@@ -28,7 +28,6 @@ nodes
| string_flow.rb:227:10:227:10 | a | semmle.label | a |
subpaths
testFailures
| string_flow.rb:85:10:85:10 | a | Unexpected result: hasValueFlow=a |
| string_flow.rb:227:10:227:10 | a | Unexpected result: hasValueFlow=a |
#select
| string_flow.rb:3:10:3:22 | call to new | string_flow.rb:2:9:2:18 | call to source | string_flow.rb:3:10:3:22 | call to new | $@ | string_flow.rb:2:9:2:18 | call to source | call to source |

View File

@@ -82,7 +82,7 @@ end
def m_clear
a = source "a"
a.clear
sink a
sink a # $ hasValueFlow=a
end
# concat and prepend omitted because they clash with the summaries for

View File

@@ -18,7 +18,7 @@ class OneController < ActionController::Base
end
def c
sink @foo
sink @foo # $ hasTaintFlow
end
end

View File

@@ -270,7 +270,6 @@ nodes
| params_flow.rb:205:10:205:10 | a | semmle.label | a |
subpaths
testFailures
| filter_flow.rb:21:10:21:13 | @foo | Unexpected result: hasTaintFlow |
| filter_flow.rb:38:10:38:13 | @foo | Unexpected result: hasTaintFlow |
| filter_flow.rb:55:10:55:13 | @foo | Unexpected result: hasTaintFlow |
| filter_flow.rb:71:10:71:17 | call to bar | Unexpected result: hasTaintFlow |

View File

@@ -497,7 +497,6 @@ nodes
| hash_extensions.rb:126:10:126:19 | call to sole | semmle.label | call to sole |
subpaths
testFailures
| hash_extensions.rb:126:10:126:19 | call to sole | Unexpected result: hasValueFlow=b |
#select
| active_support.rb:182:10:182:13 | ...[...] | active_support.rb:180:10:180:17 | call to source | active_support.rb:182:10:182:13 | ...[...] | $@ | active_support.rb:180:10:180:17 | call to source | call to source |
| active_support.rb:188:10:188:13 | ...[...] | active_support.rb:186:10:186:18 | call to source | active_support.rb:188:10:188:13 | ...[...] | $@ | active_support.rb:186:10:186:18 | call to source | call to source |

View File

@@ -123,7 +123,7 @@ def m_sole
multi = [source("b"), source("c")]
sink(empty.sole)
sink(single.sole) # $ hasValueFlow=a
sink(multi.sole) # TODO: model that 'sole' does not return if the receiver has multiple elements
sink(multi.sole) # $ hasValueFlow=b # TODO: model that 'sole' does not return if the receiver has multiple elements
end
m_sole()

View File

@@ -23,7 +23,6 @@ nodes
| views/index.erb:2:10:2:12 | call to foo | semmle.label | call to foo |
subpaths
testFailures
| views/index.erb:2:10:2:12 | call to foo | Unexpected result: hasTaintFlow |
#select
| app.rb:95:10:95:14 | @user | app.rb:103:13:103:22 | call to source | app.rb:95:10:95:14 | @user | $@ | app.rb:103:13:103:22 | call to source | call to source |
| views/index.erb:2:10:2:12 | call to foo | app.rb:75:12:75:17 | call to params | views/index.erb:2:10:2:12 | call to foo | $@ | app.rb:75:12:75:17 | call to params | call to params |

View File

@@ -1,2 +1,2 @@
<%= @foo %>
<%= sink foo %>
<%= sink foo # $ hasTaintFlow %>

View File

@@ -1,5 +1,4 @@
testFailures
| improper_memoization.rb:100:1:104:3 | m14 | Unexpected result: result=BAD |
#select
| improper_memoization.rb:50:1:55:3 | m7 | improper_memoization.rb:50:8:50:10 | arg | improper_memoization.rb:51:3:53:5 | ... \|\|= ... |
| improper_memoization.rb:58:1:63:3 | m8 | improper_memoization.rb:58:8:58:10 | arg | improper_memoization.rb:59:3:61:5 | ... \|\|= ... |

View File

@@ -101,4 +101,4 @@ def m14(arg)
@m14 ||= {}
key = "foo/#{arg}"
@m14[key] ||= long_running_method(arg)
end
end # $ SPURIOUS: result=BAD

View File

@@ -280,11 +280,10 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
/// When `desugarer` is `Some`, the parsed tree is first transformed
/// through the supplied yeast desugarer before TRAP extraction. Building
/// the desugarer (which parses YAML and constructs the schema) is the
/// caller's responsibility, allowing it to be done once and shared across
/// files.
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
/// through the supplied yeast `Runner` before TRAP extraction. Building the
/// `Runner` (which parses YAML and constructs the schema) is the caller's
/// responsibility, allowing it to be done once and shared across files.
#[allow(clippy::too_many_arguments)]
pub fn extract(
language: &Language,
@@ -296,7 +295,7 @@ pub fn extract(
path: &Path,
source: &[u8],
ranges: &[Range],
desugarer: Option<&dyn yeast::Desugarer>,
yeast_runner: Option<&yeast::Runner<'_>>,
) {
let path_str = file_paths::normalize_and_transform_path(path, transformer);
let source_root = std::env::current_dir()
@@ -329,8 +328,8 @@ pub fn extract(
schema,
);
if let Some(desugarer) = desugarer {
let ast = desugarer
if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);

View File

@@ -13,14 +13,11 @@ pub struct LanguageSpec {
pub prefix: &'static str,
pub ts_language: tree_sitter::Language,
pub node_types: &'static str,
/// Optional desugarer. When set, the parsed tree is rewritten through
/// the desugarer before TRAP extraction. The desugarer's
/// `output_node_types_yaml()` (if set) provides the schema used both
/// at runtime (for the rewriter) and for TRAP validation.
///
/// `Box<dyn yeast::Desugarer>` so the shared extractor is agnostic to
/// the user-defined context type the desugarer uses internally.
pub desugar: Option<Box<dyn yeast::Desugarer>>,
/// Optional yeast desugaring configuration. When set, the parsed
/// tree is rewritten through yeast before TRAP extraction. The
/// config's `output_node_types_yaml` (if set) provides the schema
/// used both at runtime (for the rewriter) and for TRAP validation.
pub desugar: Option<yeast::DesugaringConfig>,
pub file_globs: Vec<String>,
}
@@ -94,22 +91,35 @@ impl Extractor {
.collect();
let mut schemas = vec![];
let mut yeast_runners = Vec::new();
for lang in &self.languages {
let effective_node_types: String = match lang
.desugar
.as_ref()
.and_then(|d| d.output_node_types_yaml())
{
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let effective_node_types: String =
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);
// Build the yeast runner once per language so the YAML schema
// isn't re-parsed for every file.
let yeast_runner = lang
.desugar
.as_ref()
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
.transpose()
.map_err(|e| {
std::io::Error::other(format!(
"Failed to build desugaring runner for {}: {e}",
lang.prefix
))
})?;
yeast_runners.push(yeast_runner);
}
// Construct a single globset containing all language globs,
@@ -184,7 +194,7 @@ impl Extractor {
&path,
&source,
&[],
lang.desugar.as_deref(),
yeast_runners[i].as_ref(),
);
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;

View File

@@ -159,7 +159,6 @@ pub fn generate(
));
body.append(&mut ql_gen::convert_nodes(&nodes));
body.push(ql_gen::create_print_ast_module(&nodes));
ql::write(
&mut ql_writer,
&[ql::TopLevel::Module(ql::Module {

View File

@@ -150,14 +150,12 @@ impl fmt::Display for Type<'_> {
pub enum Expression<'a> {
Var(&'a str),
String(&'a str),
Integer(i64),
Integer(usize),
Pred(&'a str, Vec<Expression<'a>>),
And(Vec<Expression<'a>>),
Or(Vec<Expression<'a>>),
Equals(Box<Expression<'a>>, Box<Expression<'a>>),
Dot(Box<Expression<'a>>, &'a str, Vec<Expression<'a>>),
/// A type cast, rendered as `x.(Type)`.
Cast(Box<Expression<'a>>, &'a str),
Aggregate {
name: &'a str,
vars: Vec<FormalParameter<'a>>,
@@ -221,7 +219,6 @@ impl fmt::Display for Expression<'_> {
}
write!(f, ")")
}
Expression::Cast(x, type_name) => write!(f, "{x}.({type_name})"),
Expression::Aggregate {
name,
vars,

View File

@@ -705,7 +705,7 @@ fn create_field_getters<'a>(
),
ql::Expression::Equals(
Box::new(ql::Expression::Var("value")),
Box::new(ql::Expression::Integer(*value as i64)),
Box::new(ql::Expression::Integer(*value)),
),
])
})
@@ -874,99 +874,3 @@ pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<ql::TopLevel<'_>> {
classes
}
/// Creates a `PrintAst` module containing a `getChild` predicate that maps each
/// AST node to its children together with the name of the member predicate that
/// produced them (and, for indexed fields, the index). This mirrors the
/// information exposed by `getAFieldOrChild`, but keeps the member predicate
/// name and index so that an AST printer can render labelled edges.
pub fn create_print_ast_module(nodes: &node_types::NodeTypeMap) -> ql::TopLevel<'_> {
let mut disjuncts: Vec<ql::Expression> = Vec::new();
for node in nodes.values() {
if let node_types::EntryKind::Table { name: _, fields } = &node.kind {
for field in fields {
// `ReservedWordInt` fields have string-valued getters, so they
// are not children and are excluded (just as they are from
// `getAFieldOrChild`).
if matches!(
field.type_info,
node_types::FieldTypeInfo::ReservedWordInt(_)
) {
continue;
}
let has_index = matches!(
field.storage,
node_types::Storage::Table {
has_index: true,
..
}
);
let getter_call = ql::Expression::Dot(
Box::new(ql::Expression::Cast(
Box::new(ql::Expression::Var("node")),
&node.ql_class_name,
)),
&field.getter_name,
if has_index {
vec![ql::Expression::Var("i")]
} else {
vec![]
},
);
let mut conjuncts = vec![ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(getter_call),
)];
if !has_index {
conjuncts.push(ql::Expression::Equals(
Box::new(ql::Expression::Var("i")),
Box::new(ql::Expression::Integer(-1)),
));
}
conjuncts.push(ql::Expression::Equals(
Box::new(ql::Expression::Var("name")),
Box::new(ql::Expression::String(&field.getter_name)),
));
disjuncts.push(ql::Expression::And(conjuncts));
}
}
}
let get_child = ql::Predicate {
qldoc: Some(String::from(
"Gets a child of `node` returned by the member predicate with the given `name`. \
If the predicate takes an index argument, `i` is bound to that index, otherwise \
`i` is `-1` (which is never a valid index).",
)),
name: "getChild",
overridden: false,
is_private: false,
is_final: false,
return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![
ql::FormalParameter {
name: "node",
param_type: ql::Type::Normal("AstNode"),
},
ql::FormalParameter {
name: "name",
param_type: ql::Type::String,
},
ql::FormalParameter {
name: "i",
param_type: ql::Type::Int,
},
],
body: ql::Expression::Or(disjuncts),
overlay: None,
};
ql::TopLevel::Module(ql::Module {
qldoc: Some(String::from(
"Provides predicates for mapping AST nodes to their named children.",
)),
name: "PrintAst",
body: vec![ql::TopLevel::Predicate(get_child)],
overlay: None,
})
}

View File

@@ -22,9 +22,10 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
/// Parse a single query node (possibly with a trailing `@capture`).
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
let base = parse_query_atom(tokens)?;
// Check for trailing @capture or @@capture
// Check for trailing @capture
if peek_is_at(tokens) {
let capture_name = consume_capture_marker(tokens)?;
tokens.next(); // consume @
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
@@ -120,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
let push_field_elem = |order: &mut Vec<String>,
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
if !map.contains_key(&name) {
order.push(name.clone());
map.insert(name, vec![elem]);
@@ -158,7 +159,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
let capture_name = consume_capture_marker(tokens)?;
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
@@ -293,10 +296,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
// tree! / trees! parsing — direct code generation against BuildCtx
// ---------------------------------------------------------------------------
const IMPLICIT_CTX: &str = "ctx";
const IMPLICIT_CTX: &str = "__yeast_ctx";
/// Determine the context identifier: either explicit `ctx,` or the implicit
/// `ctx` from an enclosing `rule!`.
/// `__yeast_ctx` from an enclosing `rule!`.
fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident {
// Check if first token is an ident followed by a comma
let mut lookahead = tokens.clone();
@@ -356,7 +359,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
Ok(quote! { ::std::convert::Into::<usize>::into({ #expr }) })
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
let group = expect_group(tokens, Delimiter::Parenthesis)?;
@@ -393,7 +396,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
let expr = group.stream();
return Ok(quote! {
{
let __expr = { #expr };
let __expr = (#expr);
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
@@ -417,11 +420,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Named fields — compute each value into a temp, then reference it
while peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name
.to_string()
.strip_prefix("r#")
.unwrap_or(&field_name.to_string())
.to_string();
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
expect_punct(tokens, ':', "expected `:` after field name")?;
let temp = Ident::new(
&format!("__field_{field_str}_{field_counter}"),
@@ -439,8 +438,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Determine if a chain (.map(..)) follows the `{}` group.
let mut after = tokens.clone();
after.next(); // skip the brace group
let has_chain =
matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain = matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
if is_splice || has_chain {
let group = expect_group(tokens, Delimiter::Brace)?;
@@ -450,11 +448,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
quote! {
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
}
} else {
let expr = group.stream();
quote! { { #expr }.into_iter() }
quote! { (#expr).into_iter() }
};
let chained = parse_chain_suffix(tokens, ctx, base)?;
stmts.push(quote! {
@@ -508,7 +506,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
/// Each call expects the receiver to be an iterator. The `base` argument
/// should therefore already be an iterator (use `.into_iter()` on it before
/// calling this function).
fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result<TokenStream> {
fn parse_chain_suffix(
tokens: &mut Tokens,
ctx: &Ident,
base: TokenStream,
) -> Result<TokenStream> {
let mut current = base;
while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') {
tokens.next(); // consume .
@@ -606,8 +608,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
// {expr} or {..expr} (with optional .chain) — single node or splice
if peek_is_group(tokens, Delimiter::Brace) {
let group = expect_group(tokens, Delimiter::Brace)?;
let has_chain =
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain = matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let mut inner = group.stream().into_iter().peekable();
let is_splice = peek_is_dotdot(&inner);
if is_splice || has_chain {
@@ -616,11 +617,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
quote! {
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
}
} else {
let expr = group.stream();
quote! { { #expr }.into_iter() }
quote! { (#expr).into_iter() }
};
let chained = parse_chain_suffix(tokens, ctx, base)?;
items.push(quote! {
@@ -629,7 +630,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
} else {
let expr = group.stream();
items.push(quote! {
__nodes.push(::std::convert::Into::<usize>::into({ #expr }));
__nodes.push(::std::convert::Into::<usize>::into(#expr));
});
}
continue;
@@ -648,9 +649,6 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
struct CaptureInfo {
name: String,
multiplicity: CaptureMultiplicity,
/// `true` for `@@name` captures: the auto-translate prefix skips them,
/// so the bound `NodeRef` refers to the raw (input-schema) node.
raw: bool,
}
#[derive(Clone, Copy, PartialEq)]
@@ -709,14 +707,6 @@ fn extract_captures_inner(
extract_captures_inner(&mut inner, captures, child_mult);
}
TokenTree::Punct(p) if p.as_char() == '@' => {
// `@@name` marks the capture as raw (skip auto-translate).
let raw = matches!(
tokens.peek(),
Some(TokenTree::Punct(p)) if p.as_char() == '@'
);
if raw {
tokens.next(); // consume the second `@`
}
if let Some(TokenTree::Ident(name)) = tokens.next() {
let mult = if parent_mult == CaptureMultiplicity::Repeated
|| last_mult == CaptureMultiplicity::Repeated
@@ -732,7 +722,6 @@ fn extract_captures_inner(
captures.push(CaptureInfo {
name: name.to_string(),
multiplicity: mult,
raw,
});
}
last_mult = CaptureMultiplicity::Single;
@@ -786,14 +775,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
// Parse query
let query_code = parse_query_top(query_stream.clone())?;
// Capture names marked `@@name` (raw) — passed to the auto-translate
// prefix as a skip list so those captures keep their input-schema ids.
let raw_capture_names: Vec<&str> = captures
.iter()
.filter(|c| c.raw)
.map(|c| c.name.as_str())
.collect();
// Generate capture bindings
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
let bindings: Vec<TokenStream> = captures
@@ -907,20 +888,10 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
Ok(quote! {
{
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// Auto-translation prefix: recursively translate every
// captured node before invoking the user's transform body,
// except for `@@name` captures listed in `__skip` which the
// body consumes raw.
// For OneShot rules this preserves the legacy behaviour
// (input-schema captures translated to output-schema
// nodes); for Repeating rules it is a no-op.
let __skip: &[&str] = &[#(#raw_capture_names),*];
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>| {
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
let __result: Vec<usize> = { #transform_body };
Ok(__result)
let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range);
#transform_body
}))
}
})
@@ -934,16 +905,6 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
}
/// Consume an `@` or `@@` capture marker and the following name ident.
/// Caller has already verified `peek_is_at(tokens)`.
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
tokens.next(); // consume the first `@`
if peek_is_at(tokens) {
tokens.next(); // consume the second `@` of `@@`
}
expect_ident(tokens, "expected capture name after `@` or `@@`")
}
fn peek_is_literal(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
}
@@ -1044,7 +1005,8 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
let name = consume_capture_marker(tokens)?;
tokens.next(); // consume @
let name = expect_ident(tokens, "expected capture name after @")?;
let name_str = name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
@@ -1071,12 +1033,13 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
}
}
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
/// child SingleNode inside the repetition with a Capture. This matches
/// tree-sitter semantics where `(_)* @name` captures each matched node.
/// If `@name` follows a Repeated list element, wrap each child SingleNode
/// inside the repetition with a Capture. This matches tree-sitter semantics
/// where `(_)* @name` captures each matched node.
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
let name = consume_capture_marker(tokens)?;
tokens.next();
let name = expect_ident(tokens, "expected capture name after @")?;
let name_str = name.to_string();
// Re-parse the element isn't practical, so we generate a wrapper
// that creates a new Repeated with each child wrapped in a capture.

View File

@@ -265,21 +265,7 @@ occurrences of the same `$name` within one `BuildCtx` share the same value:
)
```
The contents of `{…}` are treated as a Rust block, so multi-statement
expressions (with `let` bindings) work too:
```rust
(assignment
left: {tmp}
right: {
let lit = ctx.literal("integer", "0");
tree!((binary_expr op: (operator "+") left: {tmp} right: {lit}))
})
```
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`); the contents
are likewise a Rust block, so the splice can be the result of arbitrary
computation:
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`):
```rust
yeast::trees!(ctx,
@@ -292,37 +278,6 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
single capture (`Id`) and `{..name}` splices a repeated capture
(`Vec<Id>`).
### Raw captures (`@@name`)
The default `@name` capture marker is *auto-translated*: in OneShot
phases the macro recursively translates the captured node before
binding it, so `{name}` in the output template splices a node that
already conforms to the output schema.
For rules that need the raw (input-schema) capture — typically to read
its source text or to translate it explicitly with mutable context
state between calls — use `@@name` instead. The body sees the original
input-schema `NodeRef`:
```rust
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
// raw_lhs is untranslated: read its original source text.
let text = ctx.ast.source_text(raw_lhs.into());
// rhs is already translated by the auto-translate prefix.
tree!((call
method: (identifier #{text.as_str()})
receiver: {rhs}))
}
);
```
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
markers are equivalent (auto-translation is a no-op for repeating
rules).
## Complete example: for-loop desugaring
This rule rewrites Ruby's `for pat in val do body end` into

View File

@@ -20,7 +20,7 @@ fn main() {
let args = Cli::parse();
let language = get_language(&args.language);
let source = std::fs::read_to_string(&args.file).unwrap();
let runner: yeast::Runner = yeast::Runner::new(language, &[]);
let runner = yeast::Runner::new(language, &[]);
let ast = runner.run(&source).unwrap();
println!("{}", ast.print(&source, ast.get_root()));
}

View File

@@ -2,60 +2,28 @@ use std::collections::BTreeMap;
use crate::captures::Captures;
use crate::tree_builder::FreshScope;
use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle};
use crate::{Ast, FieldId, Id, NodeContent};
/// Context for building new AST nodes during a transformation.
///
/// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the
/// AST, a reference to the captures from a query match, a `FreshScope` for
/// generating unique identifiers, and a mutable reference to a user-defined
/// context of type `C`.
///
/// The user context `C` is shared across rules via the framework's driver:
/// outer rules can write to it before recursive translation, and inner rules
/// can read (or further mutate) it during their transforms. The framework
/// snapshots and restores the user context around each rule application, so
/// mutations made by a rule are visible to its descendants (via recursive
/// translation) but not to its parent's siblings.
///
/// `BuildCtx` implements [`Deref`] and [`DerefMut`] targeting `C`, so user
/// context fields are accessible as `ctx.my_field` directly (provided they
/// don't collide with `BuildCtx`'s own fields like `ast`, `captures`, etc.).
///
/// The default `C = ()` means rules that don't need any user context don't
/// pay any cost.
///
/// When constructed by the framework (via the rule! macro), `BuildCtx` also
/// carries a [`TranslatorHandle`] that the [`translate`] method delegates
/// to. When constructed by hand (e.g. in tests), the translator is `None`
/// and [`translate`] returns an error.
pub struct BuildCtx<'a, C: 'a = ()> {
/// AST, a reference to the captures from a query match, and a `FreshScope` for
/// generating unique identifiers.
pub struct BuildCtx<'a> {
pub ast: &'a mut Ast,
pub captures: &'a Captures,
pub fresh: &'a FreshScope,
/// Source range of the matched node, inherited by synthetic nodes.
pub source_range: Option<tree_sitter::Range>,
/// User-supplied context, accessible directly via `ctx.field` (via Deref).
pub user_ctx: &'a mut C,
/// Optional translator handle, populated when the context is built by
/// the framework's rule driver. None when the context is built by hand.
pub(crate) translator: Option<TranslatorHandle<'a, C>>,
}
impl<'a, C> BuildCtx<'a, C> {
pub fn new(
ast: &'a mut Ast,
captures: &'a Captures,
fresh: &'a FreshScope,
user_ctx: &'a mut C,
) -> Self {
impl<'a> BuildCtx<'a> {
pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self {
Self {
ast,
captures,
fresh,
source_range: None,
user_ctx,
translator: None,
}
}
@@ -64,35 +32,12 @@ impl<'a, C> BuildCtx<'a, C> {
captures: &'a Captures,
fresh: &'a FreshScope,
source_range: Option<tree_sitter::Range>,
user_ctx: &'a mut C,
) -> Self {
Self {
ast,
captures,
fresh,
source_range,
user_ctx,
translator: None,
}
}
/// Construct a `BuildCtx` carrying a translator handle. Used by the
/// `rule!` macro to enable [`translate`] inside rule transforms.
pub fn with_translator(
ast: &'a mut Ast,
captures: &'a Captures,
fresh: &'a FreshScope,
source_range: Option<tree_sitter::Range>,
user_ctx: &'a mut C,
translator: TranslatorHandle<'a, C>,
) -> Self {
Self {
ast,
captures,
fresh,
source_range,
user_ctx,
translator: Some(translator),
}
}
@@ -168,52 +113,3 @@ impl<'a, C> BuildCtx<'a, C> {
self.ast.prepend_field_child(node_id, field_id, value_id);
}
}
impl<C: Clone> BuildCtx<'_, C> {
/// Recursively translate a node via the framework's rule machinery.
/// In a OneShot phase, applies OneShot rules to the given node and
/// returns the resulting node ids. In a Repeating phase, errors
/// (translation is not meaningful when input and output share a
/// schema).
///
/// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]),
/// so manual rules can pass capture bindings directly without unwrapping.
///
/// Errors if this `BuildCtx` was constructed by hand (without a
/// translator handle) — for example, in unit tests that don't go
/// through the rule driver.
pub fn translate<I: Into<Id>>(&mut self, id: I) -> Result<Vec<Id>, String> {
let id = id.into();
match &self.translator {
Some(t) => t.translate(self.ast, self.user_ctx, id),
None => Err("translate() called on a BuildCtx without a translator handle".into()),
}
}
/// Translate an optional capture, returning the first translated id or
/// `None`. Convenience for `?`-quantifier captures (`Option<NodeRef>`).
///
/// If the underlying translation produces multiple ids for a single
/// input, only the first is returned. For most use cases (e.g.
/// translating a single type annotation) this is what you want; if
/// you need all ids, use [`translate`] directly.
pub fn translate_opt<I: Into<Id>>(&mut self, id: Option<I>) -> Result<Option<Id>, String> {
match id {
Some(id) => Ok(self.translate(id)?.into_iter().next()),
None => Ok(None),
}
}
}
impl<C> std::ops::Deref for BuildCtx<'_, C> {
type Target = C;
fn deref(&self) -> &C {
&*self.user_ctx
}
}
impl<C> std::ops::DerefMut for BuildCtx<'_, C> {
fn deref_mut(&mut self) -> &mut C {
&mut *self.user_ctx
}
}

View File

@@ -80,28 +80,6 @@ impl Captures {
}
Ok(())
}
/// Like [`try_map_all_captures`] but leaves captures whose name appears
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
/// (raw) captures alongside the default auto-translated `@name`
/// captures.
pub fn try_map_captures_except<E>(
&mut self,
skip: &[&str],
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
) -> Result<(), E> {
for (name, ids) in self.captures.iter_mut() {
if skip.contains(name) {
continue;
}
let mut new_ids = Vec::with_capacity(ids.len());
for &id in ids.iter() {
new_ids.extend(f(id)?);
}
*ids = new_ids;
}
Ok(())
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();

View File

@@ -53,7 +53,12 @@ pub fn dump_ast_with_options(
///
/// Any node that does not match the expected type set for its parent field is
/// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line.
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
pub fn dump_ast_with_type_errors(
ast: &Ast,
root: usize,
source: &str,
schema: &Schema,
) -> String {
dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default())
}
@@ -69,15 +74,7 @@ pub fn dump_ast_with_type_errors_and_options(
options: &DumpOptions,
) -> String {
let mut out = String::new();
dump_node(
ast,
root,
source,
options,
0,
Some((schema, None, None)),
&mut out,
);
dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out);
out
}
@@ -235,8 +232,8 @@ fn dump_node(
}
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
let child_type_check = type_check.map(|(schema, _, _)| {
let expected =
expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES));
let expected = expected_for_field(schema, node.kind_name(), field_id)
.or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), field_name));
(schema, expected, parent_field)
});

View File

@@ -48,12 +48,6 @@ impl From<NodeRef> for Id {
}
}
impl From<Id> for NodeRef {
fn from(value: Id) -> Self {
NodeRef(value)
}
}
/// Like [`std::fmt::Display`], but the formatting routine is given access to
/// the [`Ast`] so that node references can resolve to their source text.
///
@@ -303,9 +297,7 @@ impl Ast {
/// Returns the source text for `id`, resolving `NodeContent::Range`
/// against the stored source bytes when available.
pub fn source_text(&self, id: Id) -> String {
let Some(node) = self.get_node(id) else {
return String::new();
};
let Some(node) = self.get_node(id) else { return String::new(); };
let read_range = |range: &tree_sitter::Range| {
let start = range.start_byte;
let end = range.end_byte;
@@ -496,10 +488,7 @@ impl Ast {
/// Prepend a child id to the given field of the given node.
pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) {
let node = self
.nodes
.get_mut(node_id)
.expect("prepend_field_child: invalid node id");
let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id");
node.fields.entry(field_id).or_default().insert(0, value_id);
}
@@ -711,120 +700,18 @@ impl From<tree_sitter::Range> for NodeContent {
}
}
/// A handle that lets a rule transform recursively translate AST nodes via
/// the framework's rule machinery. Constructed by the driver and passed as
/// the last argument of every [`Transform`] invocation.
///
/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in
/// its generated prefix to translate captures before running the user's
/// transform body. Manually-written transforms (using [`Rule::new`]
/// directly) can call [`TranslatorHandle::translate`] selectively on
/// specific node ids to control when translation happens.
pub struct TranslatorHandle<'a, C> {
inner: TranslatorImpl<'a, C>,
}
/// Internal phase-specific translation state. Kept private — callers
/// interact with [`TranslatorHandle`] only.
enum TranslatorImpl<'a, C> {
/// OneShot phase translator: recursively applies OneShot rules.
OneShot {
index: &'a RuleIndex<'a, C>,
fresh: &'a tree_builder::FreshScope,
rewrite_depth: usize,
/// The id of the node the current rule is matching. Used by
/// [`auto_translate_captures`] to avoid infinite recursion when a
/// rule captures its own match root (e.g. via `(_) @_`).
matched_root: Id,
},
/// Repeating phase translator: translation is not meaningful here
/// (input and output schemas are the same). [`translate`] errors;
/// [`auto_translate_captures`] is a no-op so the macro's auto-prefix
/// works unchanged for Repeating rules.
Repeating,
}
impl<'a, C: Clone> TranslatorHandle<'a, C> {
/// Recursively apply OneShot rules to `id` and return the resulting
/// node ids. Errors in a Repeating phase (where translation is not
/// meaningful).
pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result<Vec<Id>, String> {
match &self.inner {
TranslatorImpl::OneShot {
index,
fresh,
rewrite_depth,
..
} => apply_one_shot_rules_inner(index, ast, user_ctx, id, fresh, rewrite_depth + 1),
TranslatorImpl::Repeating => {
Err("translate() is not available in a Repeating phase".into())
}
}
}
/// Translate every captured node in `captures` in place (OneShot phase
/// only), except for captures whose name appears in `skip` — those are
/// left as raw (input-schema) ids for the rule body to consume
/// directly. In a Repeating phase this is a no-op — Repeating rules
/// receive raw captures regardless of `skip`.
///
/// Used by the `rule!` macro's generated prefix. `skip` is populated
/// from the macro's `@@name` capture markers; for plain `@name`
/// captures (and rules with no `@@` markers) it is empty.
///
/// To avoid infinite recursion, a capture whose id matches the rule's
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
pub fn auto_translate_captures(
&self,
captures: &mut Captures,
ast: &mut Ast,
user_ctx: &mut C,
skip: &[&str],
) -> Result<(), String> {
match &self.inner {
TranslatorImpl::OneShot { matched_root, .. } => {
let root = *matched_root;
captures.try_map_captures_except(skip, |cid| {
if cid == root {
Ok(vec![cid])
} else {
self.translate(ast, user_ctx, cid)
}
})
}
TranslatorImpl::Repeating => Ok(()),
}
}
}
/// The transform function for a rule.
///
/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name
/// scope, the source range of the matched node, a mutable reference to the
/// user context of type `C`, and a [`TranslatorHandle`] for recursively
/// translating nodes. Returns the IDs of the replacement nodes, or an
/// error message if the transform could not be completed.
///
/// Transforms produced by [`Rule::new`] receive **raw** captures and must
/// translate them themselves (via the handle). Transforms produced by the
/// `rule!` macro have an auto-translation prefix injected for backward
/// compatibility.
pub type Transform<C = ()> = Box<
dyn Fn(
&mut Ast,
Captures,
&tree_builder::FreshScope,
Option<tree_sitter::Range>,
&mut C,
TranslatorHandle<'_, C>,
) -> Result<Vec<Id>, String>
/// The transform function for a rule: takes the AST, captured variables, a
/// fresh-name scope, and the source range of the matched node, and returns
/// the IDs of the replacement nodes.
pub type Transform = Box<
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> Vec<Id>
+ Send
+ Sync,
>;
pub struct Rule<C = ()> {
pub struct Rule {
query: QueryNode,
transform: Transform<C>,
transform: Transform,
/// If true, after this rule fires on a node the engine will try to
/// re-apply this same rule on the result root. Defaults to false:
/// each rule fires at most once on a given node, which prevents
@@ -832,8 +719,8 @@ pub struct Rule<C = ()> {
repeated: bool,
}
impl<C> Rule<C> {
pub fn new(query: QueryNode, transform: Transform<C>) -> Self {
impl Rule {
pub fn new(query: QueryNode, transform: Transform) -> Self {
Self {
query,
transform,
@@ -855,13 +742,9 @@ impl<C> Rule<C> {
ast: &mut Ast,
node: Id,
fresh: &tree_builder::FreshScope,
user_ctx: &mut C,
translator: TranslatorHandle<'_, C>,
) -> Result<Option<Vec<Id>>, String> {
match self.try_match(ast, node)? {
Some(captures) => Ok(Some(
self.run_transform(ast, captures, node, fresh, user_ctx, translator)?,
)),
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
None => Ok(None),
}
}
@@ -885,31 +768,29 @@ impl<C> Rule<C> {
captures: Captures,
node: Id,
fresh: &tree_builder::FreshScope,
user_ctx: &mut C,
translator: TranslatorHandle<'_, C>,
) -> Result<Vec<Id>, String> {
) -> Vec<Id> {
fresh.next_scope();
let source_range = ast.get_node(node).and_then(|n| match n.content {
NodeContent::Range(r) => Some(r),
_ => n.source_range,
});
(self.transform)(ast, captures, fresh, source_range, user_ctx, translator)
(self.transform)(ast, captures, fresh, source_range)
}
}
const MAX_REWRITE_DEPTH: usize = 100;
/// Index of rules by their root query kind for fast lookup.
struct RuleIndex<'a, C> {
struct RuleIndex<'a> {
/// Rules indexed by root node kind name.
by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>>,
by_kind: BTreeMap<&'static str, Vec<&'a Rule>>,
/// Rules with wildcard queries (Any) that apply to all nodes.
wildcard: Vec<&'a Rule<C>>,
wildcard: Vec<&'a Rule>,
}
impl<'a, C> RuleIndex<'a, C> {
fn new(rules: &'a [Rule<C>]) -> Self {
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>> = BTreeMap::new();
impl<'a> RuleIndex<'a> {
fn new(rules: &'a [Rule]) -> Self {
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new();
let mut wildcard = Vec::new();
for rule in rules {
match rule.query.root_kind() {
@@ -920,7 +801,7 @@ impl<'a, C> RuleIndex<'a, C> {
Self { by_kind, wildcard }
}
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule<C>> {
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule> {
self.by_kind
.get(kind)
.into_iter()
@@ -929,25 +810,23 @@ impl<'a, C> RuleIndex<'a, C> {
}
}
fn apply_repeating_rules<C: Clone>(
rules: &[Rule<C>],
fn apply_repeating_rules(
rules: &[Rule],
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None)
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
}
fn apply_repeating_rules_inner<C: Clone>(
index: &RuleIndex<C>,
fn apply_repeating_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
skip_rule: Option<*const Rule<C>>,
skip_rule: Option<*const Rule>,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
@@ -958,23 +837,11 @@ fn apply_repeating_rules_inner<C: Clone>(
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
for rule in index.rules_for_kind(node_kind) {
let rule_ptr = *rule as *const Rule<C>;
let rule_ptr = *rule as *const Rule;
if Some(rule_ptr) == skip_rule {
continue;
}
// Snapshot the user context before invoking the rule so that any
// mutations the rule makes are visible during recursive translation
// of its result, but not leaked to the parent's siblings.
let snapshot = user_ctx.clone();
// Repeating rules don't need a real translator: their captures
// aren't auto-translated (Repeating preserves the input schema),
// and `ctx.translate(id)` errors if invoked from a Repeating
// transform.
let translator = TranslatorHandle {
inner: TranslatorImpl::Repeating,
};
let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?;
if let Some(result_node) = try_result {
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
// For non-repeated rules, suppress further application of *this*
// rule on the result root, so a rule whose output matches its own
// query doesn't loop. Other rules and child traversal are
@@ -985,19 +852,14 @@ fn apply_repeating_rules_inner<C: Clone>(
results.extend(apply_repeating_rules_inner(
index,
ast,
user_ctx,
node,
fresh,
rewrite_depth + 1,
next_skip,
)?);
}
*user_ctx = snapshot;
return Ok(results);
}
// Rule didn't match; restore any speculative changes (none expected
// since try_rule only mutates on match, but be defensive).
*user_ctx = snapshot;
}
// Take the parent's fields by ownership: the recursion will rewrite
@@ -1012,15 +874,7 @@ fn apply_repeating_rules_inner<C: Clone>(
for children in fields.values_mut() {
let mut new_children: Option<Vec<Id>> = None;
for (i, &child_id) in children.iter().enumerate() {
let result = apply_repeating_rules_inner(
index,
ast,
user_ctx,
child_id,
fresh,
rewrite_depth,
None,
)?;
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let unchanged = result.len() == 1 && result[0] == child_id;
match (&mut new_children, unchanged) {
(None, true) => {} // unchanged so far, no allocation needed
@@ -1049,25 +903,24 @@ fn apply_repeating_rules_inner<C: Clone>(
/// each visited node, recursion proceeds only through captured nodes (not
/// through the input node's children directly), and an error is returned if
/// no rule matches a visited node.
fn apply_one_shot_rules<C: Clone>(
rules: &[Rule<C>],
fn apply_one_shot_rules(
rules: &[Rule],
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0)
apply_one_shot_rules_inner(&index, ast, id, fresh, 0)
}
fn apply_one_shot_rules_inner<C: Clone>(
index: &RuleIndex<C>,
fn apply_one_shot_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
@@ -1078,27 +931,22 @@ fn apply_one_shot_rules_inner<C: Clone>(
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
for rule in index.rules_for_kind(node_kind) {
if let Some(captures) = rule.try_match(ast, id)? {
// Snapshot the user context before invoking the rule so that any
// mutations the rule (or its transitively-translated captures)
// make are visible during this rule's transform, but not leaked
// to the parent's siblings.
let snapshot = user_ctx.clone();
// Build the translator handle the transform will use to
// recursively translate captures (or, for macro-generated
// rules, the auto-translate prefix uses it to translate every
// capture up front, preserving the legacy behavior).
let translator = TranslatorHandle {
inner: TranslatorImpl::OneShot {
index,
fresh,
rewrite_depth,
matched_root: id,
},
};
let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?;
*user_ctx = snapshot;
return Ok(result);
if let Some(mut captures) = rule.try_match(ast, id)? {
// Recursively translate every captured node before invoking the
// transform. The transform's output uses output-schema kinds, so
// we must translate captured input-schema nodes to their
// output-schema equivalents first.
captures.try_map_all_captures(|captured_id| {
// Avoid infinite recursion when a capture refers to the root
// node of the matched tree (e.g. an `@_` capture on the
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(vec![captured_id]);
}
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
})?;
return Ok(rule.run_transform(ast, captures, id, fresh));
}
}
@@ -1126,15 +974,15 @@ pub enum PhaseKind {
/// starts. Rules within a phase compete for matches as usual; rules in
/// different phases never compete because each traversal only considers the
/// current phase's rules.
pub struct Phase<C = ()> {
pub struct Phase {
/// Name used in error messages.
pub name: String,
pub rules: Vec<Rule<C>>,
pub rules: Vec<Rule>,
pub kind: PhaseKind,
}
impl<C> Phase<C> {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule<C>>) -> Self {
impl Phase {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
Self {
name: name.into(),
rules,
@@ -1160,30 +1008,17 @@ impl<C> Phase<C> {
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
/// .with_output_node_types_yaml(yaml);
/// ```
///
/// The optional type parameter `C` is the user context type threaded through
/// rule transforms. Defaults to `()` (no user context).
pub struct DesugaringConfig<C = ()> {
#[derive(Default)]
pub struct DesugaringConfig {
/// Phases of rule application, applied in order.
pub phases: Vec<Phase<C>>,
pub phases: Vec<Phase>,
/// Output node-types in YAML format. If `None`, the input grammar's
/// node types are used (i.e. the desugared AST has the same node types
/// as the tree-sitter grammar).
pub output_node_types_yaml: Option<&'static str>,
}
// Manual `Default` impl so users with a custom `C` that doesn't implement
// `Default` can still construct an empty config.
impl<C> Default for DesugaringConfig<C> {
fn default() -> Self {
Self {
phases: Vec::new(),
output_node_types_yaml: None,
}
}
}
impl<C> DesugaringConfig<C> {
impl DesugaringConfig {
/// Create an empty configuration. Add phases via [`add_phase`] and an
/// optional output schema via [`with_output_node_types_yaml`].
pub fn new() -> Self {
@@ -1195,7 +1030,7 @@ impl<C> DesugaringConfig<C> {
mut self,
name: impl Into<String>,
kind: PhaseKind,
rules: Vec<Rule<C>>,
rules: Vec<Rule>,
) -> Self {
self.phases.push(Phase::new(name, kind, rules));
self
@@ -1217,15 +1052,15 @@ impl<C> DesugaringConfig<C> {
}
}
pub struct Runner<'a, C = ()> {
pub struct Runner<'a> {
language: tree_sitter::Language,
schema: schema::Schema,
phases: &'a [Phase<C>],
phases: &'a [Phase],
}
impl<'a, C> Runner<'a, C> {
impl<'a> Runner<'a> {
/// Create a runner using the input grammar's schema for output.
pub fn new(language: tree_sitter::Language, phases: &'a [Phase<C>]) -> Self {
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
let schema = schema::Schema::from_language(&language);
Self {
language,
@@ -1238,7 +1073,7 @@ impl<'a, C> Runner<'a, C> {
pub fn with_schema(
language: tree_sitter::Language,
schema: &schema::Schema,
phases: &'a [Phase<C>],
phases: &'a [Phase],
) -> Self {
Self {
language,
@@ -1250,7 +1085,7 @@ impl<'a, C> Runner<'a, C> {
/// Create a runner from a [`DesugaringConfig`].
pub fn from_config(
language: tree_sitter::Language,
config: &'a DesugaringConfig<C>,
config: &'a DesugaringConfig,
) -> Result<Self, String> {
let schema = config.build_schema(&language)?;
Ok(Self {
@@ -1259,17 +1094,11 @@ impl<'a, C> Runner<'a, C> {
phases: &config.phases,
})
}
}
impl<'a, C: Clone> Runner<'a, C> {
/// Parse `tree` against `source` and run all phases, threading
/// `user_ctx` through every rule transform. The caller owns the
/// initial context state.
pub fn run_from_tree_with_ctx(
pub fn run_from_tree(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
user_ctx: &mut C,
) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
@@ -1277,13 +1106,11 @@ impl<'a, C: Clone> Runner<'a, C> {
&self.language,
source.to_vec(),
);
self.run_phases(&mut ast, user_ctx)?;
self.run_phases(&mut ast)?;
Ok(ast)
}
/// Parse `input` and run all phases, threading `user_ctx` through
/// every rule transform. The caller owns the initial context state.
pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result<Ast, String> {
pub fn run(&self, input: &str) -> Result<Ast, String> {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&self.language)
@@ -1297,24 +1124,20 @@ impl<'a, C: Clone> Runner<'a, C> {
&self.language,
input.as_bytes().to_vec(),
);
self.run_phases(&mut ast, user_ctx)?;
self.run_phases(&mut ast)?;
Ok(ast)
}
/// Apply each phase in turn to the AST, threading the root through.
/// A single `FreshScope` is shared across phases so that fresh
/// identifiers generated in different phases don't collide.
fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> {
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
let fresh = tree_builder::FreshScope::new();
let mut root = ast.get_root();
for phase in self.phases {
let res = match phase.kind {
PhaseKind::Repeating => {
apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
PhaseKind::OneShot => {
apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
}
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
if res.len() != 1 {
@@ -1330,78 +1153,3 @@ impl<'a, C: Clone> Runner<'a, C> {
Ok(())
}
}
impl<'a, C: Clone + Default> Runner<'a, C> {
/// Parse `tree` against `source` and run all phases, using the
/// default context (`C::default()`) as the initial context state.
pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
let mut user_ctx = C::default();
self.run_from_tree_with_ctx(tree, source, &mut user_ctx)
}
/// Parse `input` and run all phases, using the default context
/// (`C::default()`) as the initial context state.
pub fn run(&self, input: &str) -> Result<Ast, String> {
let mut user_ctx = C::default();
self.run_with_ctx(input, &mut user_ctx)
}
}
// ---------------------------------------------------------------------------
// Desugarer: type-erased view of a DesugaringConfig + Runner
// ---------------------------------------------------------------------------
/// Type-erased interface to a desugaring pipeline for a single language.
///
/// Consumers (e.g. a generic tree-sitter extractor) hold
/// `Box<dyn Desugarer>` so they can dispatch through the trait without
/// knowing the user context type `C` that's internal to yeast.
///
/// Construct one via [`ConcreteDesugarer::new`] from a
/// [`DesugaringConfig<C>`] and a [`tree_sitter::Language`].
pub trait Desugarer: Send + Sync {
/// The output AST schema (in YAML format), or `None` if the input
/// grammar's schema should be used.
fn output_node_types_yaml(&self) -> Option<&'static str>;
/// Parse `tree` against `source` and run the desugaring pipeline.
/// Each call constructs a fresh default user context internally.
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String>;
}
/// A concrete [`Desugarer`] backed by a [`DesugaringConfig<C>`] for a
/// specific user context type `C`. Stores the language and a pre-built
/// schema so that per-call cost is bounded to constructing a transient
/// [`Runner`] and cloning the schema (no YAML re-parsing).
pub struct ConcreteDesugarer<C: Default + Clone + Send + Sync + 'static> {
language: tree_sitter::Language,
schema: schema::Schema,
config: DesugaringConfig<C>,
}
impl<C: Default + Clone + Send + Sync + 'static> ConcreteDesugarer<C> {
/// Build a desugarer for `language` from `config`. Parses the output
/// schema YAML once (if set) and stores it for reuse across files.
pub fn new(
language: tree_sitter::Language,
config: DesugaringConfig<C>,
) -> Result<Self, String> {
let schema = config.build_schema(&language)?;
Ok(Self {
language,
schema,
config,
})
}
}
impl<C: Default + Clone + Send + Sync + 'static> Desugarer for ConcreteDesugarer<C> {
fn output_node_types_yaml(&self) -> Option<&'static str> {
self.config.output_node_types_yaml
}
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases);
runner.run_from_tree(tree, source)
}
}

View File

@@ -242,7 +242,10 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
/// Apply YAML node-type definitions to a mutable Schema.
/// Registers all types, fields, and allowed types from the YAML into the schema.
fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) {
fn apply_yaml_to_schema(
yaml: &YamlNodeTypes,
schema: &mut crate::schema::Schema,
) {
// Register all supertypes as node kinds
for name in yaml.supertypes.keys() {
schema.register_kind(name);
@@ -304,8 +307,7 @@ fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema
.into_vec()
.into_iter()
.map(|type_ref| {
let (kind, named) =
resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect::<Vec<_>>();

View File

@@ -198,8 +198,13 @@ impl Schema {
.insert((parent_kind.to_string(), field_id), node_types);
}
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
self.field_types.get(&(parent_kind.to_string(), field_id))
pub fn field_types(
&self,
parent_kind: &str,
field_id: FieldId,
) -> Option<&Vec<NodeType>> {
self.field_types
.get(&(parent_kind.to_string(), field_id))
}
pub fn set_field_cardinality(

View File

@@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
/// Helper: parse Ruby source with no rules, return dump.
fn parse_and_dump(input: &str) -> String {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
dump_ast(&ast, ast.get_root(), input)
}
@@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
runner.run(input).unwrap()
}
@@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let ast = runner.run(input).unwrap();
dump_ast(&ast, ast.get_root(), input)
}
@@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
runner
.run(input)
.expect_err("expected runner to return an error")
@@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
/// Helper: parse Ruby source with no rules and dump with schema type errors.
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
@@ -64,10 +64,10 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
/// building schema with language IDs so field checks align with parser fields.
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let runner: Runner = Runner::new(lang.clone(), &[]);
let runner = Runner::new(lang.clone(), &[]);
let ast = runner.run(input).unwrap();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
.unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
@@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> Strin
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let ast = runner.run(input).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
@@ -166,7 +166,7 @@ fn test_parse_for_loop() {
#[test]
fn test_dump_highlights_type_errors_inline() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -176,13 +176,13 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
}
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -192,25 +192,25 @@ named:
identifier:
"#;
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules: Vec<Rule> = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
}
#[test]
fn test_dump_reports_undeclared_field_on_node() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -219,14 +219,14 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
}
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -237,17 +237,17 @@ named:
integer:
"#;
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
}
// ---- Query tests ----
#[test]
fn test_query_match() {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -268,7 +268,7 @@ fn test_query_match() {
#[test]
fn test_query_no_match() {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() {
// captured comment to nothing (a common idiom, e.g.
// `(comment) => ()` in Swift) leaves the capture's match-list empty
// and causes the transform to fail with "Variable X has 0 matches".
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("[1, # comment\n2]").unwrap();
// Navigate to the `array` node: program -> array.
@@ -309,11 +309,15 @@ fn test_query_skips_extras_in_positional_match() {
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
assert!(matched);
assert_eq!(
ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(),
ast.get_node(captures.get_var("a").unwrap())
.unwrap()
.kind(),
"integer"
);
assert_eq!(
ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(),
ast.get_node(captures.get_var("b").unwrap())
.unwrap()
.kind(),
"integer"
);
}
@@ -321,14 +325,14 @@ fn test_query_skips_extras_in_positional_match() {
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases: Vec<Phase> = vec![Phase::new(
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
.unwrap();
let phases = vec![Phase::new(
"test",
PhaseKind::Repeating,
vec![yeast::rule!((integer) => (identifier "replaced"))],
)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -346,7 +350,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
#[test]
fn test_query_repeated_capture() {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x, y, z = 1").unwrap();
let query = yeast::query!(
@@ -371,7 +375,7 @@ fn test_query_repeated_capture() {
#[test]
fn test_capture_unnamed_node_parenthesized() {
// `("=") @op` captures the unnamed `=` token between left and right.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -399,7 +403,7 @@ fn test_capture_unnamed_node_parenthesized() {
fn test_capture_bare_underscore_repeated() {
// `_` matches named and unnamed nodes in bare-child position. On this
// assignment shape, bare children correspond to unnamed tokens (the `=`).
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!((assignment _* @all));
@@ -421,7 +425,7 @@ fn test_capture_bare_underscore_repeated() {
#[test]
fn test_capture_unnamed_node_bare_literal() {
// `"=" @op` (without surrounding parens) is the same as `("=") @op`.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -450,7 +454,7 @@ fn test_bare_underscore_matches_unnamed() {
// Bare `_` matches any node, including unnamed tokens, while `(_)`
// matches only named nodes. Demonstrate by matching the unnamed `=`
// token in the implicit `child` field of an `assignment`.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -489,7 +493,7 @@ fn test_bare_forms_in_field_position() {
// field's value, not just in the bare-children position. This is
// syntactic sugar for `(_)` / `("…")` and goes through the same
// code paths.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -528,7 +532,7 @@ fn test_forward_scan_finds_unnamed_token_late() {
// query for `("end")` skip past the first two and match the third.
// Without forward-scan, the matcher took the first child unconditionally
// and failed.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("for x in list do\n y\nend").unwrap();
// Navigate: program > for > do (the body wrapper).
@@ -555,7 +559,7 @@ fn test_forward_scan_preserves_order() {
// order. A query for ("end") then ("do") should fail because `do`
// appears before `end` in the source order; once forward-scan has
// consumed `end`, the iterator is exhausted.
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("for x in list do\n y\nend").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -576,7 +580,7 @@ fn test_forward_scan_preserves_order() {
#[test]
fn test_tree_builder() {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let mut ast = runner.run("x = 1").unwrap();
let input = "x = 1";
@@ -594,8 +598,7 @@ fn test_tree_builder() {
// Swap left and right
let fresh = yeast::tree_builder::FreshScope::new();
let mut user_ctx = ();
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx);
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh);
let new_id = yeast::tree!(ctx,
(program
child: (assignment
@@ -623,7 +626,7 @@ fn test_tree_builder() {
// tree-sitter-ruby grammar with named fields for nodes that only have
// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter).
fn ruby_rules() -> Vec<Rule> {
let assign_rule: Rule = yeast::rule!(
let assign_rule = yeast::rule!(
(assignment
left: (left_assignment_list
(identifier)* @left
@@ -648,7 +651,7 @@ fn ruby_rules() -> Vec<Rule> {
)}
);
let for_rule: Rule = yeast::rule!(
let for_rule = yeast::rule!(
(for
pattern: (_) @pat
value: (in (_) @val)
@@ -730,7 +733,7 @@ fn test_desugar_for_loop() {
#[test]
fn test_shorthand_rule() {
let rule: Rule = yeast::rule!(
let rule = yeast::rule!(
(assignment
left: (_) @method
right: (_) @receiver
@@ -882,7 +885,7 @@ fn test_phase_error_includes_phase_name() {
PhaseKind::Repeating,
vec![swap_assignment_rule().repeated()],
)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
.expect_err("expected runner to return an error");
@@ -925,7 +928,7 @@ fn test_one_shot_phase() {
PhaseKind::OneShot,
one_shot_xeq1_rules(),
)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -951,7 +954,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() {
let mut rules = one_shot_xeq1_rules();
rules.pop();
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
@@ -975,7 +978,7 @@ fn test_one_shot_recurses_into_returned_capture() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
@@ -991,7 +994,7 @@ fn test_one_shot_recurses_into_returned_capture() {
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -1017,7 +1020,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
@@ -1038,7 +1041,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -1058,115 +1061,11 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
);
}
/// Verify that `@@name` capture markers skip the auto-translate prefix:
/// the body sees the *raw* (input-schema) NodeRef and can read its
/// source text or call `ctx.translate(...)` explicitly. Compare with
/// the bare `@name` form, where the auto-translate prefix runs the
/// same translation up front and the body sees the post-translate id.
#[test]
fn test_raw_capture_marker() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// `@@raw_lhs` is untranslated: the body reads its source text
// ("x") and embeds it directly as the identifier content. `@rhs`
// is auto-translated (rhs already points to (integer "INT")).
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
let text = ctx.ast.source_text(raw_lhs.into());
tree!((call
method: (identifier #{text.as_str()})
receiver: {rhs}))
}
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `method:` uses the raw source text ("x"); if `@@` were broken and
// auto-translation ran on `raw_lhs`, it would still produce the
// string "x" (source_text inherits the input range), so the dump
// wouldn't change. Add a second assertion: explicitly translating
// the raw NodeRef inside the body must succeed and produce
// `(identifier "ID")`.
assert_dump_eq(
&dump,
r#"
program
stmt:
call
method: identifier "x"
receiver: integer "INT"
"#,
);
}
/// Companion to `test_raw_capture_marker`: confirms that calling
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
/// produces the correctly-translated output-schema node. With `@`, the
/// translation has already happened, so `ctx.translate(...)` inside the
/// body would attempt to re-translate an output node (which has no
/// matching rule and would error).
#[test]
fn test_raw_capture_marker_explicit_translate() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
let translated_lhs = ctx.translate(raw_lhs)?;
tree!((call
method: {..translated_lhs}
receiver: {rhs}))
}
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
assert_dump_eq(
&dump,
r#"
program
stmt:
call
method: identifier "ID"
receiver: integer "INT"
"#,
);
}
// ---- Cursor tests ----
#[test]
fn test_cursor_navigation() {
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -1240,7 +1139,7 @@ fn test_desugar_for_with_multiple_assignment() {
/// resolves to the captured node's source text via `YeastDisplay`.
#[test]
fn test_hash_brace_renders_capture_source_text() {
let rule: Rule = rule!(
let rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
@@ -1269,7 +1168,7 @@ fn test_hash_brace_renders_capture_source_text() {
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
#[test]
fn test_hash_brace_renders_integer_expression() {
let rule: Rule = rule!(
let rule = rule!(
(identifier) @_
=>
(identifier #{1 + 2})
@@ -1288,7 +1187,7 @@ fn test_hash_brace_renders_integer_expression() {
/// source location, not the full source range of the matched rule root.
#[test]
fn test_hash_brace_uses_capture_location_for_leaf() {
let rule: Rule = rule!(
let rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
@@ -1305,9 +1204,7 @@ fn test_hash_brace_uses_capture_location_for_leaf() {
let mut bar_ids: Vec<usize> = Vec::new();
for id in ast.reachable_node_ids() {
let Some(node) = ast.get_node(id) else {
continue;
};
let Some(node) = ast.get_node(id) else { continue; };
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
bar_ids.push(id);
}

View File

@@ -7,7 +7,6 @@ codeql_rust_binary(
name = "extractor",
srcs = glob(["src/**/*.rs"]),
aliases = aliases(),
compile_data = ["ast_types.yml"],
proc_macro_deps = all_crate_deps(
proc_macro = True,
),

View File

@@ -1,9 +1,9 @@
use clap::Args;
use std::path::PathBuf;
use crate::languages;
use codeql_extractor::extractor::simple;
use codeql_extractor::trap;
use crate::languages;
#[derive(Args)]
pub struct Options {
@@ -35,9 +35,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
prefix: "unified".to_string(),
languages,
trap_dir: options.output_dir,
trap_compression: trap::Compression::from_env(
"CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION",
),
trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"),
source_archive_dir: options.source_archive_dir,
file_lists: vec![options.file_list],
};

View File

@@ -22,19 +22,14 @@ pub fn run(options: Options) -> std::io::Result<()> {
// The QL-visible schema is the unified output AST, not the per-language
// input grammars. Pass it via `desugar.output_node_types_yaml` so the
// generator converts the YAML to JSON node-types.
let desugar =
yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
let desugar = yeast::DesugaringConfig::new()
.with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
let languages = vec![Language {
name: "Unified".to_owned(),
node_types: "", // unused: generator picks up output_node_types_yaml above
node_types: "", // unused: generator picks up output_node_types_yaml above
desugar: Some(desugar),
}];
generate(
languages,
options.dbscheme,
options.library,
"run unified/scripts/create-extractor-pack.sh",
)
generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh")
}

View File

@@ -1,98 +1,7 @@
use codeql_extractor::extractor::simple;
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, rule, tree};
use yeast::{rule, DesugaringConfig, PhaseKind};
/// User context propagated from outer rules down to the inner rules that
/// emit the corresponding output declarations, so that each emitted node
/// is born with the outer information (name, type, modifiers, etc.)
/// already set — no schema-invalid intermediate state requiring
/// post-hoc mutation.
#[derive(Clone, Default)]
struct SwiftContext {
/// Identifier node for the property name. Set by the outer
/// `property_binding` (computed accessors / willSet-didSet) and
/// `protocol_property_declaration` rules before translating accessor
/// children; read by the accessor inner rules
/// (`computed_getter`/`computed_setter`/`computed_modify`/
/// `willset_clause`/`didset_clause`/`getter_specifier`/
/// `setter_specifier`).
property_name: Option<yeast::NodeRef>,
/// Translated type node for the property type. Set by the outer
/// `property_binding` rule (computed accessors variant) and
/// `protocol_property_declaration` when present; read by the
/// accessor inner rules.
property_type: Option<yeast::NodeRef>,
/// Default-value expression for the next translated `parameter`. Set
/// by the outer `function_parameter` rule; read by the `parameter`
/// rules.
default_value: Option<yeast::NodeRef>,
/// Translated outer modifiers (e.g. visibility, attributes) to
/// attach to each child of a flattening outer rule. Set by
/// `property_declaration`, `enum_entry`, and
/// `protocol_property_declaration`.
outer_modifiers: Vec<yeast::NodeRef>,
/// The `let`/`var` binding modifier for a `property_declaration`.
/// Set by `property_declaration`; read by the inner declaration
/// rules (`property_binding` variants, accessor rules) so they
/// emit it as part of the output node's `modifier:` field.
binding_modifier: Option<yeast::NodeRef>,
/// True when the current child of a flattening outer rule is not
/// the first one — its inner rule should emit a
/// `chained_declaration` modifier so the original grouping can be
/// recovered downstream.
is_chained: bool,
}
/// Build a freshly-created `chained_declaration` modifier node if
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
/// emit the chained tag for non-first children of a flattening outer
/// rule. Returns `Option<NodeRef>` so it splices via `{..…}` to 0 or 1 ids.
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::NodeRef> {
if ctx.is_chained {
Some(ctx.literal("modifier", "chained_declaration").into())
} else {
None
}
}
/// Combine a list of boolean sub-conditions into a single expression by
/// left-folding with the infix `&&` operator. Used by control-flow
/// rules (`if`, `guard`, `while`, `repeat-while`) whose tree-sitter
/// nodes carry one or more comma-separated conditions that the target
/// AST represents as a single `condition:` field. Panics on an empty
/// input because every caller's grammar guarantees at least one
/// condition.
fn and_chain(
ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>,
conds: Vec<yeast::NodeRef>,
) -> yeast::Id {
conds.into_iter()
.map(yeast::Id::from)
.reduce(|acc, elem| {
tree!((binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
})
.expect("control-flow statement must have at least one condition")
}
/// Translate a multi-part identifier (for example `Foo.Bar.Baz`) into a
/// `member_access_expr` chain rooted at a `name_expr` over the first
/// part. Panics on an empty input because the grammar's `_+` quantifier
/// guarantees at least one part.
fn member_chain(
ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>,
parts: Vec<yeast::NodeRef>,
) -> yeast::Id {
let mut iter = parts.into_iter();
let first = iter
.next()
.expect("identifier with `part:` must have at least one part");
let init = tree!((name_expr identifier: (identifier #{first})));
iter.fold(
init,
|acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))),
)
}
fn translation_rules() -> Vec<Rule<SwiftContext>> {
fn translation_rules() -> Vec<yeast::Rule> {
vec![
// ---- Top-level ----
// Capture all top-level statements, including unnamed tokens like `nil`.
@@ -179,43 +88,32 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
// nodes for individual declarators. The outer property_declaration rule splices these out
// and attaches binding/modifiers from the parent.
// Computed property with explicit accessors (get/set/modify) → a
// sequence of `accessor_declaration` nodes. The outer rule
// publishes the property's name and type into `ctx` so that each
// inner accessor rule
// (`computed_getter`/`computed_setter`/`computed_modify`) builds
// its `accessor_declaration` with `name` and `type` set from the
// start — no schema-invalid intermediate state.
//
// Toggles `ctx.is_chained` per accessor iteration: the first
// accessor inherits the outer rule's chained state (i.e. whether
// this whole property_binding is itself a non-first declarator
// of a containing property_declaration); subsequent accessors
// always emit `chained_declaration`.
// Computed property with explicit accessors (get/set/modify) →
// a sequence of accessor_declaration nodes, each with the property name
// attached. Subsequent accessors will be tagged chained_declaration by
// the outer property_declaration rule.
rule!(
(property_binding
name: @pattern
type: _? @ty
computed_value: (computed_property accessor: _+ @@accessors))
computed_value: (computed_property accessor: _+ @accessors))
=>
{..{
ctx.property_name = Some(tree!((identifier #{pattern})).into());
ctx.property_type = ty;
let mut result = Vec::new();
for (i, acc) in accessors.into_iter().enumerate() {
if i > 0 {
ctx.is_chained = true;
let name_text = __yeast_ctx.ast.source_text(pattern.into());
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
let acc_ids: Vec<usize> = accessors.iter().map(|&a| a.into()).collect();
for &acc_id in &acc_ids {
let ident = __yeast_ctx.literal("identifier", &name_text);
__yeast_ctx.prepend_field(acc_id, "name", ident);
for &ty_id in ty_ids.iter().rev() {
__yeast_ctx.prepend_field(acc_id, "type", ty_id);
}
result.extend(ctx.translate(acc)?);
}
result
acc_ids
}}
),
// Computed property: shorthand getter (no explicit get/set, just
// statements) → a single accessor_declaration with kind "get".
// Reads outer modifiers / chained tag from `ctx` (set by the
// outer `property_declaration` rule).
// Computed property: shorthand getter (no explicit get/set, just statements) →
// a single accessor_declaration with kind "get".
rule!(
(property_binding
name: (pattern bound_identifier: @name)
@@ -223,58 +121,49 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
computed_value: (computed_property statement: _* @body))
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: (identifier #{name})
type: {..ty}
accessor_kind: (accessor_kind "get")
body: (block stmt: {..body}))
),
// Stored property with willSet/didSet observers (initializer
// optional) → a `variable_declaration` followed by one
// `accessor_declaration` per observer, each born with the
// property name set. Manual rule: we publish the property name
// into `ctx` before translating the observer children so the
// inner `willset_clause` / `didset_clause` rules construct
// valid `accessor_declaration` nodes from the start.
//
// The `variable_declaration` itself inherits the outer rule's
// chained state; observers always get `chained_declaration`
// because they're subsequent outputs of this flattening rule.
// Stored property with willSet/didSet observers (initializer optional) →
// variable_declaration followed by one accessor_declaration per observer,
// each carrying the property name. Subsequent items are tagged
// chained_declaration by the outer property_declaration rule.
rule!(
(property_binding
name: (pattern bound_identifier: @name)
type: _? @ty
value: _? @val
observers: (willset_didset_block willset: _? @@ws didset: _? @@ds))
observers: (willset_didset_block willset: _? @ws didset: _? @ds))
=>
{..{
let var_decl = tree!(
(variable_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
pattern: (name_pattern identifier: (identifier #{name}))
type: {..ty}
value: {..val})
);
// Publish the property name for the observer rules.
ctx.property_name = Some(tree!((identifier #{name})).into());
// Observers are subsequent outputs of this flattening
// rule, so they always get `chained_declaration`.
ctx.is_chained = true;
let mut result = vec![var_decl];
for obs in ws.into_iter().chain(ds) {
result.extend(ctx.translate(obs)?);
let name_text = __yeast_ctx.ast.source_text(name.into());
let val_ids: Vec<usize> = val.iter().map(|&v| v.into()).collect();
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
let mut obs_ids: Vec<usize> = Vec::new();
obs_ids.extend(ws.iter().map(|&o| { let id: usize = o.into(); id }));
obs_ids.extend(ds.iter().map(|&o| { let id: usize = o.into(); id }));
let ident_for_var = __yeast_ctx.literal("identifier", &name_text);
let pat = __yeast_ctx.node("name_pattern", vec![("identifier", vec![ident_for_var])]);
let mut var_fields: Vec<(&str, Vec<usize>)> = vec![("pattern", vec![pat])];
if !ty_ids.is_empty() {
var_fields.push(("type", ty_ids));
}
if !val_ids.is_empty() {
var_fields.push(("value", val_ids));
}
let var_id = __yeast_ctx.node("variable_declaration", var_fields);
let mut result = vec![var_id];
for obs_id in obs_ids {
let ident = __yeast_ctx.literal("identifier", &name_text);
__yeast_ctx.prepend_field(obs_id, "name", ident);
result.push(obs_id);
}
result
}}
),
// property_binding with any pattern name (identifier or
// destructuring). Reads outer modifiers / chained tag from `ctx`.
// property_binding with any pattern name (identifier or destructuring)
rule!(
(property_binding
name: @pattern
@@ -282,40 +171,36 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
value: _? @val)
=>
(variable_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
pattern: {pattern}
type: {..ty}
value: {..val})
),
// property_declaration: flatten declarators (each may translate
// to multiple nodes — variable_declaration and/or
// accessor_declaration) and attach the binding modifier
// (let/var), outer modifiers, and `chained_declaration` for
// non-first declarations. Manual rule: publishes
// binding/outer modifiers into `ctx` and translates each
// declarator with `ctx.is_chained` toggled per iteration. The
// inner declaration rules (`property_binding` variants,
// accessor inner rules) read these fields and emit complete
// `modifier:` lists from the start.
// property_declaration: splice declarators (each may translate to multiple nodes —
// variable_declaration and/or accessor_declaration), and attach the binding modifier
// (let/var) and any outer modifiers to each. All children after the first additionally
// get a synthetic chained_declaration modifier so the grouping can be recovered.
rule!(
(property_declaration
binding: (value_binding_pattern mutability: @binding_kind)
declarator: _* @@decls
declarator: _* @decls
(modifiers)* @mods)
=>
{..{
let binding_text = ctx.ast.source_text(binding_kind.into());
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text).into());
ctx.outer_modifiers = mods;
let mut result = Vec::new();
for (i, decl) in decls.into_iter().enumerate() {
ctx.is_chained = i > 0;
result.extend(ctx.translate(decl)?);
let binding_text = __yeast_ctx.ast.source_text(binding_kind.into());
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
let decl_ids: Vec<usize> = decls.iter().map(|&d| d.into()).collect();
for (i, &decl_id) in decl_ids.iter().enumerate() {
if i > 0 {
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
__yeast_ctx.prepend_field(decl_id, "modifier", chained);
}
for &mod_id in mod_ids.iter().rev() {
__yeast_ctx.prepend_field(decl_id, "modifier", mod_id);
}
let binding_mod = __yeast_ctx.literal("modifier", &binding_text);
__yeast_ctx.prepend_field(decl_id, "modifier", binding_mod);
}
result
decl_ids
}}
),
// ---- Enums ----
@@ -332,18 +217,14 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
=>
(parameter type: {ty})
),
// enum_case_entry with associated values → class_like_declaration
// containing a constructor whose parameters are the data
// parameters. Reads outer modifiers / chained tag from `ctx`
// (set by the outer `enum_entry` rule).
// enum_case_entry with associated values → class_like_declaration containing
// a constructor whose parameters are the data parameters.
rule!(
(enum_case_entry
name: @name
data_contents: (enum_type_parameters parameter: _* @params))
=>
(class_like_declaration
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
modifier: (modifier "enum_case")
name: (identifier #{name})
member: (constructor_declaration parameter: {..params} body: (block)))
@@ -353,8 +234,6 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(enum_case_entry name: @name raw_value: @val)
=>
(variable_declaration
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
modifier: (modifier "enum_case")
pattern: (name_pattern identifier: (identifier #{name}))
value: {val})
@@ -364,27 +243,27 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(enum_case_entry name: @name)
=>
(variable_declaration
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
modifier: (modifier "enum_case")
pattern: (name_pattern identifier: (identifier #{name})))
),
// enum_entry: flatten case entries; publish outer modifiers
// into `ctx` and translate each case with `ctx.is_chained`
// toggled per iteration so the inner `enum_case_entry` rules
// emit complete `modifier:` lists from the start.
// enum_entry: flatten case entries; attach outer modifiers to each, and
// chained_declaration on every entry after the first.
rule!(
(enum_entry case: _+ @@cases (modifiers)* @mods)
(enum_entry case: _+ @cases (modifiers)* @mods)
=>
{..{
ctx.outer_modifiers = mods;
let mut result = Vec::new();
for (i, case) in cases.into_iter().enumerate() {
ctx.is_chained = i > 0;
result.extend(ctx.translate(case)?);
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
let case_ids: Vec<usize> = cases.iter().map(|&c| c.into()).collect();
for (i, &case_id) in case_ids.iter().enumerate() {
if i > 0 {
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
__yeast_ctx.prepend_field(case_id, "modifier", chained);
}
for &mod_id in mod_ids.iter().rev() {
__yeast_ctx.prepend_field(case_id, "modifier", mod_id);
}
}
result
case_ids
}}
),
// Plain assignment: `x = expr`
@@ -457,15 +336,16 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
body: (block stmt: {..body_stmts}))
),
// Parameters are wrapped in function_parameter, which also carries
// optional default values. Publishes the default value into `ctx`
// before translating the inner `parameter` so the `parameter`
// rules can include it as a `default:` field directly.
// optional default values.
rule!(
(function_parameter parameter: @@p default_value: _? @def)
(function_parameter parameter: @p default_value: _? @def)
=>
{..{
ctx.default_value = def;
ctx.translate(p)?
let p_id: usize = p.into();
for &d in def.iter().rev() {
__yeast_ctx.prepend_field(p_id, "default", d.into());
}
vec![p_id]
}}
),
// Parameter with external name and type
@@ -474,8 +354,7 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
=>
(parameter
external_name: (identifier #{ext})
pattern: (name_pattern identifier: (identifier #{name}))
default: {..ctx.default_value})
pattern: (name_pattern identifier: (identifier #{name})))
),
rule!(
(parameter external_name: @ext name: @name type: @ty)
@@ -483,24 +362,21 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(parameter
external_name: (identifier #{ext})
pattern: (name_pattern identifier: (identifier #{name}))
type: {ty}
default: {..ctx.default_value})
type: {ty})
),
// Parameter with just name and type (no external name)
rule!(
(parameter name: @name)
=>
(parameter
pattern: (name_pattern identifier: (identifier #{name}))
default: {..ctx.default_value})
pattern: (name_pattern identifier: (identifier #{name})))
),
rule!(
(parameter name: @name type: @ty)
=>
(parameter
pattern: (name_pattern identifier: (identifier #{name}))
type: {ty}
default: {..ctx.default_value})
type: {ty})
),
// Reference to a function, f(x:y:z:). This is parsed as a call with a single argument with multiple reference_specifier labels.
// We don't want downstream QL to try to handle this as a call_expr with a weird argument, so explicitly mark it as unsupported for now.
@@ -608,12 +484,11 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
argument: (argument value: {closure}))
),
// ---- Control flow ----
// If statement
rule!(
(if_statement condition: _* @cond body: @then_body else_branch: _? @else_stmts)
=>
(if_expr
condition: {and_chain(&mut ctx, cond)}
condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
then: {then_body}
else: {..else_stmts})
),
@@ -622,7 +497,7 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(guard_statement condition: _* @cond body: (block statement: _* @else_stmts))
=>
(guard_if_stmt
condition: {and_chain(&mut ctx, cond)}
condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
else: (block stmt: {..else_stmts}))
),
// Ternary expression → if_expr
@@ -700,24 +575,20 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
rule!(
(while_statement condition: _* @cond body: (block statement: _* @body))
=>
(while_stmt
condition: {and_chain(&mut ctx, cond)}
body: (block stmt: {..body}))
(while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body}))
),
// Repeat-while loop
rule!(
(repeat_while_statement condition: _* @cond body: (block statement: _* @body))
=>
(do_while_stmt
condition: {and_chain(&mut ctx, cond)}
body: (block stmt: {..body}))
(do_while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body}))
),
// Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token.
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {
let text = ctx.ast.source_text(lbl.into());
let name = &text[..text.len() - 1];
tree!((labeled_stmt label: (identifier #{name}) stmt: {stmt}))
}),
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{
let text = __yeast_ctx.ast.source_text(lbl.into());
let name = __yeast_ctx.literal("identifier", &text[..text.len() - 1]);
vec![__yeast_ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])]
}}),
// ---- Collections ----
// Array literal
rule!((array_literal element: _* @elems) => (array_literal element: {..elems})),
@@ -727,9 +598,16 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
rule!(
(dictionary_literal key: _* @keys value: _* @vals)
=>
(map_literal element: {..keys.into_iter().zip(vals).map(|(k, v)|
tree!((key_value_pair key: {k} value: {v}))
)})
(map_literal element: {..{
keys.iter().zip(vals.iter()).map(|(&k, &v)| {
let k_id: usize = k.into();
let v_id: usize = v.into();
__yeast_ctx.node("key_value_pair", vec![
("key", vec![k_id]),
("value", vec![v_id]),
])
}).collect::<Vec<_>>()
}})
),
rule!((dictionary_literal element: _* @elems) => (map_literal element: {..elems})),
rule!((dictionary_literal_item key: @k value: @v) => (key_value_pair key: {k} value: {v})),
@@ -791,7 +669,9 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
rule!(
(identifier part: _+ @parts)
=>
{member_chain(&mut ctx, parts)}
{parts}.reduce_left(
first -> (name_expr identifier: (identifier #{first})),
acc, elem -> (member_access_expr base: {acc} member: (identifier #{elem})))
),
// Scoped import declaration (for example `import struct Foo.Bar`):
// flatten the identifier parts into a member_access_expr and bind the
@@ -994,73 +874,48 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
name: (identifier #{name})
bound: {..bound})
),
// Protocol property declaration: translate each accessor
// requirement to an `accessor_declaration` carrying the property
// name, type, and outer modifiers. Manual rule: we publish the
// property's name/type/modifiers into `ctx` and translate each
// accessor with `ctx.is_chained` toggled per iteration so the
// inner `getter_specifier`/`setter_specifier` rules emit
// complete nodes from the start (including the
// `chained_declaration` tag for non-first accessors).
// Protocol property declaration: translate each accessor requirement to an
// accessor_declaration without a body, carrying the property name and type.
// Subsequent accessors get chained_declaration (same flattening as computed properties).
rule!(
(protocol_property_declaration
name: (pattern bound_identifier: @name)
requirements: (protocol_property_requirements accessor: _+ @@accessors)
name: @pattern
requirements: (protocol_property_requirements accessor: _+ @accessors)
type: _? @ty
(modifiers)* @mods)
=>
{..{
ctx.property_name = Some(tree!((identifier #{name})).into());
ctx.property_type = ty;
ctx.outer_modifiers = mods;
let mut result = Vec::new();
for (i, acc) in accessors.into_iter().enumerate() {
ctx.is_chained = i > 0;
result.extend(ctx.translate(acc)?);
let name_text = __yeast_ctx.ast.source_text(pattern.into());
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
let acc_ids: Vec<usize> = accessors.iter().map(|&a| a.into()).collect();
for (i, &acc_id) in acc_ids.iter().enumerate() {
if i > 0 {
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
__yeast_ctx.prepend_field(acc_id, "modifier", chained);
}
for &mod_id in mod_ids.iter().rev() {
__yeast_ctx.prepend_field(acc_id, "modifier", mod_id);
}
for &ty_id in ty_ids.iter().rev() {
__yeast_ctx.prepend_field(acc_id, "type", ty_id);
}
let ident = __yeast_ctx.literal("identifier", &name_text);
__yeast_ctx.prepend_field(acc_id, "name", ident);
}
result
acc_ids
}}
),
// getter_specifier / setter_specifier → bodyless accessor_declaration
// getter_specifier / setter_specifier → bodyless
// accessor_declaration. Reads property name/type/modifiers from
// `ctx` set by the outer `protocol_property_declaration` rule.
rule!(
(getter_specifier)
=>
(accessor_declaration
name: {ctx.property_name.ok_or("getter_specifier outside protocol_property_declaration context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "get")
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)})
),
rule!(
(setter_specifier)
=>
(accessor_declaration
name: {ctx.property_name.ok_or("setter_specifier outside protocol_property_declaration context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "set")
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)})
),
rule!((getter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "get"))),
rule!((setter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "set"))),
// protocol_property_requirements wrapper — should be consumed by above; fallback
rule!((protocol_property_requirements accessor: _* @accs) => {..accs}),
// Computed getter → accessor_declaration (body optional).
// Reads property name/type from the outer property_binding rule
// and binding/outer modifiers + chained tag from the outer
// property_declaration rule.
rule!(
(computed_getter body: (block statement: _* @body)?)
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("computed_getter outside property_binding context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "get")
body: (block stmt: {..body}))
),
@@ -1069,11 +924,6 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(computed_setter parameter: @param body: (block statement: _* @body))
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "set")
parameter: (parameter pattern: (name_pattern identifier: (identifier #{param})))
body: (block stmt: {..body}))
@@ -1083,11 +933,6 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(computed_setter body: (block statement: _* @body)?)
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "set")
body: (block stmt: {..body}))
),
@@ -1096,30 +941,16 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(computed_modify body: (block statement: _* @body))
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("computed_modify outside property_binding context")?}
type: {..ctx.property_type}
accessor_kind: (accessor_kind "modify")
body: (block stmt: {..body}))
),
// willset/didset block — spread to children (only reachable as a
// fallback; the outer property_binding manual rule normally
// captures the willset/didset clauses directly).
// willset/didset block — spread to children
rule!((willset_didset_block _* @clauses) => {..clauses}),
// willset clause → accessor_declaration (body optional). Reads
// `ctx.property_name` set by the outer property_binding rule and
// binding/outer modifiers + chained tag from the outer
// property_declaration rule.
// willset clause → accessor_declaration (body optional).
rule!(
(willset_clause body: (block statement: _* @body)?)
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("willset_clause outside property_binding context")?}
accessor_kind: (accessor_kind "willSet")
body: (block stmt: {..body}))
),
@@ -1128,10 +959,6 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
(didset_clause body: (block statement: _* @body)?)
=>
(accessor_declaration
modifier: {..ctx.binding_modifier}
modifier: {..ctx.outer_modifiers.clone()}
modifier: {..chained_modifier(&mut ctx)}
name: {ctx.property_name.ok_or("didset_clause outside property_binding context")?}
accessor_kind: (accessor_kind "didSet")
body: (block stmt: {..body}))
),
@@ -1152,17 +979,14 @@ fn translation_rules() -> Vec<Rule<SwiftContext>> {
}
pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec {
let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into();
let config = DesugaringConfig::<SwiftContext>::new()
let desugar = DesugaringConfig::new()
.add_phase("translate", PhaseKind::OneShot, translation_rules())
.with_output_node_types_yaml(desugared_ast_schema);
let desugarer = ConcreteDesugarer::new(ts_language.clone(), config)
.expect("failed to build Swift desugarer");
simple::LanguageSpec {
prefix: "swift",
ts_language,
ts_language: tree_sitter_swift::LANGUAGE.into(),
node_types: tree_sitter_swift::NODE_TYPES,
file_globs: vec!["*.swift".into(), "*.swiftinterface".into()],
desugar: Some(Box::new(desugarer)),
desugar: Some(desugar),
}
}

View File

@@ -924,159 +924,3 @@ top_level
accessor_kind: accessor_kind "set"
modifier: modifier "class"
name: identifier "Box"
===
Protocol with read-only and read-write property requirements
===
protocol P {
var foo: Int { get }
var bar: String { get set }
}
---
source_file
statement:
protocol_declaration
body:
protocol_body
member:
protocol_property_declaration
name:
pattern
binding:
value_binding_pattern
mutability: var
bound_identifier: simple_identifier "foo"
requirements:
protocol_property_requirements
accessor:
getter_specifier
type:
type_annotation
type:
type
name:
user_type
part:
simple_user_type
name: type_identifier "Int"
protocol_property_declaration
name:
pattern
binding:
value_binding_pattern
mutability: var
bound_identifier: simple_identifier "bar"
requirements:
protocol_property_requirements
accessor:
getter_specifier
setter_specifier
type:
type_annotation
type:
type
name:
user_type
part:
simple_user_type
name: type_identifier "String"
name: type_identifier "P"
---
top_level
body:
block
stmt:
class_like_declaration
member:
accessor_declaration
name: identifier "foo"
type:
named_type_expr
name: identifier "Int"
accessor_kind: accessor_kind "get"
accessor_declaration
name: identifier "bar"
type:
named_type_expr
name: identifier "String"
accessor_kind: accessor_kind "get"
accessor_declaration
modifier: modifier "chained_declaration"
name: identifier "bar"
type:
named_type_expr
name: identifier "String"
accessor_kind: accessor_kind "set"
modifier: modifier "protocol"
name: identifier "P"
===
Enum with comma-separated cases (chained_declaration)
===
enum Suit {
case clubs, diamonds, hearts, spades
}
---
source_file
statement:
class_declaration
body:
enum_class_body
member:
enum_entry
case:
enum_case_entry
name: simple_identifier "clubs"
enum_case_entry
name: simple_identifier "diamonds"
enum_case_entry
name: simple_identifier "hearts"
enum_case_entry
name: simple_identifier "spades"
declaration_kind: enum
name: type_identifier "Suit"
---
top_level
body:
block
stmt:
class_like_declaration
member:
variable_declaration
modifier: modifier "enum_case"
pattern:
name_pattern
identifier: identifier "clubs"
variable_declaration
modifier:
modifier "chained_declaration"
modifier "enum_case"
pattern:
name_pattern
identifier: identifier "diamonds"
variable_declaration
modifier:
modifier "chained_declaration"
modifier "enum_case"
pattern:
name_pattern
identifier: identifier "hearts"
variable_declaration
modifier:
modifier "chained_declaration"
modifier "enum_case"
pattern:
name_pattern
identifier: identifier "spades"
modifier: modifier "enum"
name: identifier "Suit"

View File

@@ -319,130 +319,3 @@ top_level
name_expr
identifier: identifier "x"
value: int_literal "1"
===
Property with willSet and didSet observers
===
class C {
var x: Int = 0 {
willSet { print(newValue) }
didSet { print(oldValue) }
}
}
---
source_file
statement:
class_declaration
body:
class_body
member:
property_declaration
binding:
value_binding_pattern
mutability: var
declarator:
property_binding
name:
pattern
bound_identifier: simple_identifier "x"
observers:
willset_didset_block
didset:
didset_clause
body:
block
statement:
call_expression
function: simple_identifier "print"
suffix:
call_suffix
arguments:
value_arguments
argument:
value_argument
value: simple_identifier "oldValue"
willset:
willset_clause
body:
block
statement:
call_expression
function: simple_identifier "print"
suffix:
call_suffix
arguments:
value_arguments
argument:
value_argument
value: simple_identifier "newValue"
type:
type_annotation
type:
type
name:
user_type
part:
simple_user_type
name: type_identifier "Int"
value: integer_literal "0"
declaration_kind: class
name: type_identifier "C"
---
top_level
body:
block
stmt:
class_like_declaration
member:
variable_declaration
modifier: modifier "var"
pattern:
name_pattern
identifier: identifier "x"
type:
named_type_expr
name: identifier "Int"
value: int_literal "0"
accessor_declaration
body:
block
stmt:
call_expr
argument:
argument
value:
name_expr
identifier: identifier "newValue"
callee:
name_expr
identifier: identifier "print"
modifier:
modifier "var"
modifier "chained_declaration"
name: identifier "x"
accessor_kind: accessor_kind "willSet"
accessor_declaration
body:
block
stmt:
call_expr
argument:
argument
value:
name_expr
identifier: identifier "oldValue"
callee:
name_expr
identifier: identifier "print"
modifier:
modifier "var"
modifier "chained_declaration"
name: identifier "x"
accessor_kind: accessor_kind "didSet"
modifier: modifier "class"
name: identifier "C"

View File

@@ -2,7 +2,7 @@ use std::fs;
use std::path::Path;
use codeql_extractor::extractor::simple;
use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors};
use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner};
#[path = "../src/languages/mod.rs"]
mod languages;
@@ -146,36 +146,29 @@ fn render_corpus(cases: &[CorpusCase]) -> String {
out
}
fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result<yeast::Ast, String> {
match lang.desugar.as_deref() {
Some(desugarer) => {
// Parse the input ourselves so we don't depend on the desugarer
// knowing about the language.
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&lang.ts_language)
.map_err(|e| format!("Failed to set language: {e}"))?;
let tree = parser
.parse(input, None)
.ok_or_else(|| "Failed to parse input".to_string())?;
desugarer
.run_from_tree(&tree, input.as_bytes())
.map_err(|e| format!("Desugaring failed: {e}"))
}
None => {
let runner: Runner = Runner::new(lang.ts_language.clone(), &[]);
runner
.run(input)
.map_err(|e| format!("Failed to parse input: {e}"))
}
}
fn run_desugaring(
lang: &simple::LanguageSpec,
input: &str,
) -> Result<yeast::Ast, String> {
let runner = match lang.desugar.as_ref() {
Some(config) => Runner::from_config(lang.ts_language.clone(), config)
.map_err(|e| format!("Failed to create yeast runner: {e}"))?,
None => Runner::new(lang.ts_language.clone(), &[]),
};
runner
.run(input)
.map_err(|e| format!("Failed to parse input: {e}"))
}
/// Produce the raw tree-sitter parse tree dump for `input`, with no
/// desugaring rules applied. Uses a `Runner` with an empty phase list and
/// the input grammar's own schema.
fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result<String, String> {
let runner: Runner = Runner::new(lang.ts_language.clone(), &[]);
fn dump_raw_parse(
lang: &simple::LanguageSpec,
input: &str,
) -> Result<String, String> {
let runner = Runner::new(lang.ts_language.clone(), &[]);
let ast = runner
.run(input)
.map_err(|e| format!("Failed to parse input: {e}"))?;
@@ -279,7 +272,11 @@ fn test_corpus() {
}
}
assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n");
assert!(
failures.is_empty(),
"{}",
failures.join("\n\n") + "\n\n"
);
if update_mode {
let updated = render_corpus(&cases);
@@ -288,9 +285,7 @@ fn test_corpus() {
write_result.is_ok(),
"Failed to update corpus file {}: {}",
corpus_path.display(),
write_result
.err()
.map_or_else(String::new, |e| e.to_string())
write_result.err().map_or_else(String::new, |e| e.to_string())
);
}
}

View File

@@ -16,9 +16,7 @@ fn main() {
Some(&grammar_js),
tree_sitter_generate::ABI_VERSION_MAX,
None,
// Evaluate grammar.js with the embedded QuickJS runtime instead of
// spawning `node`, which isn't available inside Bazel's sandbox.
Some("native"),
None,
true,
tree_sitter_generate::OptLevel::default(),
)

View File

@@ -1,16 +0,0 @@
/**
* Provides shared predicates related to contextual queries in the code viewer.
*/
private import codeql.files.FileSystem
private import codeql.util.FileSystem
/**
* Returns an appropriately encoded version of a filename `name`
* passed by the VS Code extension in order to coincide with the
* output of `.getFile()` on locatable entities.
*/
cached
File getFileBySourceArchiveName(string name) {
result = IdeContextual<File>::getFileBySourceArchiveName(name)
}

View File

@@ -1439,328 +1439,4 @@ module Unified {
unified_while_stmt_modifier(this, _, result)
}
}
/** Provides predicates for mapping AST nodes to their named children. */
module PrintAst {
/** Gets a child of `node` returned by the member predicate with the given `name`. If the predicate takes an index argument, `i` is bound to that index, otherwise `i` is `-1` (which is never a valid index). */
AstNode getChild(AstNode node, string name, int i) {
result = node.(AccessorDeclaration).getAccessorKind() and i = -1 and name = "getAccessorKind"
or
result = node.(AccessorDeclaration).getBody() and i = -1 and name = "getBody"
or
result = node.(AccessorDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(AccessorDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(AccessorDeclaration).getParameter(i) and name = "getParameter"
or
result = node.(AccessorDeclaration).getType() and i = -1 and name = "getType"
or
result = node.(Argument).getModifier(i) and name = "getModifier"
or
result = node.(Argument).getName() and i = -1 and name = "getName"
or
result = node.(Argument).getValue() and i = -1 and name = "getValue"
or
result = node.(ArrayLiteral).getElement(i) and name = "getElement"
or
result = node.(AssignExpr).getTarget() and i = -1 and name = "getTarget"
or
result = node.(AssignExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(AssociatedTypeDeclaration).getBound() and i = -1 and name = "getBound"
or
result = node.(AssociatedTypeDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(AssociatedTypeDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(BaseType).getModifier(i) and name = "getModifier"
or
result = node.(BaseType).getType() and i = -1 and name = "getType"
or
result = node.(BinaryExpr).getLeft() and i = -1 and name = "getLeft"
or
result = node.(BinaryExpr).getOperator() and i = -1 and name = "getOperator"
or
result = node.(BinaryExpr).getRight() and i = -1 and name = "getRight"
or
result = node.(Block).getStmt(i) and name = "getStmt"
or
result = node.(BoundTypeConstraint).getBound() and i = -1 and name = "getBound"
or
result = node.(BoundTypeConstraint).getType() and i = -1 and name = "getType"
or
result = node.(BreakExpr).getLabel() and i = -1 and name = "getLabel"
or
result = node.(BulkImportingPattern).getModifier(i) and name = "getModifier"
or
result = node.(CallExpr).getArgument(i) and name = "getArgument"
or
result = node.(CallExpr).getCallee() and i = -1 and name = "getCallee"
or
result = node.(CallExpr).getModifier(i) and name = "getModifier"
or
result = node.(CatchClause).getBody() and i = -1 and name = "getBody"
or
result = node.(CatchClause).getGuard() and i = -1 and name = "getGuard"
or
result = node.(CatchClause).getModifier(i) and name = "getModifier"
or
result = node.(CatchClause).getPattern() and i = -1 and name = "getPattern"
or
result = node.(ClassLikeDeclaration).getBaseType(i) and name = "getBaseType"
or
result = node.(ClassLikeDeclaration).getMember(i) and name = "getMember"
or
result = node.(ClassLikeDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(ClassLikeDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(ClassLikeDeclaration).getTypeConstraint(i) and name = "getTypeConstraint"
or
result = node.(ClassLikeDeclaration).getTypeParameter(i) and name = "getTypeParameter"
or
result = node.(CompoundAssignExpr).getOperator() and i = -1 and name = "getOperator"
or
result = node.(CompoundAssignExpr).getTarget() and i = -1 and name = "getTarget"
or
result = node.(CompoundAssignExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(ConstructorDeclaration).getBody() and i = -1 and name = "getBody"
or
result = node.(ConstructorDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(ConstructorDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(ConstructorDeclaration).getParameter(i) and name = "getParameter"
or
result = node.(ConstructorPattern).getConstructor() and i = -1 and name = "getConstructor"
or
result = node.(ConstructorPattern).getElement(i) and name = "getElement"
or
result = node.(ConstructorPattern).getModifier(i) and name = "getModifier"
or
result = node.(ContinueExpr).getLabel() and i = -1 and name = "getLabel"
or
result = node.(DestructorDeclaration).getBody() and i = -1 and name = "getBody"
or
result = node.(DestructorDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(DoWhileStmt).getBody() and i = -1 and name = "getBody"
or
result = node.(DoWhileStmt).getCondition() and i = -1 and name = "getCondition"
or
result = node.(DoWhileStmt).getModifier(i) and name = "getModifier"
or
result = node.(EqualityTypeConstraint).getLeft() and i = -1 and name = "getLeft"
or
result = node.(EqualityTypeConstraint).getRight() and i = -1 and name = "getRight"
or
result = node.(ExprEqualityPattern).getExpr() and i = -1 and name = "getExpr"
or
result = node.(ForEachStmt).getBody() and i = -1 and name = "getBody"
or
result = node.(ForEachStmt).getGuard() and i = -1 and name = "getGuard"
or
result = node.(ForEachStmt).getIterable() and i = -1 and name = "getIterable"
or
result = node.(ForEachStmt).getModifier(i) and name = "getModifier"
or
result = node.(ForEachStmt).getPattern() and i = -1 and name = "getPattern"
or
result = node.(FunctionDeclaration).getBody() and i = -1 and name = "getBody"
or
result = node.(FunctionDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(FunctionDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(FunctionDeclaration).getParameter(i) and name = "getParameter"
or
result = node.(FunctionDeclaration).getReturnType() and i = -1 and name = "getReturnType"
or
result = node.(FunctionDeclaration).getTypeConstraint(i) and name = "getTypeConstraint"
or
result = node.(FunctionDeclaration).getTypeParameter(i) and name = "getTypeParameter"
or
result = node.(FunctionExpr).getBody() and i = -1 and name = "getBody"
or
result = node.(FunctionExpr).getCaptureDeclaration(i) and name = "getCaptureDeclaration"
or
result = node.(FunctionExpr).getModifier(i) and name = "getModifier"
or
result = node.(FunctionExpr).getParameter(i) and name = "getParameter"
or
result = node.(FunctionExpr).getReturnType() and i = -1 and name = "getReturnType"
or
result = node.(FunctionTypeExpr).getParameter(i) and name = "getParameter"
or
result = node.(FunctionTypeExpr).getReturnType() and i = -1 and name = "getReturnType"
or
result = node.(GenericTypeExpr).getBase() and i = -1 and name = "getBase"
or
result = node.(GenericTypeExpr).getTypeArgument(i) and name = "getTypeArgument"
or
result = node.(GuardIfStmt).getCondition() and i = -1 and name = "getCondition"
or
result = node.(GuardIfStmt).getElse() and i = -1 and name = "getElse"
or
result = node.(IfExpr).getCondition() and i = -1 and name = "getCondition"
or
result = node.(IfExpr).getElse() and i = -1 and name = "getElse"
or
result = node.(IfExpr).getThen() and i = -1 and name = "getThen"
or
result = node.(ImportDeclaration).getImportedExpr() and i = -1 and name = "getImportedExpr"
or
result = node.(ImportDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(ImportDeclaration).getPattern() and i = -1 and name = "getPattern"
or
result = node.(InitializerDeclaration).getBody() and i = -1 and name = "getBody"
or
result = node.(InitializerDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(KeyValuePair).getKey() and i = -1 and name = "getKey"
or
result = node.(KeyValuePair).getValue() and i = -1 and name = "getValue"
or
result = node.(LabeledStmt).getLabel() and i = -1 and name = "getLabel"
or
result = node.(LabeledStmt).getStmt() and i = -1 and name = "getStmt"
or
result = node.(MapLiteral).getElement(i) and name = "getElement"
or
result = node.(MemberAccessExpr).getBase() and i = -1 and name = "getBase"
or
result = node.(MemberAccessExpr).getMember() and i = -1 and name = "getMember"
or
result = node.(NameExpr).getIdentifier() and i = -1 and name = "getIdentifier"
or
result = node.(NamePattern).getIdentifier() and i = -1 and name = "getIdentifier"
or
result = node.(NamePattern).getModifier(i) and name = "getModifier"
or
result = node.(NamedTypeExpr).getName() and i = -1 and name = "getName"
or
result = node.(NamedTypeExpr).getQualifier() and i = -1 and name = "getQualifier"
or
result = node.(OperatorSyntaxDeclaration).getFixity() and i = -1 and name = "getFixity"
or
result = node.(OperatorSyntaxDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(OperatorSyntaxDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(OperatorSyntaxDeclaration).getPrecedence() and
i = -1 and
name = "getPrecedence"
or
result = node.(Parameter).getDefault() and i = -1 and name = "getDefault"
or
result = node.(Parameter).getExternalName() and i = -1 and name = "getExternalName"
or
result = node.(Parameter).getModifier(i) and name = "getModifier"
or
result = node.(Parameter).getPattern() and i = -1 and name = "getPattern"
or
result = node.(Parameter).getType() and i = -1 and name = "getType"
or
result = node.(PatternElement).getKey() and i = -1 and name = "getKey"
or
result = node.(PatternElement).getModifier(i) and name = "getModifier"
or
result = node.(PatternElement).getPattern() and i = -1 and name = "getPattern"
or
result = node.(PatternGuardExpr).getPattern() and i = -1 and name = "getPattern"
or
result = node.(PatternGuardExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(ReturnExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(SwitchCase).getBody() and i = -1 and name = "getBody"
or
result = node.(SwitchCase).getGuard() and i = -1 and name = "getGuard"
or
result = node.(SwitchCase).getModifier(i) and name = "getModifier"
or
result = node.(SwitchCase).getPattern(i) and name = "getPattern"
or
result = node.(SwitchExpr).getCase(i) and name = "getCase"
or
result = node.(SwitchExpr).getModifier(i) and name = "getModifier"
or
result = node.(SwitchExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(ThrowExpr).getValue() and i = -1 and name = "getValue"
or
result = node.(TopLevel).getBody() and i = -1 and name = "getBody"
or
result = node.(TryExpr).getBody() and i = -1 and name = "getBody"
or
result = node.(TryExpr).getCatchClause(i) and name = "getCatchClause"
or
result = node.(TryExpr).getModifier(i) and name = "getModifier"
or
result = node.(TupleExpr).getElement(i) and name = "getElement"
or
result = node.(TuplePattern).getElement(i) and name = "getElement"
or
result = node.(TuplePattern).getModifier(i) and name = "getModifier"
or
result = node.(TupleTypeElement).getName() and i = -1 and name = "getName"
or
result = node.(TupleTypeElement).getType() and i = -1 and name = "getType"
or
result = node.(TupleTypeExpr).getElement(i) and name = "getElement"
or
result = node.(TypeAliasDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(TypeAliasDeclaration).getName() and i = -1 and name = "getName"
or
result = node.(TypeAliasDeclaration).getType() and i = -1 and name = "getType"
or
result = node.(TypeAliasDeclaration).getTypeConstraint(i) and name = "getTypeConstraint"
or
result = node.(TypeAliasDeclaration).getTypeParameter(i) and name = "getTypeParameter"
or
result = node.(TypeCastExpr).getExpr() and i = -1 and name = "getExpr"
or
result = node.(TypeCastExpr).getOperator() and i = -1 and name = "getOperator"
or
result = node.(TypeCastExpr).getType() and i = -1 and name = "getType"
or
result = node.(TypeParameter).getBound() and i = -1 and name = "getBound"
or
result = node.(TypeParameter).getModifier(i) and name = "getModifier"
or
result = node.(TypeParameter).getName() and i = -1 and name = "getName"
or
result = node.(TypeTestExpr).getExpr() and i = -1 and name = "getExpr"
or
result = node.(TypeTestExpr).getOperator() and i = -1 and name = "getOperator"
or
result = node.(TypeTestExpr).getType() and i = -1 and name = "getType"
or
result = node.(TypeTestPattern).getPattern() and i = -1 and name = "getPattern"
or
result = node.(TypeTestPattern).getType() and i = -1 and name = "getType"
or
result = node.(UnaryExpr).getOperand() and i = -1 and name = "getOperand"
or
result = node.(UnaryExpr).getOperator() and i = -1 and name = "getOperator"
or
result = node.(VariableDeclaration).getModifier(i) and name = "getModifier"
or
result = node.(VariableDeclaration).getPattern() and i = -1 and name = "getPattern"
or
result = node.(VariableDeclaration).getType() and i = -1 and name = "getType"
or
result = node.(VariableDeclaration).getValue() and i = -1 and name = "getValue"
or
result = node.(WhileStmt).getBody() and i = -1 and name = "getBody"
or
result = node.(WhileStmt).getCondition() and i = -1 and name = "getCondition"
or
result = node.(WhileStmt).getModifier(i) and name = "getModifier"
}
}
}

View File

@@ -1,96 +0,0 @@
/** Provides a configurable query for printing AST nodes */
private import unified
/**
* The query can extend this class to control which nodes are printed.
*/
class PrintAstConfiguration extends string {
PrintAstConfiguration() { this = "PrintAstConfiguration" }
/**
* Holds if the given node should be printed.
*/
predicate shouldPrintNode(AstNode n) { not n instanceof TriviaToken }
/**
* Holds if the given edge should be printed.
*/
predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
exists(string name, int i |
child = PrintAst::getChild(parent, name, i) and
(if i = -1 then edgeName = name else edgeName = name + "(" + i + ")")
)
}
}
private predicate shouldPrintNode(AstNode n) {
any(PrintAstConfiguration config).shouldPrintNode(n)
}
private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
any(PrintAstConfiguration config).shouldPrintAstEdge(parent, edgeName, child) and
shouldPrintNode(parent) and
shouldPrintNode(child)
}
/**
* Get an alias for the predicate `name` to use for ordering purposes, to control where
* in the list of children it should appear.
*/
private string reorderName1(string name) { name = "getModifier" and result = "00_getModifier" }
bindingset[name]
private string reorderName(string name) {
result = reorderName1(name)
or
not exists(reorderName1(name)) and
result = name
}
class PrintAstNode extends AstNode {
final int getOrder() {
this =
rank[result](AstNode parent, AstNode child, string name, int i |
child = PrintAst::getChild(parent, name, i)
|
child order by reorderName(name), i
)
}
final string getProperty(string key) {
key = "semmle.label" and
result = this.toString()
or
key = "semmle.order" and result = this.getOrder().toString()
}
}
/**
* Holds if `node` belongs to the output tree, and its property `key` has the
* given `value`.
*/
query predicate nodes(PrintAstNode node, string key, string value) {
shouldPrintNode(node) and
value = node.getProperty(key)
}
/**
* Holds if `target` is a child of `source` in the AST, and property `key` of
* the edge has the given `value`.
*/
query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) {
key = "semmle.label" and
shouldPrintAstEdge(source, value, target)
or
key = "semmle.order" and
shouldPrintAstEdge(source, _, target) and
value = target.getProperty("semmle.order")
}
/**
* Holds if property `key` of the graph has the given `value`.
*/
query predicate graphProperties(string key, string value) {
key = "semmle.graphKind" and value = "tree"
}

View File

@@ -1,27 +0,0 @@
/**
* @name Print AST
* @description Produces a representation of a file's Abstract Syntax Tree.
* This query is used by the VS Code extension.
* @id unified/print-ast
* @kind graph
* @tags ide-contextual-queries/print-ast
*/
private import codeql.IDEContextual
private import unified
private import codeql.unified.printAst
/**
* The source file to generate an AST from.
*/
external string selectedSourceFile();
/**
* A configuration that only prints nodes in the selected source file.
*/
class Cfg extends PrintAstConfiguration {
override predicate shouldPrintNode(AstNode n) {
super.shouldPrintNode(n) and
n.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile())
}
}