mirror of
https://github.com/github/codeql.git
synced 2026-05-26 09:01:22 +02:00
Compare commits
9 Commits
tausbn/pyt
...
asgerf/js-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47d299e93b | ||
|
|
fbaf648e4f | ||
|
|
637ce99e44 | ||
|
|
bd9d6b1962 | ||
|
|
bd5e4761bd | ||
|
|
93deb33a2a | ||
|
|
f3b27a56b1 | ||
|
|
37852aa1d3 | ||
|
|
78b1651596 |
@@ -26,23 +26,10 @@ string permissionsForJob(Job job) {
|
||||
"{" + concat(string permission | permission = jobNeedsPermission(job) | permission, ", ") + "}"
|
||||
}
|
||||
|
||||
predicate jobHasPermissions(Job job) {
|
||||
exists(job.getPermissions())
|
||||
or
|
||||
exists(job.getEnclosingWorkflow().getPermissions())
|
||||
or
|
||||
// The workflow is reusable and cannot be triggered in any other way; check callers
|
||||
exists(ReusableWorkflow r | r = job.getEnclosingWorkflow() |
|
||||
not exists(Event e | e = r.getOn().getAnEvent() | e.getName() != "workflow_call") and
|
||||
forall(Job caller | caller = job.getEnclosingWorkflow().(ReusableWorkflow).getACaller() |
|
||||
jobHasPermissions(caller)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
from Job job, string permissions
|
||||
where
|
||||
not jobHasPermissions(job) and
|
||||
not exists(job.getPermissions()) and
|
||||
not exists(job.getEnclosingWorkflow().getPermissions()) and
|
||||
// exists a trigger event that is not a workflow_call
|
||||
exists(Event e |
|
||||
e = job.getATriggerEvent() and
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The query `actions/missing-workflow-permissions` no longer produces false positive results on reusable workflows where all callers set permissions.
|
||||
@@ -1,9 +0,0 @@
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build and test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/deploy-pages
|
||||
@@ -1,11 +0,0 @@
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
pages: write
|
||||
|
||||
jobs:
|
||||
call-workflow:
|
||||
uses: ./.github/workflows/perms11.yml
|
||||
@@ -7,12 +7,10 @@ ql/cpp/ql/src/Diagnostics/ExtractedFiles.ql
|
||||
ql/cpp/ql/src/Diagnostics/ExtractionWarnings.ql
|
||||
ql/cpp/ql/src/Diagnostics/FailedExtractorInvocations.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Arithmetic/BadAdditionOverflowCheck.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Arithmetic/IntMultToLong.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Arithmetic/SignedOverflowCheck.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Conversion/CastArrayPointerArithmetic.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Format/SnprintfOverflow.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Format/WrongNumberOfFormatArguments.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Format/WrongTypeFormatArguments.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Memory Management/PointerOverflow.ql
|
||||
ql/cpp/ql/src/Likely Bugs/Memory Management/ReturnStackAllocatedMemory.ql
|
||||
@@ -30,7 +28,6 @@ ql/cpp/ql/src/Security/CWE/CWE-120/VeryLikelyOverrunWrite.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-131/NoSpaceForZeroTerminator.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-190/ComparisonWithWiderType.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-191/UnsignedDifferenceExpressionComparedZero.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-253/HResultBooleanConversion.ql
|
||||
ql/cpp/ql/src/Security/CWE/CWE-311/CleartextFileWrite.ql
|
||||
|
||||
@@ -459,13 +459,6 @@ class FormatLiteral extends Literal instanceof StringLiteral {
|
||||
*/
|
||||
int getConvSpecOffset(int n) { result = this.getFormat().indexOf("%", n, 0) }
|
||||
|
||||
/**
|
||||
* Gets the nth conversion specifier string.
|
||||
*/
|
||||
private string getConvSpecString(int n) {
|
||||
n >= 0 and result = "%" + this.getFormat().splitAt("%", n + 1)
|
||||
}
|
||||
|
||||
/*
|
||||
* Each of these predicates gets a regular expression to match an individual
|
||||
* part of a conversion specifier.
|
||||
@@ -531,20 +524,22 @@ class FormatLiteral extends Literal instanceof StringLiteral {
|
||||
int n, string spec, string params, string flags, string width, string prec, string len,
|
||||
string conv
|
||||
) {
|
||||
exists(string convSpec, string regexp |
|
||||
convSpec = this.getConvSpecString(n) and
|
||||
exists(int offset, string fmt, string rst, string regexp |
|
||||
offset = this.getConvSpecOffset(n) and
|
||||
fmt = this.getFormat() and
|
||||
rst = fmt.substring(offset, fmt.length()) and
|
||||
regexp = this.getConvSpecRegexp() and
|
||||
(
|
||||
spec = convSpec.regexpCapture(regexp, 1) and
|
||||
params = convSpec.regexpCapture(regexp, 2) and
|
||||
flags = convSpec.regexpCapture(regexp, 3) and
|
||||
width = convSpec.regexpCapture(regexp, 4) and
|
||||
prec = convSpec.regexpCapture(regexp, 5) and
|
||||
len = convSpec.regexpCapture(regexp, 6) and
|
||||
conv = convSpec.regexpCapture(regexp, 7)
|
||||
spec = rst.regexpCapture(regexp, 1) and
|
||||
params = rst.regexpCapture(regexp, 2) and
|
||||
flags = rst.regexpCapture(regexp, 3) and
|
||||
width = rst.regexpCapture(regexp, 4) and
|
||||
prec = rst.regexpCapture(regexp, 5) and
|
||||
len = rst.regexpCapture(regexp, 6) and
|
||||
conv = rst.regexpCapture(regexp, 7)
|
||||
or
|
||||
spec = convSpec.regexpCapture(regexp, 1) and
|
||||
not exists(convSpec.regexpCapture(regexp, 2)) and
|
||||
spec = rst.regexpCapture(regexp, 1) and
|
||||
not exists(rst.regexpCapture(regexp, 2)) and
|
||||
params = "" and
|
||||
flags = "" and
|
||||
width = "" and
|
||||
@@ -559,10 +554,12 @@ class FormatLiteral extends Literal instanceof StringLiteral {
|
||||
* Gets the nth conversion specifier (including the initial `%`).
|
||||
*/
|
||||
string getConvSpec(int n) {
|
||||
exists(string convSpec, string regexp |
|
||||
convSpec = this.getConvSpecString(n) and
|
||||
exists(int offset, string fmt, string rst, string regexp |
|
||||
offset = this.getConvSpecOffset(n) and
|
||||
fmt = this.getFormat() and
|
||||
rst = fmt.substring(offset, fmt.length()) and
|
||||
regexp = this.getConvSpecRegexp() and
|
||||
result = convSpec.regexpCapture(regexp, 1)
|
||||
result = rst.regexpCapture(regexp, 1)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -194,13 +194,6 @@ class ScanfFormatLiteral extends Expr {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the nth conversion specifier string.
|
||||
*/
|
||||
private string getConvSpecString(int n) {
|
||||
n >= 0 and result = "%" + this.getFormat().splitAt("%", n + 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the regular expression to match each individual part of a conversion specifier.
|
||||
*/
|
||||
@@ -234,14 +227,16 @@ class ScanfFormatLiteral extends Expr {
|
||||
* specifier.
|
||||
*/
|
||||
predicate parseConvSpec(int n, string spec, string width, string len, string conv) {
|
||||
exists(string convSpec, string regexp |
|
||||
convSpec = this.getConvSpecString(n) and
|
||||
exists(int offset, string fmt, string rst, string regexp |
|
||||
offset = this.getConvSpecOffset(n) and
|
||||
fmt = this.getFormat() and
|
||||
rst = fmt.substring(offset, fmt.length()) and
|
||||
regexp = this.getConvSpecRegexp() and
|
||||
(
|
||||
spec = convSpec.regexpCapture(regexp, 1) and
|
||||
width = convSpec.regexpCapture(regexp, 2) and
|
||||
len = convSpec.regexpCapture(regexp, 3) and
|
||||
conv = convSpec.regexpCapture(regexp, 4)
|
||||
spec = rst.regexpCapture(regexp, 1) and
|
||||
width = rst.regexpCapture(regexp, 2) and
|
||||
len = rst.regexpCapture(regexp, 3) and
|
||||
conv = rst.regexpCapture(regexp, 4)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -6,15 +6,11 @@
|
||||
*
|
||||
* The extensible relations have the following columns:
|
||||
* - Sources:
|
||||
* `namespace; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* `namespace; type; subtypes; name; signature; ext; output; kind`
|
||||
* - Sinks:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; kind; provenance`
|
||||
* `namespace; type; subtypes; name; signature; ext; input; kind`
|
||||
* - Summaries:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; output; kind; provenance`
|
||||
* - Barriers:
|
||||
* `namespace; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - BarrierGuards:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; acceptingValue; kind; provenance`
|
||||
* `namespace; type; subtypes; name; signature; ext; input; output; kind`
|
||||
*
|
||||
* The interpretation of a row is similar to API-graphs with a left-to-right
|
||||
* reading.
|
||||
@@ -91,23 +87,11 @@
|
||||
* value, and
|
||||
* - flow from the _second_ indirection of the 0th argument to the first
|
||||
* indirection of the return value, etc.
|
||||
* 8. The `acceptingValue` column of barrier guard models specifies the condition
|
||||
* under which the guard blocks flow. It can be one of "true" or "false". In
|
||||
* the future "no-exception", "not-zero", "null", "not-null" may be supported.
|
||||
* 9. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 8. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources "remote" indicates a default remote flow source, and for summaries
|
||||
* "taint" indicates a default additional taint step and "value" indicates a
|
||||
* globally applicable value-preserving step.
|
||||
* 10. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* The format is {origin}-{verification} or just "manual" where the origin describes
|
||||
* the origin of the model and verification describes how the model has been verified.
|
||||
* Some examples are:
|
||||
* - "df-generated": The model has been generated by the model generator tool.
|
||||
* - "df-manual": The model has been generated by the model generator and verified by a human.
|
||||
* - "manual": The model has been written by hand.
|
||||
* This information is used in a heuristic for dataflow analysis to determine whether a
|
||||
* model or source code should be used for determining flow.
|
||||
*/
|
||||
|
||||
import cpp
|
||||
@@ -947,13 +931,13 @@ private module Cached {
|
||||
|
||||
private predicate barrierGuardChecks(IRGuardCondition g, Expr e, boolean gv, TKindModelPair kmp) {
|
||||
exists(
|
||||
SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingValue,
|
||||
SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingvalue,
|
||||
string kind, string model
|
||||
|
|
||||
isBarrierGuardNode(n, acceptingValue, kind, model) and
|
||||
isBarrierGuardNode(n, acceptingvalue, kind, model) and
|
||||
n.asNode().asExpr() = e and
|
||||
kmp = TMkPair(kind, model) and
|
||||
gv = convertAcceptingValue(acceptingValue).asBooleanValue() and
|
||||
gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and
|
||||
n.asNode().(Private::ArgumentNode).getCall().asCallInstruction() = g
|
||||
)
|
||||
}
|
||||
@@ -970,14 +954,14 @@ private module Cached {
|
||||
) {
|
||||
exists(
|
||||
SourceSinkInterpretationInput::InterpretNode interpretNode,
|
||||
Public::AcceptingValue acceptingValue, string kind, string model, int indirectionIndex,
|
||||
Public::AcceptingValue acceptingvalue, string kind, string model, int indirectionIndex,
|
||||
Private::ArgumentNode arg
|
||||
|
|
||||
isBarrierGuardNode(interpretNode, acceptingValue, kind, model) and
|
||||
isBarrierGuardNode(interpretNode, acceptingvalue, kind, model) and
|
||||
arg = interpretNode.asNode() and
|
||||
arg.asIndirectExpr(indirectionIndex) = e and
|
||||
kmp = MkKindModelPairIntPair(TMkPair(kind, model), indirectionIndex) and
|
||||
gv = convertAcceptingValue(acceptingValue).asBooleanValue() and
|
||||
gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and
|
||||
arg.getCall().asCallInstruction() = g
|
||||
)
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ extensible predicate barrierModel(
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -162,13 +162,13 @@ module SourceSinkInterpretationInput implements
|
||||
}
|
||||
|
||||
predicate barrierGuardElement(
|
||||
Element e, string input, Public::AcceptingValue acceptingValue, string kind,
|
||||
Element e, string input, Public::AcceptingValue acceptingvalue, string kind,
|
||||
Public::Provenance provenance, string model
|
||||
) {
|
||||
exists(
|
||||
string package, string type, boolean subtypes, string name, string signature, string ext
|
||||
|
|
||||
barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingValue, kind,
|
||||
barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingvalue, kind,
|
||||
provenance, model) and
|
||||
e = interpretElement(package, type, subtypes, name, signature, ext)
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 8.1
|
||||
* @precision high
|
||||
* @precision medium
|
||||
* @id cpp/integer-multiplication-cast-to-long
|
||||
* @tags reliability
|
||||
* security
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @precision medium
|
||||
* @id cpp/wrong-type-format-argument
|
||||
* @tags reliability
|
||||
* correctness
|
||||
|
||||
@@ -14,9 +14,6 @@ function may behave unpredictably.</p>
|
||||
<p>This may indicate a misspelled function name, or that the required header containing
|
||||
the function declaration has not been included.</p>
|
||||
|
||||
<p>Note: This query is not compatible with <code>build mode: none</code> databases, and produces
|
||||
no results on those databases.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>Provide an explicit declaration of the function before invoking it.</p>
|
||||
@@ -29,4 +26,4 @@ no results on those databases.</p>
|
||||
<references>
|
||||
<li>SEI CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/DCL31-C.+Declare+identifiers+before+using+them">DCL31-C. Declare identifiers before using them</a></li>
|
||||
</references>
|
||||
</qhelp>
|
||||
</qhelp>
|
||||
@@ -5,7 +5,7 @@
|
||||
* may lead to unpredictable behavior.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @precision medium
|
||||
* @id cpp/implicit-function-declaration
|
||||
* @tags correctness
|
||||
* maintainability
|
||||
@@ -17,11 +17,6 @@ import TooFewArguments
|
||||
import TooManyArguments
|
||||
import semmle.code.cpp.commons.Exclusions
|
||||
|
||||
/*
|
||||
* This query is not compatible with build mode: none databases, and produces
|
||||
* no results on those databases.
|
||||
*/
|
||||
|
||||
predicate locInfo(Locatable e, File file, int line, int col) {
|
||||
e.getFile() = file and
|
||||
e.getLocation().getStartLine() = line and
|
||||
@@ -44,7 +39,6 @@ predicate isCompiledAsC(File f) {
|
||||
from FunctionDeclarationEntry fdeIm, FunctionCall fc
|
||||
where
|
||||
isCompiledAsC(fdeIm.getFile()) and
|
||||
not any(Compilation c).buildModeNone() and
|
||||
not isFromMacroDefinition(fc) and
|
||||
fdeIm.isImplicit() and
|
||||
sameLocation(fdeIm, fc) and
|
||||
|
||||
@@ -79,7 +79,9 @@ private predicate hasZeroParamDecl(Function f) {
|
||||
|
||||
// True if this file (or header) was compiled as a C file
|
||||
private predicate isCompiledAsC(File f) {
|
||||
exists(File src | src.compiledAsC() | src.getAnIncludedFile*() = f)
|
||||
f.compiledAsC()
|
||||
or
|
||||
exists(File src | isCompiledAsC(src) | src.getAnIncludedFile() = f)
|
||||
}
|
||||
|
||||
predicate mistypedFunctionArguments(FunctionCall fc, Function f, Parameter p) {
|
||||
|
||||
@@ -28,7 +28,9 @@ private predicate hasZeroParamDecl(Function f) {
|
||||
|
||||
/* Holds if this file (or header) was compiled as a C file. */
|
||||
private predicate isCompiledAsC(File f) {
|
||||
exists(File src | src.compiledAsC() | src.getAnIncludedFile*() = f)
|
||||
f.compiledAsC()
|
||||
or
|
||||
exists(File src | isCompiledAsC(src) | src.getAnIncludedFile() = f)
|
||||
}
|
||||
|
||||
/** Holds if `fc` is a call to `f` with too few arguments. */
|
||||
|
||||
@@ -19,7 +19,9 @@ private predicate hasZeroParamDecl(Function f) {
|
||||
|
||||
// True if this file (or header) was compiled as a C file
|
||||
private predicate isCompiledAsC(File f) {
|
||||
exists(File src | src.compiledAsC() | src.getAnIncludedFile*() = f)
|
||||
f.compiledAsC()
|
||||
or
|
||||
exists(File src | isCompiledAsC(src) | src.getAnIncludedFile() = f)
|
||||
}
|
||||
|
||||
predicate tooManyArguments(FunctionCall fc, Function f) {
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @precision medium
|
||||
* @tags reliability
|
||||
* security
|
||||
* external/cwe/cwe-190
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The "Implicit function declaration" (`cpp/implicit-function-declaration`) query no longer produces results on `build mode: none` databases. These results were found to be very noisy and fundamentally imprecise in this mode.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The "Comparison of narrow type with wide type in loop condition" (`cpp/comparison-with-wider-type`) query has been upgraded to `high` precision. This query will now run in the default code scanning suite.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The "Implicit function declaration" (`cpp/implicit-function-declaration`) query has been upgraded to `high` precision.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The "Multiplication result converted to larger type" (`cpp/integer-multiplication-cast-to-long`) query has been upgraded to `high` precision. This query will now run in the default code scanning suite.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The "Wrong type of arguments to formatting function" (`cpp/wrong-type-format-argument`) query has been upgraded to `high` precision. This query will now run in the default code scanning suite.
|
||||
@@ -232,9 +232,14 @@ private module Identity {
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate convTypeArguments(Type fromTypeArgument, Type toTypeArgument, int i) {
|
||||
fromTypeArgument = getTypeArgumentRanked(_, _, pragma[only_bind_into](i)) and
|
||||
toTypeArgument = getTypeArgumentRanked(_, _, pragma[only_bind_into](i)) and
|
||||
convIdentity(fromTypeArgument, toTypeArgument)
|
||||
exists(int j |
|
||||
fromTypeArgument = getTypeArgumentRanked(_, _, i) and
|
||||
toTypeArgument = getTypeArgumentRanked(_, _, j) and
|
||||
i <= j and
|
||||
j <= i
|
||||
|
|
||||
convIdentity(fromTypeArgument, toTypeArgument)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
@@ -924,16 +929,19 @@ private module Variance {
|
||||
private predicate convTypeArguments(
|
||||
TypeArgument fromTypeArgument, TypeArgument toTypeArgument, int i, TVariance v
|
||||
) {
|
||||
fromTypeArgument = getTypeArgumentRanked(_, _, pragma[only_bind_into](i), _) and
|
||||
toTypeArgument = getTypeArgumentRanked(_, _, pragma[only_bind_into](i), _) and
|
||||
(
|
||||
exists(int j |
|
||||
fromTypeArgument = getTypeArgumentRanked(_, _, i, _) and
|
||||
toTypeArgument = getTypeArgumentRanked(_, _, j, _) and
|
||||
i <= j and
|
||||
j <= i
|
||||
|
|
||||
convIdentity(fromTypeArgument, toTypeArgument) and
|
||||
v = TNone()
|
||||
or
|
||||
convRefTypeTypeArgumentOut(fromTypeArgument, toTypeArgument, i) and
|
||||
convRefTypeTypeArgumentOut(fromTypeArgument, toTypeArgument, j) and
|
||||
v = TOut()
|
||||
or
|
||||
convRefTypeTypeArgumentIn(toTypeArgument, fromTypeArgument, i) and
|
||||
convRefTypeTypeArgumentIn(toTypeArgument, fromTypeArgument, j) and
|
||||
v = TIn()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4,17 +4,13 @@
|
||||
* Provides classes and predicates for dealing with MaD flow models specified
|
||||
* in data extensions and CSV format.
|
||||
*
|
||||
* The extensible relations have the following columns:
|
||||
* The CSV specification has the following columns:
|
||||
* - Sources:
|
||||
* `namespace; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - Sinks:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; kind; provenance`
|
||||
* - Summaries:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; output; kind; provenance`
|
||||
* - Barriers:
|
||||
* `namespace; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - BarrierGuards:
|
||||
* `namespace; type; subtypes; name; signature; ext; input; acceptingValue; kind; provenance`
|
||||
* - Neutrals:
|
||||
* `namespace; type; name; signature; kind; provenance`
|
||||
* A neutral is used to indicate that a callable is neutral with respect to flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
@@ -73,17 +69,14 @@
|
||||
* - "Field[f]": Selects the contents of field `f`.
|
||||
* - "Property[p]": Selects the contents of property `p`.
|
||||
*
|
||||
* 8. The `acceptingValue` column of barrier guard models specifies the condition
|
||||
* under which the guard blocks flow. It can be one of "true" or "false". In
|
||||
* the future "no-exception", "not-zero", "null", "not-null" may be supported.
|
||||
* 9. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 8. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources "remote" indicates a default remote flow source, and for summaries
|
||||
* "taint" indicates a default additional taint step and "value" indicates a
|
||||
* globally applicable value-preserving step. For neutrals the kind can be `summary`,
|
||||
* `source` or `sink` to indicate that the neutral is neutral with respect to
|
||||
* flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
* 10. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* 9. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* The format is {origin}-{verification} or just "manual" where the origin describes
|
||||
* the origin of the model and verification describes how the model has been verified.
|
||||
* Some examples are:
|
||||
@@ -237,11 +230,11 @@ module ModelValidation {
|
||||
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
|
||||
)
|
||||
or
|
||||
exists(string acceptingValue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingValue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingValue) and
|
||||
exists(string acceptingvalue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingvalue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingvalue) and
|
||||
result =
|
||||
"Unrecognized accepting value description \"" + acceptingValue +
|
||||
"Unrecognized accepting value description \"" + acceptingvalue +
|
||||
"\" in barrier guard model."
|
||||
)
|
||||
}
|
||||
@@ -489,13 +482,13 @@ private module Cached {
|
||||
|
||||
private predicate barrierGuardChecks(Guard g, Expr e, GuardValue gv, TKindModelPair kmp) {
|
||||
exists(
|
||||
SourceSinkInterpretationInput::InterpretNode n, AcceptingValue acceptingValue, string kind,
|
||||
SourceSinkInterpretationInput::InterpretNode n, AcceptingValue acceptingvalue, string kind,
|
||||
string model
|
||||
|
|
||||
isBarrierGuardNode(n, acceptingValue, kind, model) and
|
||||
isBarrierGuardNode(n, acceptingvalue, kind, model) and
|
||||
n.asNode().asExpr() = e and
|
||||
kmp = TMkPair(kind, model) and
|
||||
gv = convertAcceptingValue(acceptingValue)
|
||||
gv = convertAcceptingValue(acceptingvalue)
|
||||
|
|
||||
g.(Call).getAnArgument() = e or g.(QualifiableExpr).getQualifier() = e
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ extensible predicate barrierModel(
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -253,13 +253,13 @@ module SourceSinkInterpretationInput implements
|
||||
}
|
||||
|
||||
predicate barrierGuardElement(
|
||||
Element e, string input, Public::AcceptingValue acceptingValue, string kind,
|
||||
Element e, string input, Public::AcceptingValue acceptingvalue, string kind,
|
||||
Public::Provenance provenance, string model
|
||||
) {
|
||||
exists(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
||||
|
|
||||
barrierGuardModel(namespace, type, subtypes, name, signature, ext, input, acceptingValue,
|
||||
barrierGuardModel(namespace, type, subtypes, name, signature, ext, input, acceptingvalue,
|
||||
kind, provenance, model) and
|
||||
e = interpretElement(namespace, type, subtypes, name, signature, ext, _)
|
||||
)
|
||||
|
||||
@@ -4,17 +4,13 @@
|
||||
* Provides classes and predicates for dealing with flow models specified
|
||||
* in data extensions and CSV format.
|
||||
*
|
||||
* The extensible relations have the following columns:
|
||||
* The CSV specification has the following columns:
|
||||
* - Sources:
|
||||
* `package; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - Sinks:
|
||||
* `package; type; subtypes; name; signature; ext; input; kind; provenance`
|
||||
* - Summaries:
|
||||
* `package; type; subtypes; name; signature; ext; input; output; kind; provenance`
|
||||
* - Barriers:
|
||||
* `package; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - BarrierGuards:
|
||||
* `package; type; subtypes; name; signature; ext; input; acceptingValue; kind; provenance`
|
||||
* - Neutrals:
|
||||
* `package; type; name; signature; kind; provenance`
|
||||
* A neutral is used to indicate that a callable is neutral with respect to flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
@@ -82,23 +78,11 @@
|
||||
* - "MapValue": Selects a value in a map.
|
||||
* - "Dereference": Selects the value referenced by a pointer.
|
||||
*
|
||||
* 8. The `acceptingValue` column of barrier guard models specifies the condition
|
||||
* under which the guard blocks flow. It can be one of "true" or "false". In
|
||||
* the future "no-exception", "not-zero", "null", "not-null" may be supported.
|
||||
* 9. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 8. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources "remote" indicates a default remote flow source, and for summaries
|
||||
* "taint" indicates a default additional taint step and "value" indicates a
|
||||
* globally applicable value-preserving step.
|
||||
* 10. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* The format is {origin}-{verification} or just "manual" where the origin describes
|
||||
* the origin of the model and verification describes how the model has been verified.
|
||||
* Some examples are:
|
||||
* - "df-generated": The model has been generated by the model generator tool.
|
||||
* - "df-manual": The model has been generated by the model generator and verified by a human.
|
||||
* - "manual": The model has been written by hand.
|
||||
* This information is used in a heuristic for dataflow analysis to determine whether a
|
||||
* model or source code should be used for determining flow.
|
||||
*/
|
||||
overlay[local?]
|
||||
module;
|
||||
@@ -266,11 +250,11 @@ module ModelValidation {
|
||||
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
|
||||
)
|
||||
or
|
||||
exists(string acceptingValue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingValue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingValue) and
|
||||
exists(string acceptingvalue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingvalue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingvalue) and
|
||||
result =
|
||||
"Unrecognized accepting value description \"" + acceptingValue +
|
||||
"Unrecognized accepting value description \"" + acceptingvalue +
|
||||
"\" in barrier guard model."
|
||||
)
|
||||
}
|
||||
@@ -478,13 +462,13 @@ private module Cached {
|
||||
|
||||
private predicate barrierGuardChecks(DataFlow::Node g, Expr e, boolean gv, TKindModelPair kmp) {
|
||||
exists(
|
||||
SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingValue,
|
||||
SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingvalue,
|
||||
string kind, string model
|
||||
|
|
||||
isBarrierGuardNode(n, acceptingValue, kind, model) and
|
||||
isBarrierGuardNode(n, acceptingvalue, kind, model) and
|
||||
n.asNode().asExpr() = e and
|
||||
kmp = TMkPair(kind, model) and
|
||||
gv = convertAcceptingValue(acceptingValue)
|
||||
gv = convertAcceptingValue(acceptingvalue)
|
||||
|
|
||||
g.asExpr().(CallExpr).getAnArgument() = e // TODO: qualifier?
|
||||
)
|
||||
|
||||
@@ -35,7 +35,7 @@ extensible predicate barrierModel(
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string package, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -174,13 +174,13 @@ module SourceSinkInterpretationInput implements
|
||||
}
|
||||
|
||||
predicate barrierGuardElement(
|
||||
Element e, string input, Public::AcceptingValue acceptingValue, string kind,
|
||||
Element e, string input, Public::AcceptingValue acceptingvalue, string kind,
|
||||
Public::Provenance provenance, string model
|
||||
) {
|
||||
exists(
|
||||
string package, string type, boolean subtypes, string name, string signature, string ext
|
||||
|
|
||||
barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingValue, kind,
|
||||
barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingvalue, kind,
|
||||
provenance, model) and
|
||||
e = interpretElement(package, type, subtypes, name, signature, ext)
|
||||
)
|
||||
|
||||
@@ -4,17 +4,13 @@
|
||||
* Provides classes and predicates for dealing with flow models specified
|
||||
* in data extensions and CSV format.
|
||||
*
|
||||
* The extensible relations have the following columns:
|
||||
* The CSV specification has the following columns:
|
||||
* - Sources:
|
||||
* `package; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - Sinks:
|
||||
* `package; type; subtypes; name; signature; ext; input; kind; provenance`
|
||||
* - Summaries:
|
||||
* `package; type; subtypes; name; signature; ext; input; output; kind; provenance`
|
||||
* - Barriers:
|
||||
* `package; type; subtypes; name; signature; ext; output; kind; provenance`
|
||||
* - BarrierGuards:
|
||||
* `package; type; subtypes; name; signature; ext; input; acceptingValue; kind; provenance`
|
||||
* - Neutrals:
|
||||
* `package; type; name; signature; kind; provenance`
|
||||
* A neutral is used to indicate that a callable is neutral with respect to flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
@@ -73,17 +69,14 @@
|
||||
* in the given range. The range is inclusive at both ends.
|
||||
* - "ReturnValue": Selects the return value of a call to the selected element.
|
||||
* - "Element": Selects the collection elements of the selected element.
|
||||
* 8. The `acceptingValue` column of barrier guard models specifies the condition
|
||||
* under which the guard blocks flow. It can be one of "true" or "false". In
|
||||
* the future "no-exception", "not-zero", "null", "not-null" may be supported.
|
||||
* 9. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 8. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources "remote" indicates a default remote flow source, and for summaries
|
||||
* "taint" indicates a default additional taint step and "value" indicates a
|
||||
* globally applicable value-preserving step. For neutrals the kind can be `summary`,
|
||||
* `source` or `sink` to indicate that the neutral is neutral with respect to
|
||||
* flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
* 10. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* 9. The `provenance` column is a tag to indicate the origin and verification of a model.
|
||||
* The format is {origin}-{verification} or just "manual" where the origin describes
|
||||
* the origin of the model and verification describes how the model has been verified.
|
||||
* Some examples are:
|
||||
@@ -365,11 +358,11 @@ module ModelValidation {
|
||||
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
|
||||
)
|
||||
or
|
||||
exists(string acceptingValue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingValue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingValue) and
|
||||
exists(string acceptingvalue |
|
||||
barrierGuardModel(_, _, _, _, _, _, _, acceptingvalue, _, _, _) and
|
||||
invalidAcceptingValue(acceptingvalue) and
|
||||
result =
|
||||
"Unrecognized accepting value description \"" + acceptingValue +
|
||||
"Unrecognized accepting value description \"" + acceptingvalue +
|
||||
"\" in barrier guard model."
|
||||
)
|
||||
}
|
||||
@@ -590,13 +583,13 @@ private module Cached {
|
||||
|
||||
private predicate barrierGuardChecks(Guard g, Expr e, GuardValue gv, TKindModelPair kmp) {
|
||||
exists(
|
||||
SourceSinkInterpretationInput::InterpretNode n, AcceptingValue acceptingValue, string kind,
|
||||
SourceSinkInterpretationInput::InterpretNode n, AcceptingValue acceptingvalue, string kind,
|
||||
string model
|
||||
|
|
||||
isBarrierGuardNode(n, acceptingValue, kind, model) and
|
||||
isBarrierGuardNode(n, acceptingvalue, kind, model) and
|
||||
n.asNode().asExpr() = e and
|
||||
kmp = TMkPair(kind, model) and
|
||||
gv = convertAcceptingValue(acceptingValue)
|
||||
gv = convertAcceptingValue(acceptingvalue)
|
||||
|
|
||||
g.(Call).getAnArgument() = e or g.(MethodCall).getQualifier() = e
|
||||
)
|
||||
|
||||
@@ -35,7 +35,7 @@ extensible predicate barrierModel(
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string package, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
string input, string acceptingvalue, string kind, string provenance, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -282,7 +282,7 @@ module SourceSinkInterpretationInput implements
|
||||
}
|
||||
|
||||
predicate barrierGuardElement(
|
||||
Element e, string input, Public::AcceptingValue acceptingValue, string kind,
|
||||
Element e, string input, Public::AcceptingValue acceptingvalue, string kind,
|
||||
Public::Provenance provenance, string model
|
||||
) {
|
||||
exists(
|
||||
@@ -290,7 +290,7 @@ module SourceSinkInterpretationInput implements
|
||||
SourceOrSinkElement baseBarrier, string originalInput
|
||||
|
|
||||
barrierGuardModel(namespace, type, subtypes, name, signature, ext, originalInput,
|
||||
acceptingValue, kind, provenance, model) and
|
||||
acceptingvalue, kind, provenance, model) and
|
||||
baseBarrier = interpretElement(namespace, type, subtypes, name, signature, ext, _) and
|
||||
(
|
||||
e = baseBarrier and input = originalInput
|
||||
|
||||
10
javascript/extractor/lib/typescript-go/.gitignore
vendored
Normal file
10
javascript/extractor/lib/typescript-go/.gitignore
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# Build outputs
|
||||
bin/
|
||||
/typescript-parser-wrapper
|
||||
|
||||
# Validation output (generated during test comparison)
|
||||
validation-output/
|
||||
|
||||
# Go test binaries and profile/coverage output
|
||||
*.test
|
||||
*.out
|
||||
94
javascript/extractor/lib/typescript-go/README.md
Normal file
94
javascript/extractor/lib/typescript-go/README.md
Normal file
@@ -0,0 +1,94 @@
|
||||
# TypeScript Parser Wrapper (Go)
|
||||
|
||||
Drop-in replacement for the Node.js TypeScript parser wrapper
|
||||
(`lib/typescript/src/main.ts`) that uses the TypeScript 7 Go-based
|
||||
compiler (`tsgo`) for parsing.
|
||||
|
||||
## Architecture
|
||||
|
||||
The Go wrapper implements the same stdin/stdout JSON protocol as the
|
||||
Node.js wrapper, making it a transparent replacement from the Java
|
||||
extractor's perspective.
|
||||
|
||||
```
|
||||
Java Extractor ──stdin/stdout JSON──▶ Go Wrapper ──▶ tsgo (TypeScript 7)
|
||||
```
|
||||
|
||||
### Protocol
|
||||
|
||||
Commands are sent as one JSON object per line on stdin:
|
||||
|
||||
| Command | Response Type | Description |
|
||||
|-----------------|---------------|------------------------------------|
|
||||
| `get-metadata` | `metadata` | Returns syntax kind/flag mappings |
|
||||
| `prepare-files` | `ok` | Hints about upcoming parse order |
|
||||
| `parse` | `ast` | Parses a file and returns the AST |
|
||||
| `reset` | `reset-done` | Resets the wrapper to a fresh state |
|
||||
| `quit` | *(exits)* | Shuts down the process |
|
||||
|
||||
### Package Structure
|
||||
|
||||
```
|
||||
cmd/typescript-parser-wrapper/ Entry point (server + single-file modes)
|
||||
internal/
|
||||
protocol/ JSON protocol handler
|
||||
tsparser/ Parser backend interface + tsgo impl
|
||||
astconv/ AST property whitelist + conversion
|
||||
validation/ Comparison tests (Node.js vs Go)
|
||||
scripts/
|
||||
validate-output.sh Shell script for bulk comparison
|
||||
testdata/ Sample TypeScript files for testing
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
```bash
|
||||
go build -o bin/typescript-parser-wrapper ./cmd/typescript-parser-wrapper/
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Unit tests
|
||||
go test ./...
|
||||
|
||||
# Validation against Node.js wrapper
|
||||
go test ./internal/validation/ -v
|
||||
|
||||
# Or via shell script
|
||||
./scripts/validate-output.sh testdata/sample.ts
|
||||
```
|
||||
|
||||
## Status
|
||||
|
||||
This is initial scaffolding. The parser backend communicates with the
|
||||
`tsgo` binary (from `@typescript/native-preview`) via its `--api --async`
|
||||
mode, which uses JSON-RPC 2.0 with LSP-style Content-Length framing.
|
||||
|
||||
**Validated so far:**
|
||||
- ✅ Successfully initialize the tsgo API subprocess
|
||||
- ✅ Open a project via `updateSnapshot` with a tsconfig
|
||||
- ✅ Retrieve binary-encoded source file data via `getSourceFile`
|
||||
- ✅ Protocol handler matches the Node.js wrapper's command set
|
||||
- ✅ Validation framework compares outputs (skips gracefully when Go can't parse yet)
|
||||
|
||||
**Key discovery: tsgo API returns binary-encoded ASTs**, not JSON.
|
||||
The `getSourceFile` response is a custom binary format (base64-encoded
|
||||
when using JSON protocol). This means the AST conversion layer needs
|
||||
to decode this binary format rather than transform JSON. See
|
||||
`microsoft/typescript-go/internal/api/encoder/encoder.go` for the
|
||||
format specification.
|
||||
|
||||
### Next Steps
|
||||
|
||||
1. **Decode binary AST format** — Implement a decoder for the tsgo
|
||||
encoder format (flat node array with string tables and sibling pointers)
|
||||
2. **Convert decoded AST to JSON** — Map decoded nodes to the JSON
|
||||
format expected by the Java extractor (property whitelist, `$pos`/`$end`,
|
||||
`$lineStarts`, `$tokens`, `$declarationKind`, string kind names)
|
||||
3. **Wire up end-to-end** — Connect the decoded AST through the protocol
|
||||
handler so `parse` commands return valid AST JSON
|
||||
4. **Validate against Node.js wrapper** — Run the comparison tests
|
||||
5. **Consider an alternative: build from source** — Since all typescript-go
|
||||
packages are `internal/`, building our wrapper as a cmd inside a fork
|
||||
would give direct parser access without binary encoding overhead
|
||||
@@ -0,0 +1,47 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/protocol"
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/tsparser"
|
||||
)
|
||||
|
||||
// Handler implements protocol.Handler by delegating to a tsparser.Parser.
|
||||
type Handler struct {
|
||||
parser tsparser.Parser
|
||||
pendingFiles []string
|
||||
}
|
||||
|
||||
// NewHandler creates a new Handler backed by the given parser.
|
||||
func NewHandler(parser tsparser.Parser) *Handler {
|
||||
return &Handler{parser: parser}
|
||||
}
|
||||
|
||||
func (h *Handler) HandleParse(filename string) (interface{}, error) {
|
||||
result, err := h.parser.Parse(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result.AST, nil
|
||||
}
|
||||
|
||||
func (h *Handler) HandlePrepareFiles(filenames []string) error {
|
||||
h.pendingFiles = filenames
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *Handler) HandleReset() error {
|
||||
h.pendingFiles = nil
|
||||
return h.parser.Reset()
|
||||
}
|
||||
|
||||
func (h *Handler) HandleGetMetadata() (*protocol.MetadataResponse, error) {
|
||||
meta, err := h.parser.GetMetadata()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &protocol.MetadataResponse{
|
||||
Type: "metadata",
|
||||
SyntaxKinds: meta.SyntaxKinds,
|
||||
NodeFlags: meta.NodeFlags,
|
||||
}, nil
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
package main
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
// marshalJSON encodes v with encoding/json and returns the bytes together
// with any encoding error.
func marshalJSON(v interface{}) ([]byte, error) {
	encoded, encodeErr := json.Marshal(v)
	return encoded, encodeErr
}
|
||||
@@ -0,0 +1,96 @@
|
||||
// typescript-parser-wrapper is a drop-in replacement for the Node.js
|
||||
// TypeScript parser wrapper (lib/typescript/src/main.ts).
|
||||
//
|
||||
// It implements the same stdin/stdout JSON protocol, allowing the Java
|
||||
// extractor to use the TypeScript 7 (Go-based) compiler for parsing
|
||||
// TypeScript files.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// # Server mode (reads commands from stdin):
|
||||
// typescript-parser-wrapper
|
||||
//
|
||||
// # Parse a single file:
|
||||
// typescript-parser-wrapper file.ts
|
||||
//
|
||||
// # Print version:
|
||||
// typescript-parser-wrapper --version
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/protocol"
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/tsparser"
|
||||
)
|
||||
|
||||
const version = "0.1.0"
|
||||
|
||||
func main() {
|
||||
if len(os.Args) > 1 {
|
||||
arg := os.Args[1]
|
||||
switch {
|
||||
case arg == "--version":
|
||||
fmt.Println("typescript-parser-wrapper (Go) version " + version + " with TypeScript 7")
|
||||
os.Exit(0)
|
||||
case filepath.Ext(arg) == ".ts" || filepath.Ext(arg) == ".tsx":
|
||||
if err := parseSingleFile(arg); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(0)
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "Unrecognized file or flag: %s\n", arg)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Server mode
|
||||
if err := runServer(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func parseSingleFile(filename string) error {
|
||||
parser := createParser()
|
||||
defer parser.Close()
|
||||
|
||||
result, err := parser.Parse(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp := &protocol.ASTResponse{
|
||||
Type: "ast",
|
||||
AST: result.AST,
|
||||
}
|
||||
|
||||
data, err := marshalJSON(resp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
os.Stdout.Write(data)
|
||||
os.Stdout.Write([]byte("\n"))
|
||||
return nil
|
||||
}
|
||||
|
||||
func runServer() error {
|
||||
parser := createParser()
|
||||
defer parser.Close()
|
||||
|
||||
handler := NewHandler(parser)
|
||||
server := protocol.NewServer(handler)
|
||||
return server.Run()
|
||||
}
|
||||
|
||||
func createParser() tsparser.Parser {
|
||||
config := tsparser.Config{
|
||||
TsgoBinary: os.Getenv("SEMMLE_TYPESCRIPT_TSGO_BINARY"),
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
return tsparser.NewTsgoParser(config)
|
||||
}
|
||||
3
javascript/extractor/lib/typescript-go/go.mod
Normal file
3
javascript/extractor/lib/typescript-go/go.mod
Normal file
@@ -0,0 +1,3 @@
|
||||
module github.com/github/codeql/javascript/extractor/lib/typescript-go
|
||||
|
||||
go 1.22
|
||||
@@ -0,0 +1,211 @@
|
||||
package astconv
|
||||
|
||||
// childProps maps SyntaxKind string names to ordered lists of child property names.
|
||||
// The order corresponds to the bitmask order in the binary encoder. When a node
|
||||
// uses the Children data type (top 2 bits = 0b00), the low byte is a bitmask
|
||||
// indicating which of these properties are present. Children are consumed in order.
|
||||
//
|
||||
// These names must match the property names expected by the Java extractor.
|
||||
// Derived from microsoft/typescript-go/internal/api/encoder/encoder.go.
|
||||
var childProps = map[string][]string{
|
||||
// Multi-child nodes with property mask
|
||||
"QualifiedName": {"left", "right"},
|
||||
"TypeParameter": {"modifiers", "name", "constraint", "default"},
|
||||
"IfStatement": {"expression", "thenStatement", "elseStatement"},
|
||||
"DoStatement": {"statement", "expression"},
|
||||
"WhileStatement": {"expression", "statement"},
|
||||
"ForStatement": {"initializer", "condition", "incrementor", "statement"},
|
||||
"ForInStatement": {"awaitModifier", "initializer", "expression", "statement"},
|
||||
"ForOfStatement": {"awaitModifier", "initializer", "expression", "statement"},
|
||||
"WithStatement": {"expression", "statement"},
|
||||
"SwitchStatement": {"expression", "caseBlock"},
|
||||
"CaseClause": {"expression", "statements"},
|
||||
"DefaultClause": {"expression", "statements"},
|
||||
"TryStatement": {"tryBlock", "catchClause", "finallyBlock"},
|
||||
"CatchClause": {"variableDeclaration", "block"},
|
||||
"LabeledStatement": {"label", "statement"},
|
||||
"VariableStatement": {"modifiers", "declarationList"},
|
||||
"VariableDeclarationList": {"declarations"},
|
||||
"VariableDeclaration": {"name", "exclamationToken", "type", "initializer"},
|
||||
"Parameter": {"modifiers", "dotDotDotToken", "name", "questionToken", "type", "initializer"},
|
||||
"BindingElement": {"dotDotDotToken", "propertyName", "name", "initializer"},
|
||||
"FunctionDeclaration": {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
|
||||
"InterfaceDeclaration": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
|
||||
"TypeAliasDeclaration": {"modifiers", "name", "typeParameters", "type"},
|
||||
"EnumMember": {"name", "initializer"},
|
||||
"EnumDeclaration": {"modifiers", "name", "members"},
|
||||
"ModuleDeclaration": {"modifiers", "name", "body"},
|
||||
"ImportEqualsDeclaration": {"modifiers", "name", "moduleReference"},
|
||||
"ImportDeclaration": {"modifiers", "importClause", "moduleSpecifier", "attributes"},
|
||||
"JSImportDeclaration": {"modifiers", "importClause", "moduleSpecifier", "attributes"},
|
||||
"ImportSpecifier": {"propertyName", "name"},
|
||||
"ImportClause": {"name", "namedBindings"},
|
||||
"ExportAssignment": {"modifiers", "expression"},
|
||||
"JSExportAssignment": {"modifiers", "expression"},
|
||||
"NamespaceExportDeclaration": {"modifiers", "name"},
|
||||
"ExportDeclaration": {"modifiers", "exportClause", "moduleSpecifier", "attributes"},
|
||||
"ExportSpecifier": {"propertyName", "name"},
|
||||
"CallSignature": {"typeParameters", "parameters", "type"},
|
||||
"ConstructSignature": {"typeParameters", "parameters", "type"},
|
||||
"Constructor": {"modifiers", "typeParameters", "parameters", "type", "body"},
|
||||
"GetAccessor": {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
|
||||
"SetAccessor": {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
|
||||
"IndexSignature": {"modifiers", "parameters", "type"},
|
||||
"MethodSignature": {"modifiers", "name", "questionToken", "typeParameters", "parameters", "type"},
|
||||
"MethodDeclaration": {"modifiers", "asteriskToken", "name", "questionToken", "typeParameters", "parameters", "type", "body"},
|
||||
"PropertySignature": {"modifiers", "name", "questionToken", "type", "initializer"},
|
||||
"PropertyDeclaration": {"modifiers", "name", "questionToken", "type", "initializer"},
|
||||
"BinaryExpression": {"left", "operatorToken", "right"},
|
||||
"YieldExpression": {"asteriskToken", "expression"},
|
||||
"ArrowFunction": {"modifiers", "typeParameters", "parameters", "type", "equalsGreaterThanToken", "body"},
|
||||
"FunctionExpression": {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
|
||||
"AsExpression": {"expression", "type"},
|
||||
"SatisfiesExpression": {"expression", "type"},
|
||||
"ConditionalExpression": {"condition", "questionToken", "whenTrue", "colonToken", "whenFalse"},
|
||||
"PropertyAccessExpression": {"expression", "questionDotToken", "name"},
|
||||
"ElementAccessExpression": {"expression", "questionDotToken", "argumentExpression"},
|
||||
"CallExpression": {"expression", "questionDotToken", "typeArguments", "arguments"},
|
||||
"NewExpression": {"expression", "typeArguments", "arguments"},
|
||||
"TemplateExpression": {"head", "templateSpans"},
|
||||
"TemplateSpan": {"expression", "literal"},
|
||||
"TaggedTemplateExpression": {"tag", "questionDotToken", "typeArguments", "template"},
|
||||
"PropertyAssignment": {"modifiers", "name", "questionToken", "initializer"},
|
||||
"ShorthandPropertyAssignment": {"modifiers", "name", "questionToken", "equalsToken", "objectAssignmentInitializer"},
|
||||
"TypeAssertionExpression": {"type", "expression"},
|
||||
"ConditionalType": {"checkType", "extendsType", "trueType", "falseType"},
|
||||
"IndexedAccessType": {"objectType", "indexType"},
|
||||
"TypeReference": {"typeName", "typeArguments"},
|
||||
"ExpressionWithTypeArguments": {"expression", "typeArguments"},
|
||||
"TypePredicate": {"assertsModifier", "parameterName", "type"},
|
||||
"ImportType": {"argument", "attributes", "qualifier", "typeArguments"},
|
||||
"ImportAttribute": {"name", "value"},
|
||||
"TypeQuery": {"exprName", "typeArguments"},
|
||||
"MappedType": {"readonlyToken", "typeParameter", "nameType", "questionToken", "type", "members"},
|
||||
"NamedTupleMember": {"dotDotDotToken", "name", "questionToken", "type"},
|
||||
"FunctionType": {"typeParameters", "parameters", "type"},
|
||||
"ConstructorType": {"modifiers", "typeParameters", "parameters", "type"},
|
||||
"TemplateLiteralType": {"head", "templateSpans"},
|
||||
"TemplateLiteralTypeSpan": {"type", "literal"},
|
||||
"JsxElement": {"openingElement", "children", "closingElement"},
|
||||
"JsxNamespacedName": {"name", "namespace"},
|
||||
"JsxOpeningElement": {"tagName", "typeArguments", "attributes"},
|
||||
"JsxSelfClosingElement": {"tagName", "typeArguments", "attributes"},
|
||||
"JsxFragment": {"openingFragment", "children", "closingFragment"},
|
||||
"JsxAttribute": {"name", "initializer"},
|
||||
"JsxExpression": {"dotDotDotToken", "expression"},
|
||||
"JSDoc": {"comment", "tags"},
|
||||
"JSDocTypeTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocTag": {"tagName", "comment"},
|
||||
"JSDocTemplateTag": {"tagName", "constraint", "typeParameters", "comment"},
|
||||
"JSDocReturnTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocPublicTag": {"tagName", "comment"},
|
||||
"JSDocPrivateTag": {"tagName", "comment"},
|
||||
"JSDocProtectedTag": {"tagName", "comment"},
|
||||
"JSDocReadonlyTag": {"tagName", "comment"},
|
||||
"JSDocOverrideTag": {"tagName", "comment"},
|
||||
"JSDocDeprecatedTag": {"tagName", "comment"},
|
||||
"JSDocSeeTag": {"tagName", "nameExpression", "comment"},
|
||||
"JSDocImplementsTag": {"tagName", "className", "comment"},
|
||||
"JSDocAugmentsTag": {"tagName", "className", "comment"},
|
||||
"JSDocSatisfiesTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocThrowsTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocThisTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocImportTag": {"tagName", "importClause", "moduleSpecifier", "attributes", "comment"},
|
||||
"JSDocCallbackTag": {"tagName", "typeExpression", "fullName", "comment"},
|
||||
"JSDocOverloadTag": {"tagName", "typeExpression", "comment"},
|
||||
"JSDocTypedefTag": {"tagName", "typeExpression", "name", "comment"},
|
||||
"JSDocSignature": {"typeParameters", "parameters", "type"},
|
||||
"ClassStaticBlockDeclaration": {"modifiers", "body"},
|
||||
"ClassDeclaration": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
|
||||
"ClassExpression": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
|
||||
|
||||
// JSDocParameterTag and JSDocPropertyTag have order-dependent children
|
||||
// (handled specially in the converter based on isNameFirst defined bit).
|
||||
// Default order (isNameFirst=false):
|
||||
"JSDocParameterTag": {"tagName", "typeExpression", "name", "comment"},
|
||||
"JSDocPropertyTag": {"tagName", "typeExpression", "name", "comment"},
|
||||
}
|
||||
|
||||
// singleChildProp maps node kinds that have exactly one Node child to
|
||||
// the property name for that child.
|
||||
var singleChildProp = map[string]string{
|
||||
"ReturnStatement": "expression",
|
||||
"ThrowStatement": "expression",
|
||||
"ExpressionStatement": "expression",
|
||||
"BreakStatement": "label",
|
||||
"ContinueStatement": "label",
|
||||
"ParenthesizedExpression": "expression",
|
||||
"ComputedPropertyName": "expression",
|
||||
"Decorator": "expression",
|
||||
"SpreadElement": "expression",
|
||||
"SpreadAssignment": "expression",
|
||||
"DeleteExpression": "expression",
|
||||
"TypeOfExpression": "expression",
|
||||
"VoidExpression": "expression",
|
||||
"AwaitExpression": "expression",
|
||||
"NonNullExpression": "expression",
|
||||
"ExternalModuleReference": "expression",
|
||||
"NamespaceImport": "name",
|
||||
"NamespaceExport": "name",
|
||||
"JsxClosingElement": "tagName",
|
||||
"ArrayType": "elementType",
|
||||
"LiteralType": "literal",
|
||||
"InferType": "typeParameter",
|
||||
"OptionalType": "type",
|
||||
"RestType": "type",
|
||||
"ParenthesizedType": "type",
|
||||
"JSDocTypeExpression": "type",
|
||||
"JSDocNonNullableType": "type",
|
||||
"JSDocNullableType": "type",
|
||||
"JSDocVariadicType": "type",
|
||||
"JSDocOptionalType": "type",
|
||||
"JSDocNameReference": "name",
|
||||
}
|
||||
|
||||
// singleNodeListProp maps node kinds that have exactly one NodeList child
|
||||
// to the property name for that child.
|
||||
var singleNodeListProp = map[string]string{
|
||||
"Block": "statements",
|
||||
"ArrayLiteralExpression": "elements",
|
||||
"ObjectLiteralExpression": "properties",
|
||||
"UnionType": "types",
|
||||
"IntersectionType": "types",
|
||||
"TupleType": "elements",
|
||||
"NamedImports": "elements",
|
||||
"NamedExports": "elements",
|
||||
"ModuleBlock": "statements",
|
||||
"CaseBlock": "clauses",
|
||||
"TypeLiteral": "members",
|
||||
"JsxAttributes": "properties",
|
||||
"ArrayBindingPattern": "elements",
|
||||
"ObjectBindingPattern": "elements",
|
||||
"HeritageClause": "types",
|
||||
"ImportAttributes": "elements",
|
||||
"JSDocTypeLiteral": "jsDocPropertyTags",
|
||||
}
|
||||
|
||||
// operandKinds are node kinds where the single child is called "operand"
|
||||
// and the operator is encoded in the defined bits.
|
||||
var operandKinds = map[string]bool{
|
||||
"PrefixUnaryExpression": true,
|
||||
"PostfixUnaryExpression": true,
|
||||
}
|
||||
|
||||
// GetChildProperties returns the ordered child property names for the given
|
||||
// SyntaxKind name. Returns nil if the kind has no registered child properties
|
||||
// (leaf node, single-child, or NodeList-child).
|
||||
func GetChildProperties(kindName string) []string {
|
||||
return childProps[kindName]
|
||||
}
|
||||
|
||||
// GetSingleChildProperty returns the property name for a single-child node.
|
||||
// Returns "" if the kind is not a single-child node.
|
||||
func GetSingleChildProperty(kindName string) string {
|
||||
return singleChildProp[kindName]
|
||||
}
|
||||
|
||||
// GetSingleNodeListProperty returns the property name for a single-NodeList-child node.
|
||||
// Returns "" if the kind is not a single-NodeList-child node.
|
||||
func GetSingleNodeListProperty(kindName string) string {
|
||||
return singleNodeListProp[kindName]
|
||||
}
|
||||
@@ -0,0 +1,842 @@
|
||||
package astconv
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Converter transforms a BinaryAST into the JSON format expected by the
|
||||
// Java extractor.
|
||||
type Converter struct {
|
||||
ast *BinaryAST
|
||||
kindNames map[uint32]string // numeric kind → string name
|
||||
sourceText string // source file text for $lineStarts / $pos augmentation
|
||||
utf16Offsets []int // maps byte offset → UTF-16 code unit offset
|
||||
byteOffsets []int // maps UTF-16 code unit offset → byte offset
|
||||
parseDiagnostics []ParseDiagnostic // syntactic diagnostics from the compiler
|
||||
}
|
||||
|
||||
// ParseDiagnostic describes one syntactic error reported by the TypeScript
// compiler.
type ParseDiagnostic struct {
	// Pos is the UTF-16 offset at which the error starts.
	Pos int
	// End is the UTF-16 offset at which the error ends.
	End int
	// MessageText is the human-readable error message.
	MessageText string
}
|
||||
|
||||
// NewConverter creates a Converter for the given binary AST.
|
||||
// kindToName maps numeric SyntaxKind values to their string names.
|
||||
func NewConverter(ast *BinaryAST, kindToName map[uint32]string) *Converter {
|
||||
text := ast.SourceText()
|
||||
utf16Table, byteTable := buildOffsetTables(text)
|
||||
return &Converter{
|
||||
ast: ast,
|
||||
kindNames: kindToName,
|
||||
sourceText: text,
|
||||
utf16Offsets: utf16Table,
|
||||
byteOffsets: byteTable,
|
||||
}
|
||||
}
|
||||
|
||||
// SetParseDiagnostics sets the syntactic diagnostics to include in the output.
|
||||
func (c *Converter) SetParseDiagnostics(diags []ParseDiagnostic) {
|
||||
c.parseDiagnostics = diags
|
||||
}
|
||||
|
||||
// Convert transforms the binary AST into a JSON-serializable map.
|
||||
// The root node is at index 1.
|
||||
func (c *Converter) Convert() (map[string]interface{}, error) {
|
||||
if c.ast.NodeCount() < 2 {
|
||||
return nil, fmt.Errorf("no nodes to convert")
|
||||
}
|
||||
return c.convertNode(1)
|
||||
}
|
||||
|
||||
// ConvertJSON is a convenience method that converts to JSON bytes.
|
||||
func (c *Converter) ConvertJSON() (json.RawMessage, error) {
|
||||
obj, err := c.Convert()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return json.Marshal(obj)
|
||||
}
|
||||
|
||||
func (c *Converter) convertNode(i int) (map[string]interface{}, error) {
|
||||
kind := c.ast.Kind(i)
|
||||
kindName := c.kindNames[kind]
|
||||
if kindName == "" {
|
||||
kindName = fmt.Sprintf("Unknown_%d", kind)
|
||||
}
|
||||
|
||||
node := map[string]interface{}{
|
||||
"kind": int(kind),
|
||||
"flags": int(c.ast.Flags(i)),
|
||||
"$pos": c.augmentPos(int(c.ast.Pos(i)), true),
|
||||
"$end": int(c.ast.End(i)),
|
||||
}
|
||||
|
||||
dataType := c.ast.DataType(i)
|
||||
|
||||
switch dataType {
|
||||
case nodeDataTypeString:
|
||||
c.handleStringNode(i, kindName, node)
|
||||
|
||||
case nodeDataTypeExtended:
|
||||
if err := c.handleExtendedNode(i, kindName, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
default: // nodeDataTypeChildren
|
||||
if err := c.handleChildrenNode(i, kindName, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Add defined-bits-based properties
|
||||
c.addDefinedBitProperties(i, kindName, node)
|
||||
|
||||
if kindName == "ModuleDeclaration" {
|
||||
// TS7 doesn't set the NestedNamespace flag in the binary AST, but the Java
|
||||
// extractor needs it to wrap inner namespace declarations in ExportNamedDeclaration.
|
||||
// Detect nested namespaces (ModuleDeclaration whose body is another ModuleDeclaration)
|
||||
// and add the flag to the inner declaration.
|
||||
if body, ok := node["body"].(map[string]interface{}); ok {
|
||||
if bodyKind, ok := body["kind"].(int); ok && bodyKind == 268 { // 268 = ModuleDeclaration
|
||||
if flags, ok := body["flags"].(int); ok {
|
||||
body["flags"] = flags | 8 // NestedNamespace = 8
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TS7 binary AST doesn't have a GlobalAugmentation flag. Detect `declare global {}`
|
||||
// by checking if the name is "global" (Identifier), and set a synthetic flag bit
|
||||
// so the Java extractor can distinguish it from regular namespace declarations.
|
||||
if name, ok := node["name"].(map[string]interface{}); ok {
|
||||
if nameKind, ok := name["kind"].(int); ok && nameKind == 79 { // 79 = Identifier
|
||||
if text, _ := name["escapedText"].(string); text == "global" {
|
||||
if flags, ok := node["flags"].(int); ok {
|
||||
node["flags"] = flags | (1 << 30) // synthetic GlobalAugmentation
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// handleStringNode handles nodes with a string property (Identifier, StringLiteral, etc.)
|
||||
func (c *Converter) handleStringNode(i int, kindName string, node map[string]interface{}) {
|
||||
strIdx := c.ast.StringIndex(i)
|
||||
text := c.ast.GetString(strIdx)
|
||||
|
||||
switch kindName {
|
||||
case "Identifier", "PrivateIdentifier":
|
||||
node["escapedText"] = text
|
||||
default:
|
||||
node["text"] = text
|
||||
}
|
||||
}
|
||||
|
||||
// handleExtendedNode handles SourceFile and template literal nodes.
|
||||
func (c *Converter) handleExtendedNode(i int, kindName string, node map[string]interface{}) error {
|
||||
extOff := c.ast.ExtOffset(i)
|
||||
|
||||
switch kindName {
|
||||
case "SourceFile":
|
||||
return c.handleSourceFile(i, extOff, node)
|
||||
case "TemplateHead", "TemplateMiddle", "TemplateTail":
|
||||
c.handleTemplateLiteral(extOff, node)
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unknown extended data node kind: %s", kindName)
|
||||
}
|
||||
}
|
||||
|
||||
// handleSourceFile extracts SourceFile-specific data from extended data.
|
||||
func (c *Converter) handleSourceFile(i int, extOff uint32, node map[string]interface{}) error {
|
||||
// SourceFile extended data layout:
|
||||
// [0-4] textIdx, [4-8] fileNameIdx, [8-12] pathIdx,
|
||||
// [12-16] languageVariant, [16-20] scriptKind,
|
||||
// [20-24] referencedFiles, [24-28] typeReferenceDirectives, [28-32] libReferenceDirectives
|
||||
// [32-36] imports, [36-40] moduleAugmentations, [40-44] ambientModuleNames
|
||||
// [44-48] externalModuleIndicator
|
||||
|
||||
fileNameIdx := c.ast.ExtUint32(extOff + 4)
|
||||
node["fileName"] = c.ast.GetString(fileNameIdx)
|
||||
|
||||
// Add source text
|
||||
if c.sourceText != "" {
|
||||
node["text"] = c.sourceText
|
||||
node["$lineStarts"] = computeLineStarts(c.sourceText, c.utf16Offsets)
|
||||
}
|
||||
|
||||
// Add parseDiagnostics (expected by Java extractor).
|
||||
// The Java extractor uses these to report syntax errors and skip full extraction.
|
||||
diagArray := make([]interface{}, 0, len(c.parseDiagnostics))
|
||||
for _, d := range c.parseDiagnostics {
|
||||
diagArray = append(diagArray, map[string]interface{}{
|
||||
"$pos": d.Pos,
|
||||
"messageText": d.MessageText,
|
||||
})
|
||||
}
|
||||
node["parseDiagnostics"] = diagArray
|
||||
|
||||
// Add children (statements + EndOfFile)
|
||||
children := c.ast.Children(i)
|
||||
statementsFound := false
|
||||
for _, ci := range children {
|
||||
if c.ast.IsNodeList(ci) {
|
||||
arr, err := c.convertNodeList(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["statements"] = arr
|
||||
statementsFound = true
|
||||
}
|
||||
// Skip EndOfFile token — the Java extractor doesn't use it
|
||||
}
|
||||
if !statementsFound {
|
||||
node["statements"] = []interface{}{}
|
||||
}
|
||||
|
||||
// Generate $tokens by scanning the source text.
|
||||
if c.sourceText != "" {
|
||||
events := c.collectRescanEvents(i)
|
||||
scanner := NewScanner(c.sourceText, events)
|
||||
rawTokens := scanner.ScanAll()
|
||||
tokenArr := make([]interface{}, len(rawTokens))
|
||||
for ti, tok := range rawTokens {
|
||||
tokenArr[ti] = map[string]interface{}{
|
||||
"kind": tok.Kind,
|
||||
"tokenPos": byteToUTF16(tok.TokenPos, c.utf16Offsets),
|
||||
"text": tok.Text,
|
||||
}
|
||||
}
|
||||
node["$tokens"] = tokenArr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleTemplateLiteral extracts template literal data from extended data.
|
||||
func (c *Converter) handleTemplateLiteral(extOff uint32, node map[string]interface{}) {
|
||||
textIdx := c.ast.ExtUint32(extOff)
|
||||
rawTextIdx := c.ast.ExtUint32(extOff + 4)
|
||||
node["text"] = c.ast.GetString(textIdx)
|
||||
node["rawText"] = c.ast.GetString(rawTextIdx)
|
||||
}
|
||||
|
||||
// handleChildrenNode handles nodes with child properties determined by a bitmask.
|
||||
func (c *Converter) handleChildrenNode(i int, kindName string, node map[string]interface{}) error {
|
||||
children := c.ast.Children(i)
|
||||
|
||||
// Check for single-child nodes
|
||||
if prop := GetSingleChildProperty(kindName); prop != "" {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node[prop] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for single NodeList child nodes
|
||||
if prop := GetSingleNodeListProperty(kindName); prop != "" {
|
||||
if len(children) > 0 && c.ast.IsNodeList(children[0]) {
|
||||
arr, err := c.convertNodeList(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node[prop] = arr
|
||||
} else if len(children) > 0 {
|
||||
// Some single-NodeList nodes may not have a NodeList child
|
||||
// (e.g., JSDocTypeLiteral). Fall through to multi-child handling.
|
||||
} else {
|
||||
node[prop] = []interface{}{}
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for operator-in-definedBits nodes (PrefixUnaryExpression, PostfixUnaryExpression)
|
||||
if operandKinds[kindName] {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["operand"] = child
|
||||
}
|
||||
node["operator"] = int(c.ast.DefinedBits(i))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Multi-child nodes with property mask
|
||||
props := GetChildProperties(kindName)
|
||||
if props != nil {
|
||||
return c.assignChildProperties(i, kindName, props, children, node)
|
||||
}
|
||||
|
||||
// Token/keyword nodes with no children — nothing to add
|
||||
if len(children) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MetaProperty: keywordToken + name
|
||||
if kindName == "MetaProperty" {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["name"] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TypeOperator: operator keyword kind inferred from source text + type child
|
||||
if kindName == "TypeOperator" {
|
||||
// Operator (keyof/unique/readonly) is not in the binary encoding.
|
||||
bytePos := utf16ToByte(int(c.ast.Pos(i)), c.byteOffsets)
|
||||
if c.sourceText != "" && bytePos < len(c.sourceText) {
|
||||
text := c.sourceText[bytePos:]
|
||||
// Skip leading trivia
|
||||
for len(text) > 0 && (text[0] == ' ' || text[0] == '\t' || text[0] == '\n' || text[0] == '\r') {
|
||||
text = text[1:]
|
||||
}
|
||||
if len(text) >= 5 && text[:5] == "keyof" {
|
||||
node["operator"] = int(c.kindForName("KeyOfKeyword"))
|
||||
} else if len(text) >= 6 && text[:6] == "unique" {
|
||||
node["operator"] = int(c.kindForName("UniqueKeyword"))
|
||||
} else if len(text) >= 8 && text[:8] == "readonly" {
|
||||
node["operator"] = int(c.kindForName("ReadonlyKeyword"))
|
||||
}
|
||||
}
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["type"] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MissingDeclaration: optional modifiers child
|
||||
if kindName == "MissingDeclaration" {
|
||||
if len(children) > 0 && c.ast.IsNodeList(children[0]) {
|
||||
arr, err := c.convertNodeList(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["modifiers"] = arr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unknown node kind with children — emit them as a generic "children" array
|
||||
arr := make([]interface{}, 0, len(children))
|
||||
for _, ci := range children {
|
||||
if c.ast.IsNodeList(ci) {
|
||||
nlArr, err := c.convertNodeList(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, item := range nlArr {
|
||||
arr = append(arr, item)
|
||||
}
|
||||
} else {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
arr = append(arr, child)
|
||||
}
|
||||
}
|
||||
if len(arr) > 0 {
|
||||
node["children"] = arr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// assignChildProperties distributes children to named properties based on
|
||||
// the bitmask in the node's data field.
|
||||
func (c *Converter) assignChildProperties(nodeIdx int, kindName string, props []string, children []int, node map[string]interface{}) error {
|
||||
mask := c.ast.ChildMask(nodeIdx)
|
||||
definedBits := c.ast.DefinedBits(nodeIdx)
|
||||
|
||||
// Special handling for JSDocParameterTag/JSDocPropertyTag where
|
||||
// child order depends on isNameFirst
|
||||
if (kindName == "JSDocParameterTag" || kindName == "JSDocPropertyTag") && definedBits&2 != 0 {
|
||||
// isNameFirst=true: order is tagName, name, typeExpression, comment
|
||||
props = []string{"tagName", "name", "typeExpression", "comment"}
|
||||
}
|
||||
|
||||
childIdx := 0
|
||||
for bit, prop := range props {
|
||||
if bit < 8 && mask != 0 && mask&(1<<uint(bit)) == 0 {
|
||||
// Property not present per bitmask. For array properties,
|
||||
// emit an empty array (the Java extractor expects them).
|
||||
if isArrayProperty(prop) {
|
||||
node[prop] = []interface{}{}
|
||||
}
|
||||
continue
|
||||
}
|
||||
// If mask is 0 (single-child or no disambiguation needed), consume sequentially
|
||||
if childIdx >= len(children) {
|
||||
// No more children — emit empty arrays for remaining array properties
|
||||
if isArrayProperty(prop) {
|
||||
node[prop] = []interface{}{}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
ci := children[childIdx]
|
||||
childIdx++
|
||||
|
||||
if c.ast.IsNodeList(ci) {
|
||||
arr, err := c.convertNodeList(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Filter out zero-width synthetic modifiers (TS7 adds these for
|
||||
// nested namespace bodies, but TS5/Node.js doesn't emit them).
|
||||
if prop == "modifiers" {
|
||||
filtered := make([]interface{}, 0, len(arr))
|
||||
for _, elem := range arr {
|
||||
if m, ok := elem.(map[string]interface{}); ok {
|
||||
pos, _ := m["$pos"].(int)
|
||||
end, _ := m["$end"].(int)
|
||||
if pos == end {
|
||||
continue // zero-width synthetic node
|
||||
}
|
||||
}
|
||||
filtered = append(filtered, elem)
|
||||
}
|
||||
if len(filtered) == 0 {
|
||||
continue // drop entirely
|
||||
}
|
||||
arr = filtered
|
||||
}
|
||||
node[prop] = arr
|
||||
} else {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Remap TS7 "postfixToken" (questionToken property) to the correct name
|
||||
// based on the actual token kind. TS7 uses a single PostfixToken
|
||||
// for what TS5 had as separate questionToken/exclamationToken.
|
||||
if prop == "questionToken" {
|
||||
childKind := c.ast.Kind(ci)
|
||||
exclamationKind := c.kindForName("ExclamationToken")
|
||||
if exclamationKind != 0 && childKind == exclamationKind {
|
||||
prop = "exclamationToken"
|
||||
}
|
||||
}
|
||||
node[prop] = child
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isArrayProperty reports whether prop must be emitted as an empty array
// (rather than omitted) when it is absent from the binary AST.
func isArrayProperty(prop string) bool {
	flagged, listed := arrayProperties[prop]
	return listed && flagged
}

// arrayProperties is the set of property names consulted by isArrayProperty.
var arrayProperties = map[string]bool{
	"arguments":    true,
	"declarations": true,
	"elements":     true,
	"members":      true,
	"parameters":   true,
	"properties":   true,
}
|
||||
|
||||
// convertNodeList converts a NodeList into a JSON array.
|
||||
func (c *Converter) convertNodeList(i int) ([]interface{}, error) {
|
||||
children := c.ast.Children(i)
|
||||
arr := make([]interface{}, 0, len(children))
|
||||
for _, ci := range children {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arr = append(arr, child)
|
||||
}
|
||||
return arr, nil
|
||||
}
|
||||
|
||||
// addDefinedBitProperties adds properties derived from the defined bits
|
||||
// (bits 24-29 of the data field) that aren't part of the child tree.
|
||||
func (c *Converter) addDefinedBitProperties(i int, kindName string, node map[string]interface{}) {
|
||||
definedBits := c.ast.DefinedBits(i)
|
||||
|
||||
switch kindName {
|
||||
case "ImportSpecifier", "ImportEqualsDeclaration", "ExportSpecifier", "ExportDeclaration":
|
||||
node["isTypeOnly"] = definedBits&1 != 0
|
||||
case "ImportClause":
|
||||
node["isTypeOnly"] = definedBits&1 != 0
|
||||
if definedBits&2 != 0 {
|
||||
node["phaseModifier"] = "defer"
|
||||
}
|
||||
case "ImportType":
|
||||
if definedBits&1 != 0 {
|
||||
node["isTypeOf"] = true
|
||||
} else {
|
||||
node["isTypeOf"] = false
|
||||
}
|
||||
case "ExportAssignment", "JSExportAssignment":
|
||||
if definedBits&1 != 0 {
|
||||
node["isExportEquals"] = true
|
||||
}
|
||||
case "VariableDeclarationList":
|
||||
// Determine $declarationKind from defined bits
|
||||
if definedBits&2 != 0 {
|
||||
node["$declarationKind"] = "const"
|
||||
} else if definedBits&1 != 0 {
|
||||
node["$declarationKind"] = "let"
|
||||
} else {
|
||||
node["$declarationKind"] = "var"
|
||||
}
|
||||
case "ImportAttributes":
|
||||
if definedBits&2 != 0 {
|
||||
node["token"] = c.kindForName("AssertKeyword")
|
||||
} else {
|
||||
node["token"] = c.kindForName("WithKeyword")
|
||||
}
|
||||
case "HeritageClause":
|
||||
// Token (extends/implements) is not in the binary encoding.
|
||||
// Infer from source text, skipping leading trivia.
|
||||
bytePos := utf16ToByte(int(c.ast.Pos(i)), c.byteOffsets)
|
||||
if c.sourceText != "" && bytePos < len(c.sourceText) {
|
||||
text := c.sourceText[bytePos:]
|
||||
// Skip whitespace/newlines
|
||||
for len(text) > 0 && (text[0] == ' ' || text[0] == '\t' || text[0] == '\n' || text[0] == '\r') {
|
||||
text = text[1:]
|
||||
}
|
||||
if len(text) >= 10 && text[:10] == "implements" {
|
||||
node["token"] = int(c.kindForName("ImplementsKeyword"))
|
||||
} else {
|
||||
node["token"] = int(c.kindForName("ExtendsKeyword"))
|
||||
}
|
||||
}
|
||||
case "JSDocParameterTag", "JSDocPropertyTag":
|
||||
if definedBits&1 != 0 {
|
||||
node["isBracketed"] = true
|
||||
}
|
||||
if definedBits&2 != 0 {
|
||||
node["isNameFirst"] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// augmentPos replicates the Node.js wrapper's $pos augmentation:
|
||||
// if skip is true, advances past leading whitespace, single-line comments (//),
|
||||
// and multi-line comments (/* */). This matches the TS5 Node.js wrapper regex:
|
||||
// /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g
|
||||
// Note: shebangs (#!) are NOT skipped — the TS5 regex does not match them.
|
||||
// Input pos is a UTF-16 code unit offset; returns a UTF-16 code unit offset.
|
||||
func (c *Converter) augmentPos(pos int, skip bool) int {
|
||||
if !skip || c.sourceText == "" {
|
||||
return pos
|
||||
}
|
||||
return byteToUTF16(c.skipTrivia(utf16ToByte(pos, c.byteOffsets)), c.utf16Offsets)
|
||||
}
|
||||
|
||||
// augmentBytePos converts a UTF-16 offset to byte offset then skips trivia,
|
||||
// returning the result as a byte offset. Used for scanner rescan events.
|
||||
func (c *Converter) augmentBytePos(utf16Pos int) int {
|
||||
return c.skipTrivia(utf16ToByte(utf16Pos, c.byteOffsets))
|
||||
}
|
||||
|
||||
// skipTrivia advances past whitespace, single-line comments (//), and
|
||||
// multi-line comments (/* */), starting at byte offset i.
|
||||
func (c *Converter) skipTrivia(i int) int {
|
||||
n := len(c.sourceText)
|
||||
for i < n {
|
||||
ch := c.sourceText[i]
|
||||
if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == '\v' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if ch == '/' && i+1 < n {
|
||||
next := c.sourceText[i+1]
|
||||
if next == '/' {
|
||||
// Single-line comment — skip to end of line
|
||||
i += 2
|
||||
for i < n && c.sourceText[i] != '\n' {
|
||||
i++
|
||||
}
|
||||
continue
|
||||
}
|
||||
if next == '*' {
|
||||
// Multi-line comment — skip to */
|
||||
i += 2
|
||||
for i+1 < n {
|
||||
if c.sourceText[i] == '*' && c.sourceText[i+1] == '/' {
|
||||
i += 2
|
||||
break
|
||||
}
|
||||
i++
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// computeLineStarts returns an array of UTF-16 code unit offsets where each line starts.
|
||||
func computeLineStarts(text string, utf16Offsets []int) []int {
|
||||
starts := []int{0}
|
||||
for i := 0; i < len(text); i++ {
|
||||
ch := text[i]
|
||||
if ch == '\n' {
|
||||
starts = append(starts, byteToUTF16(i+1, utf16Offsets))
|
||||
} else if ch == '\r' {
|
||||
if i+1 < len(text) && text[i+1] == '\n' {
|
||||
i++
|
||||
}
|
||||
starts = append(starts, byteToUTF16(i+1, utf16Offsets))
|
||||
}
|
||||
}
|
||||
return starts
|
||||
}
|
||||
|
||||
// buildOffsetTables builds bidirectional mapping tables between byte offsets
// and UTF-16 code unit offsets for text.
//
// byteToUTF16Table has len(text)+1 entries; every byte of a multi-byte
// sequence maps to the UTF-16 offset of the character it belongs to, and the
// final entry is the total UTF-16 length. utf16ToByteTable has one entry per
// UTF-16 code unit plus a final end entry; both halves of a surrogate pair
// map to the first byte of the 4-byte sequence.
//
// Sequence width is taken from the lead byte alone (<0x80: 1, <0xE0: 2,
// <0xF0: 3, otherwise 4 bytes and two UTF-16 units); the input is not
// validated. If the text ends in a truncated sequence, the end entry of
// utf16ToByteTable is clamped to len(text) so that no table entry points past
// the end of the text.
func buildOffsetTables(text string) (byteToUTF16Table []int, utf16ToByteTable []int) {
	n := len(text)
	byteToUTF16Table = make([]int, n+1)
	// There are never more UTF-16 units than bytes, so n+1 is enough capacity.
	utf16ToByteTable = make([]int, 0, n+1)

	units := 0
	for i := 0; i < n; {
		width, span := 1, 1 // bytes consumed, UTF-16 units produced
		switch b := text[i]; {
		case b < 0x80:
			// single byte
		case b < 0xE0:
			width = 2
		case b < 0xF0:
			width = 3
		default:
			// 4-byte UTF-8 = 2 UTF-16 code units (surrogate pair)
			width, span = 4, 2
		}

		// i+j <= n keeps a truncated trailing sequence inside the table.
		for j := 0; j < width && i+j <= n; j++ {
			byteToUTF16Table[i+j] = units
		}
		for k := 0; k < span; k++ {
			utf16ToByteTable = append(utf16ToByteTable, i)
		}

		i += width
		units += span
	}

	byteToUTF16Table[n] = units
	utf16ToByteTable = append(utf16ToByteTable, n)
	return
}
|
||||
|
||||
// byteToUTF16 converts a byte offset to a UTF-16 code unit offset using
// table. With an empty table the offset is returned unchanged; offsets past
// the table yield its last entry and negative offsets yield 0.
func byteToUTF16(byteOff int, table []int) int {
	switch size := len(table); {
	case size == 0:
		return byteOff
	case byteOff >= size:
		return table[size-1]
	case byteOff < 0:
		return 0
	default:
		return table[byteOff]
	}
}
|
||||
|
||||
// utf16ToByte converts a UTF-16 code unit offset to a byte offset using
// table. With an empty table the offset is returned unchanged; offsets past
// the table yield its last entry and negative offsets yield 0.
func utf16ToByte(utf16Off int, table []int) int {
	switch size := len(table); {
	case size == 0:
		return utf16Off
	case utf16Off >= size:
		return table[size-1]
	case utf16Off < 0:
		return 0
	default:
		return table[utf16Off]
	}
}
|
||||
|
||||
// kindForName returns the numeric kind for a given string name.
|
||||
// This is the reverse of kindNames. Returns 0 if not found.
|
||||
func (c *Converter) kindForName(name string) uint32 {
|
||||
for k, v := range c.kindNames {
|
||||
if v == name {
|
||||
return k
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// collectRescanEvents walks the AST to find positions that need rescanning.
|
||||
// This matches the Node.js wrapper's rescan logic in ast_extractor.ts.
|
||||
func (c *Converter) collectRescanEvents(root int) []RescanEvent {
|
||||
var events []RescanEvent
|
||||
c.walkForRescan(root, &events)
|
||||
// Sort by position
|
||||
sortRescanEvents(events)
|
||||
return events
|
||||
}
|
||||
|
||||
func (c *Converter) walkForRescan(i int, events *[]RescanEvent) {
|
||||
if i <= 0 || i >= c.ast.NodeCount() {
|
||||
return
|
||||
}
|
||||
if c.ast.IsNodeList(i) {
|
||||
for _, ci := range c.ast.Children(i) {
|
||||
c.walkForRescan(ci, events)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
kind := c.ast.Kind(i)
|
||||
kindName := c.kindNames[kind]
|
||||
|
||||
// RegularExpressionLiteral needs rescan (scanner sees / as SlashToken)
|
||||
if kindName == "RegularExpressionLiteral" {
|
||||
pos := c.augmentBytePos(int(c.ast.Pos(i)))
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "regex"})
|
||||
}
|
||||
|
||||
// TemplateMiddle and TemplateTail need rescan (scanner sees } as CloseBraceToken)
|
||||
if kindName == "TemplateMiddle" || kindName == "TemplateTail" {
|
||||
pos := c.augmentBytePos(int(c.ast.Pos(i)))
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "template"})
|
||||
}
|
||||
|
||||
// BinaryExpression with >>= or >>> etc. needs rescan (scanner may see > separately)
|
||||
if kindName == "BinaryExpression" {
|
||||
children := c.ast.Children(i)
|
||||
if len(children) >= 3 {
|
||||
// BinaryExpression children: left, operatorToken, right
|
||||
opKind := c.kindNames[c.ast.Kind(children[1])]
|
||||
switch opKind {
|
||||
case "GreaterThanEqualsToken", "GreaterThanGreaterThanEqualsToken",
|
||||
"GreaterThanGreaterThanGreaterThanEqualsToken",
|
||||
"GreaterThanGreaterThanGreaterThanToken", "GreaterThanGreaterThanToken":
|
||||
pos := c.augmentBytePos(int(c.ast.Pos(children[1])))
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "greater"})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse into children
|
||||
for _, ci := range c.ast.Children(i) {
|
||||
c.walkForRescan(ci, events)
|
||||
}
|
||||
}
|
||||
|
||||
func sortRescanEvents(events []RescanEvent) {
|
||||
// Simple insertion sort — events are typically few
|
||||
for i := 1; i < len(events); i++ {
|
||||
key := events[i]
|
||||
j := i - 1
|
||||
for j >= 0 && events[j].Pos > key.Pos {
|
||||
events[j+1] = events[j]
|
||||
j--
|
||||
}
|
||||
events[j+1] = key
|
||||
}
|
||||
}
|
||||
|
||||
// FilterWhitelist removes properties from the converted AST that are not
|
||||
// in the property whitelist. This is applied recursively.
|
||||
func FilterWhitelist(obj map[string]interface{}) map[string]interface{} {
|
||||
result := make(map[string]interface{}, len(obj))
|
||||
for k, v := range obj {
|
||||
if !IsAllowedProperty(k) {
|
||||
continue
|
||||
}
|
||||
switch val := v.(type) {
|
||||
case map[string]interface{}:
|
||||
result[k] = FilterWhitelist(val)
|
||||
case []interface{}:
|
||||
result[k] = filterWhitelistArray(val)
|
||||
default:
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func filterWhitelistArray(arr []interface{}) []interface{} {
|
||||
result := make([]interface{}, len(arr))
|
||||
for i, v := range arr {
|
||||
if obj, ok := v.(map[string]interface{}); ok {
|
||||
result[i] = FilterWhitelist(obj)
|
||||
} else {
|
||||
result[i] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// BuildKindToNameMap builds a reverse mapping from numeric kind to string
// name from a SyntaxKinds metadata map (name → number).
//
// When several names share a number, the shortest name wins. Names of equal
// length are resolved lexicographically (smallest first), so the result does
// not depend on Go's randomised map iteration order.
func BuildKindToNameMap(syntaxKinds map[string]int) map[uint32]string {
	result := make(map[uint32]string, len(syntaxKinds))
	for name, num := range syntaxKinds {
		key := uint32(num)
		existing, seen := result[key]
		shorter := len(name) < len(existing)
		tieBreak := len(name) == len(existing) && name < existing
		if !seen || shorter || tieBreak {
			result[key] = name
		}
	}
	return result
}
|
||||
|
||||
// StripKindPrefix drops a leading "Kind" from name (TS7 uses Go-style names
// such as "KindIdentifier"). Names without that prefix are returned as-is.
func StripKindPrefix(name string) string {
	return strings.TrimPrefix(name, "Kind")
}
|
||||
@@ -0,0 +1,221 @@
|
||||
// Package astconv decodes the binary AST format produced by the tsgo API
|
||||
// and converts it to the JSON format expected by the Java extractor.
|
||||
//
|
||||
// The binary format is documented in microsoft/typescript-go/internal/api/encoder/encoder.go.
|
||||
// Each source file is encoded as:
|
||||
//
|
||||
// Header (44 bytes) | String offsets | String data | Extended data | Structured data | Nodes (28 bytes each)
|
||||
//
|
||||
// Nodes are in a flat array with parent/next-sibling indices. The first node (index 0)
|
||||
// is a nil sentinel. The root node is at index 1.
|
||||
package astconv
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Binary format constants; values match
// microsoft/typescript-go/internal/api/encoder.

// Node record layout: seven uint32 fields per node.
const (
	nodeSize = 7 * 4

	nodeOffsetKind   = 0
	nodeOffsetPos    = 4
	nodeOffsetEnd    = 8
	nodeOffsetNext   = 12
	nodeOffsetParent = 16
	nodeOffsetData   = 20
	nodeOffsetFlags  = 24
)

// Header layout: byte positions of the section offsets inside the header.
const (
	headerSize             = 44
	headerOffsetMetadata   = 0
	headerOffsetStringOff  = 24
	headerOffsetStringData = 28
	headerOffsetExtData    = 32
	headerOffsetStructData = 36
	headerOffsetNodes      = 40
)

// protocolVersion is the only encoding version accepted by DecodeBinaryAST.
const protocolVersion uint8 = 5

// Node data field: the top two bits select the payload type, the remaining
// bits carry the child mask, string index or extended data offset.
const (
	nodeDataTypeChildren uint32 = 0x00000000
	nodeDataTypeString   uint32 = 0x40000000
	nodeDataTypeExtended uint32 = 0x80000000

	nodeDataTypeMask   uint32 = 0xC0000000
	nodeDataChildMask  uint32 = 0x000000FF
	nodeDataStringMask uint32 = 0x00FFFFFF
)

// SyntaxKindNodeList is the special kind value used for NodeList nodes.
const SyntaxKindNodeList uint32 = 0xFFFFFFFF
|
||||
|
||||
// BinaryAST gives random access to the nodes and strings of a binary-encoded
// TypeScript AST. Create one with DecodeBinaryAST.
type BinaryAST struct {
	// raw is the complete encoded buffer; all section offsets index into it.
	raw []byte

	// Section start offsets, in bytes, as read from the header.
	strOff    uint32 // string offset pairs
	strData   uint32 // string data
	extData   uint32 // extended node data
	structOff uint32 // structured data
	nodeOff   uint32 // nodes section

	// nodeCount is the number of node records, including the nil sentinel.
	nodeCount int

	// allStrData is one Go string covering everything from strData onward.
	// String offsets index into it, so taking a substring does not allocate.
	allStrData string
}
|
||||
|
||||
// DecodeBinaryAST parses the binary header and returns a BinaryAST for
|
||||
// random-access to nodes and strings.
|
||||
func DecodeBinaryAST(data []byte) (*BinaryAST, error) {
|
||||
if len(data) < headerSize {
|
||||
return nil, fmt.Errorf("data too short: %d bytes (need %d)", len(data), headerSize)
|
||||
}
|
||||
|
||||
version := data[headerOffsetMetadata+3]
|
||||
if version != protocolVersion {
|
||||
return nil, fmt.Errorf("unsupported protocol version %d (expected %d)", version, protocolVersion)
|
||||
}
|
||||
|
||||
b := &BinaryAST{
|
||||
raw: data,
|
||||
strOff: le32(data, headerOffsetStringOff),
|
||||
strData: le32(data, headerOffsetStringData),
|
||||
extData: le32(data, headerOffsetExtData),
|
||||
structOff: le32(data, headerOffsetStructData),
|
||||
nodeOff: le32(data, headerOffsetNodes),
|
||||
}
|
||||
|
||||
dataLen := uint32(len(data))
|
||||
if b.strOff > dataLen || b.strData > dataLen || b.extData > dataLen || b.nodeOff > dataLen {
|
||||
return nil, fmt.Errorf("invalid header offsets exceed data length %d", dataLen)
|
||||
}
|
||||
|
||||
b.nodeCount = (len(data) - int(b.nodeOff)) / nodeSize
|
||||
if b.nodeCount < 2 {
|
||||
return nil, fmt.Errorf("no nodes in AST (count=%d, need at least 2)", b.nodeCount)
|
||||
}
|
||||
|
||||
// The official decoder uses data[strData:] for zero-alloc substring slicing.
|
||||
b.allStrData = string(data[b.strData:])
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// DecodeBinaryASTFromBase64 decodes a base64-encoded binary AST, as returned
|
||||
// by tsgo's getSourceFile API in JSON ({"data":"<base64>"}).
|
||||
func DecodeBinaryASTFromBase64(b64 string) (*BinaryAST, error) {
|
||||
data, err := base64.StdEncoding.DecodeString(b64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("base64 decode failed: %w", err)
|
||||
}
|
||||
return DecodeBinaryAST(data)
|
||||
}
|
||||
|
||||
// NodeCount returns the total number of nodes (including the nil sentinel at index 0).
|
||||
func (b *BinaryAST) NodeCount() int { return b.nodeCount }
|
||||
|
||||
// Node field accessors — all read uint32 from the nodes section.
|
||||
|
||||
func (b *BinaryAST) nf(i, offset int) uint32 {
|
||||
return le32(b.raw, int(b.nodeOff)+i*nodeSize+offset)
|
||||
}
|
||||
|
||||
// Kind returns the SyntaxKind of node i.
|
||||
func (b *BinaryAST) Kind(i int) uint32 { return b.nf(i, nodeOffsetKind) }
|
||||
|
||||
// Pos returns the start position (UTF-16 offset) of node i.
|
||||
func (b *BinaryAST) Pos(i int) uint32 { return b.nf(i, nodeOffsetPos) }
|
||||
|
||||
// End returns the end position (UTF-16 offset) of node i.
|
||||
func (b *BinaryAST) End(i int) uint32 { return b.nf(i, nodeOffsetEnd) }
|
||||
|
||||
// Next returns the index of the next sibling of node i, or 0 if none.
|
||||
func (b *BinaryAST) Next(i int) uint32 { return b.nf(i, nodeOffsetNext) }
|
||||
|
||||
// Parent returns the index of the parent of node i, or 0 if none.
|
||||
func (b *BinaryAST) Parent(i int) uint32 { return b.nf(i, nodeOffsetParent) }
|
||||
|
||||
// Data returns the raw 32-bit data field of node i.
|
||||
func (b *BinaryAST) Data(i int) uint32 { return b.nf(i, nodeOffsetData) }
|
||||
|
||||
// Flags returns the NodeFlags of node i.
|
||||
func (b *BinaryAST) Flags(i int) uint32 { return b.nf(i, nodeOffsetFlags) }
|
||||
|
||||
// DataType returns the top 2 bits of the data field (Children, String, or Extended).
|
||||
func (b *BinaryAST) DataType(i int) uint32 { return b.Data(i) & nodeDataTypeMask }
|
||||
|
||||
// DefinedBits returns bits 24-29 of the data field (6 bits of per-node-type flags).
|
||||
func (b *BinaryAST) DefinedBits(i int) uint8 { return uint8((b.Data(i) >> 24) & 0x3F) }
|
||||
|
||||
// ChildMask returns the low byte of the data field (child property bitmask).
|
||||
func (b *BinaryAST) ChildMask(i int) uint8 { return uint8(b.Data(i) & nodeDataChildMask) }
|
||||
|
||||
// StringIndex returns the 24-bit string table index from the data field.
|
||||
func (b *BinaryAST) StringIndex(i int) uint32 { return b.Data(i) & nodeDataStringMask }
|
||||
|
||||
// ExtOffset returns the 24-bit offset into the extended data section from the data field.
|
||||
func (b *BinaryAST) ExtOffset(i int) uint32 { return b.Data(i) & nodeDataStringMask }
|
||||
|
||||
// NodeListLen returns the number of children for a NodeList node (stored in data field).
|
||||
func (b *BinaryAST) NodeListLen(i int) uint32 { return b.Data(i) }
|
||||
|
||||
// IsNodeList returns true if node i is a NodeList.
|
||||
func (b *BinaryAST) IsNodeList(i int) bool { return b.Kind(i) == SyntaxKindNodeList }
|
||||
|
||||
// GetString reads a string from the string table at the given offset index.
|
||||
// The index comes from a String-type node's data field (24-bit value).
|
||||
func (b *BinaryAST) GetString(idx uint32) string {
|
||||
// Each string entry is two uint32 values (start, end) in the string offsets section.
|
||||
offBase := int(b.strOff) + int(idx)*4
|
||||
start := le32(b.raw, offBase)
|
||||
end := le32(b.raw, offBase+4)
|
||||
return b.allStrData[start:end]
|
||||
}
|
||||
|
||||
// ExtUint32 reads a uint32 from the extended data section at the given byte offset.
|
||||
func (b *BinaryAST) ExtUint32(off uint32) uint32 {
|
||||
return le32(b.raw, int(b.extData)+int(off))
|
||||
}
|
||||
|
||||
// Children returns the indices of all direct children of node i.
|
||||
// Children are identified by having parent == i. The first child is at i+1
|
||||
// (if its parent is i), and subsequent children are found via Next pointers.
|
||||
func (b *BinaryAST) Children(i int) []int {
|
||||
if i+1 >= b.nodeCount {
|
||||
return nil
|
||||
}
|
||||
firstChild := i + 1
|
||||
if b.Parent(firstChild) != uint32(i) {
|
||||
return nil
|
||||
}
|
||||
children := []int{firstChild}
|
||||
next := int(b.Next(firstChild))
|
||||
for next != 0 {
|
||||
children = append(children, next)
|
||||
next = int(b.Next(next))
|
||||
}
|
||||
return children
|
||||
}
|
||||
|
||||
// SourceText returns the source file text, extracted from the SourceFile's
|
||||
// extended data. Returns "" if the root node is not a SourceFile or if
|
||||
// the extended data is missing.
|
||||
func (b *BinaryAST) SourceText() string {
|
||||
if b.nodeCount < 2 {
|
||||
return ""
|
||||
}
|
||||
// Root is at index 1. Check if it has extended data type.
|
||||
if b.DataType(1)&nodeDataTypeMask != nodeDataTypeExtended {
|
||||
return ""
|
||||
}
|
||||
extOff := b.ExtOffset(1)
|
||||
textIdx := b.ExtUint32(extOff)
|
||||
return b.GetString(textIdx)
|
||||
}
|
||||
|
||||
// le32 decodes the little-endian uint32 stored at data[offset:offset+4].
// It returns 0 when those four bytes are not entirely inside data.
func le32(data []byte, offset int) uint32 {
	if offset >= 0 && offset+4 <= len(data) {
		return binary.LittleEndian.Uint32(data[offset:])
	}
	return 0
}
|
||||
@@ -0,0 +1,876 @@
|
||||
package astconv
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TS7 SyntaxKind values for tokens (from microsoft/typescript-go internal/ast/kind.go).
//
// Only the kinds this scanner emits are declared, so the numbering has gaps
// (11, 12 and 61 have no constant here). Keyword kinds are not listed in this
// block; they are held in the keywordKinds map.
const (
	KindUnknown = 0
	KindEndOfFile = 1

	// Trivia.
	// NOTE(review): ShebangTrivia = 6 and ConflictMarkerTrivia = 7 sit directly
	// below NumericLiteral = 8 — confirm both values against kind.go.
	KindSingleLineCommentTrivia = 2
	KindMultiLineCommentTrivia = 3
	KindNewLineTrivia = 4
	KindWhitespaceTrivia = 5
	KindShebangTrivia = 6
	KindConflictMarkerTrivia = 7

	// Literals and template parts.
	KindNumericLiteral = 8
	KindBigIntLiteral = 9
	KindStringLiteral = 10
	KindRegularExpressionLiteral = 13
	KindNoSubstitutionTemplateLiteral = 14
	KindTemplateHead = 15
	KindTemplateMiddle = 16
	KindTemplateTail = 17

	// Punctuation and operators.
	KindOpenBraceToken = 18
	KindCloseBraceToken = 19
	KindOpenParenToken = 20
	KindCloseParenToken = 21
	KindOpenBracketToken = 22
	KindCloseBracketToken = 23
	KindDotToken = 24
	KindDotDotDotToken = 25
	KindSemicolonToken = 26
	KindCommaToken = 27
	KindQuestionDotToken = 28
	KindLessThanToken = 29
	KindLessThanSlashToken = 30
	KindGreaterThanToken = 31
	KindLessThanEqualsToken = 32
	KindGreaterThanEqualsToken = 33
	KindEqualsEqualsToken = 34
	KindExclamationEqualsToken = 35
	KindEqualsEqualsEqualsToken = 36
	KindExclamationEqualsEqualsToken = 37
	KindEqualsGreaterThanToken = 38
	KindPlusToken = 39
	KindMinusToken = 40
	KindAsteriskToken = 41
	KindAsteriskAsteriskToken = 42
	KindSlashToken = 43
	KindPercentToken = 44
	KindPlusPlusToken = 45
	KindMinusMinusToken = 46
	KindLessThanLessThanToken = 47
	KindGreaterThanGreaterThanToken = 48
	KindGreaterThanGreaterThanGreaterThanToken = 49
	KindAmpersandToken = 50
	KindBarToken = 51
	KindCaretToken = 52
	KindExclamationToken = 53
	KindTildeToken = 54
	KindAmpersandAmpersandToken = 55
	KindBarBarToken = 56
	KindQuestionToken = 57
	KindColonToken = 58
	KindAtToken = 59
	KindQuestionQuestionToken = 60
	KindHashToken = 62

	// Assignment operators.
	KindEqualsToken = 63
	KindPlusEqualsToken = 64
	KindMinusEqualsToken = 65
	KindAsteriskEqualsToken = 66
	KindAsteriskAsteriskEqualsToken = 67
	KindSlashEqualsToken = 68
	KindPercentEqualsToken = 69
	KindLessThanLessThanEqualsToken = 70
	KindGreaterThanGreaterThanEqualsToken = 71
	KindGreaterThanGreaterThanGreaterThanEqualsToken = 72
	KindAmpersandEqualsToken = 73
	KindBarEqualsToken = 74
	KindBarBarEqualsToken = 75
	KindAmpersandAmpersandEqualsToken = 76
	KindQuestionQuestionEqualsToken = 77
	KindCaretEqualsToken = 78

	// Identifiers.
	KindIdentifier = 79
	KindPrivateIdentifier = 80
)
|
||||
|
||||
// Token represents a single token from the scanner.
type Token struct {
	// Kind is one of the Kind* constants or a value from keywordKinds.
	Kind int `json:"kind"`
	// TokenPos is the offset into the source text at which the token starts.
	TokenPos int `json:"tokenPos"`
	// Text is the text of the token.
	Text string `json:"text"`
}
|
||||
|
||||
// RescanEvent tells the scanner to rescan at a given position.
type RescanEvent struct {
	// Pos is the source offset of the token start the event applies to.
	Pos int
	// Kind selects how the token at Pos is rescanned.
	Kind string // "regex", "template", "greater"
}
|
||||
|
||||
// Scanner tokenizes TypeScript source text.
type Scanner struct {
	text string // source text being tokenized
	pos int // offset in text of the next unread byte
	events []RescanEvent // rescan requests supplied to NewScanner
	evIdx int // index in events of the next request not yet consumed
}
|
||||
|
||||
// NewScanner creates a scanner for the given source text.
|
||||
// rescanEvents should be sorted by position. They inform the scanner
|
||||
// about positions where regex literals, template tokens, or greater-than
|
||||
// rescanning is needed (matching the Node.js wrapper behavior).
|
||||
func NewScanner(text string, rescanEvents []RescanEvent) *Scanner {
|
||||
return &Scanner{
|
||||
text: text,
|
||||
pos: 0,
|
||||
events: rescanEvents,
|
||||
evIdx: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// ScanAll produces all tokens from the source text, including trivia
|
||||
// (whitespace, newlines, comments), matching the Node.js wrapper behavior.
|
||||
func (s *Scanner) ScanAll() []Token {
|
||||
var tokens []Token
|
||||
for {
|
||||
tok := s.scan()
|
||||
tokens = append(tokens, tok)
|
||||
if tok.Kind == KindEndOfFile {
|
||||
break
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
func (s *Scanner) peek() byte {
|
||||
if s.pos >= len(s.text) {
|
||||
return 0
|
||||
}
|
||||
return s.text[s.pos]
|
||||
}
|
||||
|
||||
func (s *Scanner) peekAt(offset int) byte {
|
||||
p := s.pos + offset
|
||||
if p >= len(s.text) {
|
||||
return 0
|
||||
}
|
||||
return s.text[p]
|
||||
}
|
||||
|
||||
func (s *Scanner) advance() {
|
||||
s.pos++
|
||||
}
|
||||
|
||||
func (s *Scanner) nextRescanPos() int {
|
||||
if s.evIdx < len(s.events) {
|
||||
return s.events[s.evIdx].Pos
|
||||
}
|
||||
return int(^uint(0) >> 1) // MaxInt
|
||||
}
|
||||
|
||||
func (s *Scanner) nextRescanKind() string {
|
||||
if s.evIdx < len(s.events) {
|
||||
return s.events[s.evIdx].Kind
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (s *Scanner) consumeRescan() {
|
||||
if s.evIdx < len(s.events) {
|
||||
s.evIdx++
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scan() Token {
|
||||
if s.pos >= len(s.text) {
|
||||
return Token{Kind: KindEndOfFile, TokenPos: s.pos, Text: ""}
|
||||
}
|
||||
|
||||
tokenPos := s.pos
|
||||
ch := s.peek()
|
||||
|
||||
// Whitespace (not newlines)
|
||||
if ch == ' ' || ch == '\t' || ch == '\f' || ch == '\v' {
|
||||
for s.pos < len(s.text) {
|
||||
c := s.text[s.pos]
|
||||
if c == ' ' || c == '\t' || c == '\f' || c == '\v' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return Token{Kind: KindWhitespaceTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
// Newlines
|
||||
if ch == '\n' {
|
||||
s.advance()
|
||||
return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: "\n"}
|
||||
}
|
||||
if ch == '\r' {
|
||||
s.advance()
|
||||
if s.peek() == '\n' {
|
||||
s.advance()
|
||||
}
|
||||
return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
// Check for rescan event at this position.
|
||||
// TS5's scanner loop captures the token kind BEFORE the rescan event fires,
|
||||
// then uses the rescanned text. So regex tokens get kind=SlashToken with
|
||||
// text="/pattern/flags", and template continuation tokens get kind=CloseBraceToken
|
||||
// with the template text. We replicate this by scanning the full content but
|
||||
// using the pre-rescan kind.
|
||||
if tokenPos == s.nextRescanPos() {
|
||||
kind := s.nextRescanKind()
|
||||
s.consumeRescan()
|
||||
switch kind {
|
||||
case "regex":
|
||||
tok := s.scanRegExp(tokenPos)
|
||||
tok.Kind = KindSlashToken
|
||||
return tok
|
||||
case "template":
|
||||
tok := s.scanTemplatePart(tokenPos, true)
|
||||
tok.Kind = KindCloseBraceToken
|
||||
return tok
|
||||
case "greater":
|
||||
return s.scanGreater(tokenPos)
|
||||
}
|
||||
}
|
||||
|
||||
switch ch {
|
||||
case '/':
|
||||
next := s.peekAt(1)
|
||||
if next == '/' {
|
||||
return s.scanSingleLineComment(tokenPos)
|
||||
}
|
||||
if next == '*' {
|
||||
return s.scanMultiLineComment(tokenPos)
|
||||
}
|
||||
if next == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindSlashEqualsToken, TokenPos: tokenPos, Text: "/="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindSlashToken, TokenPos: tokenPos, Text: "/"}
|
||||
|
||||
case '\'', '"':
|
||||
return s.scanString(tokenPos, ch)
|
||||
|
||||
case '`':
|
||||
return s.scanTemplatePart(tokenPos, false)
|
||||
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
return s.scanNumber(tokenPos)
|
||||
|
||||
case '{':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenBraceToken, TokenPos: tokenPos, Text: "{"}
|
||||
case '}':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseBraceToken, TokenPos: tokenPos, Text: "}"}
|
||||
case '(':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenParenToken, TokenPos: tokenPos, Text: "("}
|
||||
case ')':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseParenToken, TokenPos: tokenPos, Text: ")"}
|
||||
case '[':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenBracketToken, TokenPos: tokenPos, Text: "["}
|
||||
case ']':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseBracketToken, TokenPos: tokenPos, Text: "]"}
|
||||
case ';':
|
||||
s.advance()
|
||||
return Token{Kind: KindSemicolonToken, TokenPos: tokenPos, Text: ";"}
|
||||
case ',':
|
||||
s.advance()
|
||||
return Token{Kind: KindCommaToken, TokenPos: tokenPos, Text: ","}
|
||||
case '~':
|
||||
s.advance()
|
||||
return Token{Kind: KindTildeToken, TokenPos: tokenPos, Text: "~"}
|
||||
case '@':
|
||||
s.advance()
|
||||
return Token{Kind: KindAtToken, TokenPos: tokenPos, Text: "@"}
|
||||
|
||||
case '.':
|
||||
if s.peekAt(1) == '.' && s.peekAt(2) == '.' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindDotDotDotToken, TokenPos: tokenPos, Text: "..."}
|
||||
}
|
||||
// .123 numeric literal
|
||||
if s.peekAt(1) >= '0' && s.peekAt(1) <= '9' {
|
||||
return s.scanNumber(tokenPos)
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindDotToken, TokenPos: tokenPos, Text: "."}
|
||||
|
||||
case ':':
|
||||
s.advance()
|
||||
return Token{Kind: KindColonToken, TokenPos: tokenPos, Text: ":"}
|
||||
|
||||
case '?':
|
||||
if s.peekAt(1) == '.' && !(s.peekAt(2) >= '0' && s.peekAt(2) <= '9') {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindQuestionDotToken, TokenPos: tokenPos, Text: "?."}
|
||||
}
|
||||
if s.peekAt(1) == '?' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindQuestionQuestionEqualsToken, TokenPos: tokenPos, Text: "??="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindQuestionQuestionToken, TokenPos: tokenPos, Text: "??"}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindQuestionToken, TokenPos: tokenPos, Text: "?"}
|
||||
|
||||
case '!':
|
||||
if s.peekAt(1) == '=' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindExclamationEqualsEqualsToken, TokenPos: tokenPos, Text: "!=="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindExclamationEqualsToken, TokenPos: tokenPos, Text: "!="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindExclamationToken, TokenPos: tokenPos, Text: "!"}
|
||||
|
||||
case '=':
|
||||
if s.peekAt(1) == '=' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindEqualsEqualsEqualsToken, TokenPos: tokenPos, Text: "==="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindEqualsEqualsToken, TokenPos: tokenPos, Text: "=="}
|
||||
}
|
||||
if s.peekAt(1) == '>' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindEqualsGreaterThanToken, TokenPos: tokenPos, Text: "=>"}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindEqualsToken, TokenPos: tokenPos, Text: "="}
|
||||
|
||||
case '+':
|
||||
if s.peekAt(1) == '+' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPlusPlusToken, TokenPos: tokenPos, Text: "++"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPlusEqualsToken, TokenPos: tokenPos, Text: "+="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindPlusToken, TokenPos: tokenPos, Text: "+"}
|
||||
|
||||
case '-':
|
||||
if s.peekAt(1) == '-' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMinusMinusToken, TokenPos: tokenPos, Text: "--"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMinusEqualsToken, TokenPos: tokenPos, Text: "-="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindMinusToken, TokenPos: tokenPos, Text: "-"}
|
||||
|
||||
case '*':
|
||||
if s.peekAt(1) == '*' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindAsteriskAsteriskEqualsToken, TokenPos: tokenPos, Text: "**="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAsteriskAsteriskToken, TokenPos: tokenPos, Text: "**"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAsteriskEqualsToken, TokenPos: tokenPos, Text: "*="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindAsteriskToken, TokenPos: tokenPos, Text: "*"}
|
||||
|
||||
case '%':
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPercentEqualsToken, TokenPos: tokenPos, Text: "%="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindPercentToken, TokenPos: tokenPos, Text: "%"}
|
||||
|
||||
case '<':
|
||||
if s.peekAt(1) == '<' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindLessThanLessThanEqualsToken, TokenPos: tokenPos, Text: "<<="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanLessThanToken, TokenPos: tokenPos, Text: "<<"}
|
||||
}
|
||||
if s.peekAt(1) == '/' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanSlashToken, TokenPos: tokenPos, Text: "</"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanEqualsToken, TokenPos: tokenPos, Text: "<="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindLessThanToken, TokenPos: tokenPos, Text: "<"}
|
||||
|
||||
case '>':
|
||||
// TypeScript scanner always produces single > tokens.
|
||||
// Multi-character operators (>>, >>>, >>=, etc.) are produced
|
||||
// only via reScanGreaterToken when the parser requests it.
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanToken, TokenPos: tokenPos, Text: ">"}
|
||||
|
||||
case '&':
|
||||
if s.peekAt(1) == '&' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindAmpersandAmpersandEqualsToken, TokenPos: tokenPos, Text: "&&="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAmpersandAmpersandToken, TokenPos: tokenPos, Text: "&&"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAmpersandEqualsToken, TokenPos: tokenPos, Text: "&="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindAmpersandToken, TokenPos: tokenPos, Text: "&"}
|
||||
|
||||
case '|':
|
||||
if s.peekAt(1) == '|' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindBarBarEqualsToken, TokenPos: tokenPos, Text: "||="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindBarBarToken, TokenPos: tokenPos, Text: "||"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindBarEqualsToken, TokenPos: tokenPos, Text: "|="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindBarToken, TokenPos: tokenPos, Text: "|"}
|
||||
|
||||
case '^':
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindCaretEqualsToken, TokenPos: tokenPos, Text: "^="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindCaretToken, TokenPos: tokenPos, Text: "^"}
|
||||
|
||||
case '#':
|
||||
// Could be private identifier
|
||||
if s.peekAt(1) == '!' && tokenPos == 0 {
|
||||
// Shebang — scan to end of line, emit as ShebangTrivia
|
||||
start := s.pos
|
||||
for s.pos < len(s.text) && s.text[s.pos] != '\n' && s.text[s.pos] != '\r' {
|
||||
s.pos++
|
||||
}
|
||||
text := s.text[start:s.pos]
|
||||
return Token{Kind: KindShebangTrivia, TokenPos: tokenPos, Text: text}
|
||||
}
|
||||
if isIdentStart(s.peekAt(1)) {
|
||||
return s.scanPrivateIdentifier(tokenPos)
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindHashToken, TokenPos: tokenPos, Text: "#"}
|
||||
}
|
||||
|
||||
// Identifier or keyword
|
||||
if isIdentStartByte(ch) {
|
||||
return s.scanIdentifierOrKeyword(tokenPos)
|
||||
}
|
||||
|
||||
// Handle multi-byte Unicode identifier starts
|
||||
r, size := utf8.DecodeRuneInString(s.text[s.pos:])
|
||||
if r != utf8.RuneError && isIdentStartRune(r) {
|
||||
return s.scanIdentifierOrKeyword(tokenPos)
|
||||
}
|
||||
|
||||
// Unknown character
|
||||
s.pos += size
|
||||
return Token{Kind: KindUnknown, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanSingleLineComment(start int) Token {
|
||||
s.pos += 2 // skip //
|
||||
for s.pos < len(s.text) && s.text[s.pos] != '\n' && s.text[s.pos] != '\r' {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindSingleLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanMultiLineComment(start int) Token {
|
||||
s.pos += 2 // skip /*
|
||||
for s.pos < len(s.text)-1 {
|
||||
if s.text[s.pos] == '*' && s.text[s.pos+1] == '/' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
// Unterminated
|
||||
s.pos = len(s.text)
|
||||
return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanString(start int, quote byte) Token {
|
||||
s.advance() // skip opening quote
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == quote {
|
||||
s.advance()
|
||||
return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '\n' || ch == '\r' {
|
||||
// Unterminated string
|
||||
break
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanTemplatePart(start int, isRescan bool) Token {
|
||||
if isRescan {
|
||||
// We're at a '}' that needs to be rescanned as TemplateMiddle or TemplateTail
|
||||
s.advance() // skip }
|
||||
} else {
|
||||
s.advance() // skip `
|
||||
}
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == '`' {
|
||||
s.advance()
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateTail, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNoSubstitutionTemplateLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '$' && s.peekAt(1) == '{' {
|
||||
s.pos += 2
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateMiddle, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindTemplateHead, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
// Unterminated
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateTail, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNoSubstitutionTemplateLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanRegExp(start int) Token {
|
||||
s.advance() // skip /
|
||||
inCharClass := false
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == '[' {
|
||||
inCharClass = true
|
||||
s.pos++
|
||||
continue
|
||||
}
|
||||
if ch == ']' {
|
||||
inCharClass = false
|
||||
s.pos++
|
||||
continue
|
||||
}
|
||||
if ch == '/' && !inCharClass {
|
||||
s.advance() // skip closing /
|
||||
// Scan flags
|
||||
for s.pos < len(s.text) && isIdentChar(s.text[s.pos]) {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '\n' || ch == '\r' {
|
||||
break
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanGreater(start int) Token {
|
||||
s.advance() // skip >
|
||||
if s.peek() == '>' {
|
||||
s.advance()
|
||||
if s.peek() == '>' {
|
||||
s.advance()
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanGreaterThanGreaterThanEqualsToken, TokenPos: start, Text: ">>>="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanGreaterThanGreaterThanToken, TokenPos: start, Text: ">>>"}
|
||||
}
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanGreaterThanEqualsToken, TokenPos: start, Text: ">>="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanGreaterThanToken, TokenPos: start, Text: ">>"}
|
||||
}
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanEqualsToken, TokenPos: start, Text: ">="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanToken, TokenPos: start, Text: ">"}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanNumber(start int) Token {
|
||||
if s.peek() == '0' {
|
||||
next := s.peekAt(1)
|
||||
if next == 'x' || next == 'X' {
|
||||
s.pos += 2
|
||||
s.scanHexDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
if next == 'b' || next == 'B' {
|
||||
s.pos += 2
|
||||
s.scanBinaryDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
if next == 'o' || next == 'O' {
|
||||
s.pos += 2
|
||||
s.scanOctalDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
}
|
||||
|
||||
s.scanDecimalDigits()
|
||||
if s.peek() == '.' {
|
||||
s.advance()
|
||||
s.scanDecimalDigits()
|
||||
}
|
||||
if s.peek() == 'e' || s.peek() == 'E' {
|
||||
s.advance()
|
||||
if s.peek() == '+' || s.peek() == '-' {
|
||||
s.advance()
|
||||
}
|
||||
s.scanDecimalDigits()
|
||||
}
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
|
||||
func (s *Scanner) finishBigIntOrNumber(start int) Token {
|
||||
if s.peek() == 'n' {
|
||||
s.advance()
|
||||
return Token{Kind: KindBigIntLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNumericLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanDecimalDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '9') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanHexDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanBinaryDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '0' || ch == '1' || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanOctalDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '7') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanIdentifierOrKeyword(start int) Token {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if isIdentChar(ch) {
|
||||
s.pos++
|
||||
} else if ch >= 0x80 {
|
||||
r, size := utf8.DecodeRuneInString(s.text[s.pos:])
|
||||
if r != utf8.RuneError && isIdentContinueRune(r) {
|
||||
s.pos += size
|
||||
} else {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
text := s.text[start:s.pos]
|
||||
if kind, ok := keywordKinds[text]; ok {
|
||||
return Token{Kind: kind, TokenPos: start, Text: text}
|
||||
}
|
||||
return Token{Kind: KindIdentifier, TokenPos: start, Text: text}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanPrivateIdentifier(start int) Token {
|
||||
s.advance() // skip #
|
||||
for s.pos < len(s.text) && isIdentChar(s.text[s.pos]) {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindPrivateIdentifier, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
// isIdentStartByte reports whether ch is an ASCII letter, '_' or '$'.
func isIdentStartByte(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	}
	return ch == '_' || ch == '$'
}
|
||||
|
||||
// isIdentStart reports whether ch is an ASCII letter, '_' or '$'; it accepts
// exactly the bytes isIdentStartByte accepts.
func isIdentStart(ch byte) bool {
	lower := 'a' <= ch && ch <= 'z'
	upper := 'A' <= ch && ch <= 'Z'
	return lower || upper || ch == '_' || ch == '$'
}
|
||||
|
||||
// isIdentStartRune reports whether r may begin a JavaScript identifier:
// any letter (Lu, Ll, Lt, Lm, Lo), a letter number (Nl), '_' or '$'.
func isIdentStartRune(r rune) bool {
	switch r {
	case '_', '$':
		return true
	}
	return unicode.In(r, unicode.Letter, unicode.Nl)
}
|
||||
|
||||
// isIdentContinueRune reports whether r may appear in a JavaScript
// identifier after the first position: a letter, a decimal digit (Nd), a
// rune in category Nl, Mn, Mc or Pc, '_', '$', or one of the joiners
// U+200C (ZWNJ) and U+200D (ZWJ).
func isIdentContinueRune(r rune) bool {
	switch r {
	case '_', '$', '\u200C', '\u200D':
		return true
	}
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return true
	}
	return unicode.In(r, unicode.Nl, unicode.Mn, unicode.Mc, unicode.Pc)
}
|
||||
|
||||
// isIdentChar reports whether ch is an ASCII letter, an ASCII digit, '_' or
// '$'.
func isIdentChar(ch byte) bool {
	switch {
	case '0' <= ch && ch <= '9',
		'a' <= ch && ch <= 'z',
		'A' <= ch && ch <= 'Z':
		return true
	}
	return ch == '_' || ch == '$'
}
|
||||
|
||||
// keywordKinds maps keyword text to TS7 SyntaxKind values.
// These start at KindBreakKeyword = 82.
//
// scanIdentifierOrKeyword looks every scanned word up in this map; a word
// that is not listed is reported as KindIdentifier.
var keywordKinds = map[string]int{
	// Reserved words
	"break": 82,
	"case": 83,
	"catch": 84,
	"class": 85,
	"const": 86,
	"continue": 87,
	"debugger": 88,
	"default": 89,
	"delete": 90,
	"do": 91,
	"else": 92,
	"enum": 93,
	"export": 94,
	"extends": 95,
	"false": 96,
	"finally": 97,
	"for": 98,
	"function": 99,
	"if": 100,
	"import": 101,
	"in": 102,
	"instanceof": 103,
	"new": 104,
	"null": 105,
	"return": 106,
	"super": 107,
	"switch": 108,
	"this": 109,
	"throw": 110,
	"true": 111,
	"try": 112,
	"typeof": 113,
	"var": 114,
	"void": 115,
	"while": 116,
	"with": 117,
	// Strict mode reserved words
	"implements": 118,
	"interface": 119,
	"let": 120,
	"package": 121,
	"private": 122,
	"protected": 123,
	"public": 124,
	"static": 125,
	"yield": 126,
	// Contextual keywords
	"abstract": 127,
	"accessor": 128,
	"as": 129,
	"asserts": 130,
	"assert": 131,
	"any": 132,
	"async": 133,
	"await": 134,
	"boolean": 135,
	"constructor": 136,
	"declare": 137,
	"get": 138,
	"immediate": 139,
	"infer": 140,
	"intrinsic": 141,
	"is": 142,
	"keyof": 143,
	"module": 144,
	"namespace": 145,
	"never": 146,
	"out": 147,
	"readonly": 148,
	"require": 149,
	"number": 150,
	"object": 151,
	"satisfies": 152,
	"set": 153,
	"string": 154,
	"symbol": 155,
	"type": 156,
	"undefined": 157,
	"unique": 158,
	"unknown": 159,
	"using": 160,
	"from": 161,
	"global": 162,
	"bigint": 163,
	"override": 164,
	"of": 165,
	"defer": 166,
}
|
||||
@@ -0,0 +1,142 @@
|
||||
// Package astconv handles conversion between TypeScript 7's AST representation
|
||||
// and the JSON format expected by the Java extractor.
|
||||
//
|
||||
// The Java extractor expects AST nodes to have:
|
||||
// - "kind" as a symbolic string name (e.g., "SourceFile"), not a numeric value
|
||||
// - "$pos" and "$end" as character offsets
|
||||
// - "$lineStarts" on the root SourceFile node
|
||||
// - "$tokens" array on the root node
|
||||
// - "$declarationKind" on VariableDeclarationList nodes ("var", "let", "const")
|
||||
//   - Only whitelisted property names (see PropertyWhitelist)
|
||||
package astconv
|
||||
|
||||
// PropertyWhitelist is the set of property names that should be included
// in the serialized AST JSON. This must match the whitelist in the Node.js
// wrapper (main.ts).
//
// Every listed name maps to true, so indexing the map with an unlisted name
// yields false; IsAllowedProperty relies on that. The "$"-prefixed names come
// first, followed by the remaining names in alphabetical order.
var PropertyWhitelist = map[string]bool{
	"$declarationKind": true,
	"$end": true,
	"$lineStarts": true,
	"$overloadIndex": true,
	"$pos": true,
	"$tokens": true,
	"argument": true,
	"argumentExpression": true,
	"arguments": true,
	"assertsModifier": true,
	"asteriskToken": true,
	"attributes": true,
	"block": true,
	"body": true,
	"caseBlock": true,
	"catchClause": true,
	"checkType": true,
	"children": true,
	"clauses": true,
	"closingElement": true,
	"closingFragment": true,
	"condition": true,
	"constraint": true,
	"constructor": true,
	"declarationList": true,
	"declarations": true,
	"default": true,
	"delete": true,
	"dotDotDotToken": true,
	"elements": true,
	"elementType": true,
	"elementTypes": true,
	"elseStatement": true,
	"escapedText": true,
	"exclamationToken": true,
	"exportClause": true,
	"expression": true,
	"exprName": true,
	"extendsType": true,
	"falseType": true,
	"finallyBlock": true,
	"flags": true,
	"head": true,
	"heritageClauses": true,
	"importClause": true,
	"incrementor": true,
	"indexType": true,
	"init": true,
	"initializer": true,
	"isExportEquals": true,
	"isTypeOf": true,
	"isTypeOnly": true,
	"keywordToken": true,
	"kind": true,
	"label": true,
	"left": true,
	"literal": true,
	"members": true,
	"messageText": true,
	"modifiers": true,
	"moduleReference": true,
	"moduleSpecifier": true,
	"name": true,
	"namedBindings": true,
	"objectType": true,
	"openingElement": true,
	"openingFragment": true,
	"operand": true,
	"operator": true,
	"operatorToken": true,
	"parameterName": true,
	"parameters": true,
	"parseDiagnostics": true,
	"phaseModifier": true,
	"properties": true,
	"propertyName": true,
	"qualifier": true,
	"questionDotToken": true,
	"questionToken": true,
	"right": true,
	"selfClosing": true,
	"statement": true,
	"statements": true,
	"tag": true,
	"tagName": true,
	"template": true,
	"templateSpans": true,
	"text": true,
	"thenStatement": true,
	"token": true,
	"tokenPos": true,
	"trueType": true,
	"tryBlock": true,
	"type": true,
	"typeArguments": true,
	"typeName": true,
	"typeParameter": true,
	"typeParameters": true,
	"types": true,
	"variableDeclaration": true,
	"whenFalse": true,
	"whenTrue": true,
}
|
||||
|
||||
// MetaProperties are property names used in the parse response wrapper
// (not part of the AST itself but part of the response envelope).
// IsAllowedProperty accepts these in addition to PropertyWhitelist; "type"
// appears in both maps.
var MetaProperties = map[string]bool{
	"ast": true,
	"type": true,
}
|
||||
|
||||
// IsAllowedProperty returns true if the property name should be included
|
||||
// in the serialized AST JSON.
|
||||
func IsAllowedProperty(name string) bool {
|
||||
if PropertyWhitelist[name] {
|
||||
return true
|
||||
}
|
||||
if MetaProperties[name] {
|
||||
return true
|
||||
}
|
||||
// Numeric keys (array indices) are always allowed
|
||||
if len(name) > 0 && name[0] >= '0' && name[0] <= '9' {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package astconv
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestAllowedProperties(t *testing.T) {
|
||||
allowed := []string{"kind", "$pos", "$end", "statements", "body", "name", "type"}
|
||||
for _, p := range allowed {
|
||||
if !IsAllowedProperty(p) {
|
||||
t.Errorf("expected %q to be allowed", p)
|
||||
}
|
||||
}
|
||||
|
||||
disallowed := []string{"parent", "symbol", "localSymbol", "nextContainer", "flowNode"}
|
||||
for _, p := range disallowed {
|
||||
if IsAllowedProperty(p) {
|
||||
t.Errorf("expected %q to be disallowed", p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNumericKeys(t *testing.T) {
|
||||
for _, k := range []string{"0", "1", "42", "999"} {
|
||||
if !IsAllowedProperty(k) {
|
||||
t.Errorf("expected numeric key %q to be allowed", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetaProperties(t *testing.T) {
|
||||
if !IsAllowedProperty("ast") {
|
||||
t.Error("expected 'ast' to be allowed (meta property)")
|
||||
}
|
||||
if !IsAllowedProperty("type") {
|
||||
t.Error("expected 'type' to be allowed (meta property)")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,191 @@
|
||||
// Package protocol implements the line-delimited JSON protocol used to
|
||||
// communicate between the Java extractor and the TypeScript parser wrapper.
|
||||
//
|
||||
// The protocol matches the one implemented by the Node.js wrapper in
|
||||
// lib/typescript/src/main.ts. Commands are read from stdin as one JSON
|
||||
// object per line, and responses are written to stdout in the same format.
|
||||
package protocol
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Command represents a parsed command from the Java extractor.
// Each command is decoded from one JSON object occupying a single line of
// input.
type Command struct {
	// Command is the command name. The server dispatches on "parse",
	// "prepare-files", "reset", "get-metadata" and "quit".
	Command string `json:"command"`
	// Filename is the file passed to the handler for a "parse" command.
	Filename string `json:"filename,omitempty"`
	// Filenames is the file list passed to the handler for a
	// "prepare-files" command.
	Filenames []string `json:"filenames,omitempty"`
}
|
||||
|
||||
// Response is the interface for all protocol responses.
// Values implementing it are serialized with encoding/json by the server
// and written to the output followed by a newline.
type Response interface {
	// ResponseType returns the response's type tag, e.g. "ast" or "error".
	ResponseType() string
}
|
||||
|
||||
// MetadataResponse is sent in reply to a "get-metadata" command.
type MetadataResponse struct {
	// Type is the serialized "type" field. The server writes the value as
	// supplied by the handler; it is not derived from ResponseType.
	Type string `json:"type"`
	// SyntaxKinds maps syntax kind names to numeric values.
	SyntaxKinds map[string]int `json:"syntaxKinds"`
	// NodeFlags maps node flag names to numeric values.
	NodeFlags map[string]int `json:"nodeFlags"`
}

// ResponseType returns the type tag "metadata".
func (r *MetadataResponse) ResponseType() string { return "metadata" }
|
||||
|
||||
// OKResponse is sent in reply to a "prepare-files" command.
type OKResponse struct {
	// Type is the serialized "type" field; the server sets it to "ok".
	Type string `json:"type"`
}

// ResponseType returns the type tag "ok".
func (r *OKResponse) ResponseType() string { return "ok" }
|
||||
|
||||
// ASTResponse is sent in reply to a "parse" command.
type ASTResponse struct {
	// Type is the serialized "type" field; the server sets it to "ast".
	Type string `json:"type"`
	// AST is the value returned by the handler's HandleParse, serialized
	// as-is under the "ast" key.
	AST interface{} `json:"ast"`
}

// ResponseType returns the type tag "ast".
func (r *ASTResponse) ResponseType() string { return "ast" }
|
||||
|
||||
// ResetDoneResponse is sent in reply to a "reset" command.
type ResetDoneResponse struct {
	// Type is the serialized "type" field; the server sets it to
	// "reset-done".
	Type string `json:"type"`
}

// ResponseType returns the type tag "reset-done".
func (r *ResetDoneResponse) ResponseType() string { return "reset-done" }
|
||||
|
||||
// ErrorResponse is sent when an error occurs during processing, for
// example when a command line cannot be decoded or a handler returns an
// error.
type ErrorResponse struct {
	// Type is the serialized "type" field; the server sets it to "error".
	Type string `json:"type"`
	// Message is the human-readable error text.
	Message string `json:"message"`
}

// ResponseType returns the type tag "error".
func (r *ErrorResponse) ResponseType() string { return "error" }
|
||||
|
||||
// Handler defines the interface for handling protocol commands.
//
// An error returned from any method is reported to the client by the
// server as an error response; the server then continues with the next
// command.
type Handler interface {
	// HandleParse parses a TypeScript file and returns the AST.
	// The returned value is serialized as the "ast" field of the response.
	HandleParse(filename string) (interface{}, error)

	// HandlePrepareFiles informs the handler that the given files will be
	// requested in order, allowing pre-parsing.
	HandlePrepareFiles(filenames []string) error

	// HandleReset resets the handler to a fresh state.
	HandleReset() error

	// HandleGetMetadata returns the syntax kind and node flag mappings.
	// The returned response is written to the client unchanged.
	HandleGetMetadata() (*MetadataResponse, error)
}
|
||||
|
||||
// Server reads commands from its input stream and dispatches them to a
// Handler, writing one response per command to its output stream.
// NewServer wires the streams to stdin/stdout; NewServerWithIO accepts
// arbitrary streams.
type Server struct {
	handler Handler   // receives every dispatched command
	in      io.Reader // source of newline-delimited JSON commands
	out     io.Writer // destination for newline-delimited JSON responses
}
|
||||
|
||||
// NewServer creates a new protocol server.
|
||||
func NewServer(handler Handler) *Server {
|
||||
return &Server{
|
||||
handler: handler,
|
||||
in: os.Stdin,
|
||||
out: os.Stdout,
|
||||
}
|
||||
}
|
||||
|
||||
// NewServerWithIO creates a server with custom I/O streams (for testing).
|
||||
func NewServerWithIO(handler Handler, in io.Reader, out io.Writer) *Server {
|
||||
return &Server{
|
||||
handler: handler,
|
||||
in: in,
|
||||
out: out,
|
||||
}
|
||||
}
|
||||
|
||||
// Run reads commands from stdin and processes them until a "quit" command
|
||||
// is received or stdin is closed.
|
||||
func (s *Server) Run() error {
|
||||
scanner := bufio.NewScanner(s.in)
|
||||
// Allow for very large JSON payloads.
|
||||
scanner.Buffer(make([]byte, 1024*1024), 100*1024*1024)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
var cmd Command
|
||||
if err := json.Unmarshal([]byte(line), &cmd); err != nil {
|
||||
s.writeResponse(&ErrorResponse{
|
||||
Type: "error",
|
||||
Message: fmt.Sprintf("failed to parse command: %v", err),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
quit, err := s.dispatch(cmd)
|
||||
if err != nil {
|
||||
s.writeResponse(&ErrorResponse{
|
||||
Type: "error",
|
||||
Message: err.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
if quit {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
func (s *Server) dispatch(cmd Command) (quit bool, err error) {
|
||||
switch cmd.Command {
|
||||
case "parse":
|
||||
ast, err := s.handler.HandleParse(cmd.Filename)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
s.writeResponse(&ASTResponse{
|
||||
Type: "ast",
|
||||
AST: ast,
|
||||
})
|
||||
case "prepare-files":
|
||||
if err := s.handler.HandlePrepareFiles(cmd.Filenames); err != nil {
|
||||
return false, err
|
||||
}
|
||||
s.writeResponse(&OKResponse{Type: "ok"})
|
||||
case "reset":
|
||||
if err := s.handler.HandleReset(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
s.writeResponse(&ResetDoneResponse{Type: "reset-done"})
|
||||
case "get-metadata":
|
||||
resp, err := s.handler.HandleGetMetadata()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
s.writeResponse(resp)
|
||||
case "quit":
|
||||
return true, nil
|
||||
default:
|
||||
return false, fmt.Errorf("unknown command: %s", cmd.Command)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (s *Server) writeResponse(resp Response) {
|
||||
data, err := json.Marshal(resp)
|
||||
if err != nil {
|
||||
// If we can't marshal the response, write an error.
|
||||
fmt.Fprintf(s.out, `{"type":"error","message":"marshal error: %s"}`+"\n", err.Error())
|
||||
return
|
||||
}
|
||||
s.out.Write(data)
|
||||
s.out.Write([]byte("\n"))
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
package protocol
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// mockHandler implements Handler for testing.
// Each field optionally overrides the corresponding Handle* method; when a
// field is nil the method falls back to a canned successful result.
type mockHandler struct {
	parseFunc        func(string) (interface{}, error) // overrides HandleParse
	prepareFilesFunc func([]string) error              // overrides HandlePrepareFiles
	resetFunc        func() error                      // overrides HandleReset
	getMetadataFunc  func() (*MetadataResponse, error) // overrides HandleGetMetadata
}
|
||||
|
||||
func (h *mockHandler) HandleParse(filename string) (interface{}, error) {
|
||||
if h.parseFunc != nil {
|
||||
return h.parseFunc(filename)
|
||||
}
|
||||
return map[string]interface{}{"kind": "SourceFile"}, nil
|
||||
}
|
||||
|
||||
func (h *mockHandler) HandlePrepareFiles(filenames []string) error {
|
||||
if h.prepareFilesFunc != nil {
|
||||
return h.prepareFilesFunc(filenames)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *mockHandler) HandleReset() error {
|
||||
if h.resetFunc != nil {
|
||||
return h.resetFunc()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *mockHandler) HandleGetMetadata() (*MetadataResponse, error) {
|
||||
if h.getMetadataFunc != nil {
|
||||
return h.getMetadataFunc()
|
||||
}
|
||||
return &MetadataResponse{
|
||||
Type: "metadata",
|
||||
SyntaxKinds: map[string]int{"SourceFile": 316},
|
||||
NodeFlags: map[string]int{"None": 0},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func TestServerGetMetadata(t *testing.T) {
|
||||
input := `{"command":"get-metadata"}` + "\n" + `{"command":"quit"}` + "\n"
|
||||
var output bytes.Buffer
|
||||
|
||||
handler := &mockHandler{}
|
||||
server := NewServerWithIO(handler, strings.NewReader(input), &output)
|
||||
|
||||
if err := server.Run(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
var resp MetadataResponse
|
||||
if err := json.Unmarshal(output.Bytes()[:bytes.IndexByte(output.Bytes(), '\n')], &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Type != "metadata" {
|
||||
t.Errorf("expected type 'metadata', got %q", resp.Type)
|
||||
}
|
||||
if _, ok := resp.SyntaxKinds["SourceFile"]; !ok {
|
||||
t.Error("expected syntaxKinds to contain SourceFile")
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerParse(t *testing.T) {
|
||||
input := `{"command":"parse","filename":"test.ts"}` + "\n" + `{"command":"quit"}` + "\n"
|
||||
var output bytes.Buffer
|
||||
|
||||
handler := &mockHandler{}
|
||||
server := NewServerWithIO(handler, strings.NewReader(input), &output)
|
||||
|
||||
if err := server.Run(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
var resp ASTResponse
|
||||
if err := json.Unmarshal(output.Bytes()[:bytes.IndexByte(output.Bytes(), '\n')], &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Type != "ast" {
|
||||
t.Errorf("expected type 'ast', got %q", resp.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerPrepareFiles(t *testing.T) {
|
||||
input := `{"command":"prepare-files","filenames":["a.ts","b.ts"]}` + "\n" + `{"command":"quit"}` + "\n"
|
||||
var output bytes.Buffer
|
||||
|
||||
var receivedFiles []string
|
||||
handler := &mockHandler{
|
||||
prepareFilesFunc: func(files []string) error {
|
||||
receivedFiles = files
|
||||
return nil
|
||||
},
|
||||
}
|
||||
server := NewServerWithIO(handler, strings.NewReader(input), &output)
|
||||
|
||||
if err := server.Run(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if len(receivedFiles) != 2 || receivedFiles[0] != "a.ts" || receivedFiles[1] != "b.ts" {
|
||||
t.Errorf("expected [a.ts b.ts], got %v", receivedFiles)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerReset(t *testing.T) {
|
||||
input := `{"command":"reset"}` + "\n" + `{"command":"quit"}` + "\n"
|
||||
var output bytes.Buffer
|
||||
|
||||
resetCalled := false
|
||||
handler := &mockHandler{
|
||||
resetFunc: func() error {
|
||||
resetCalled = true
|
||||
return nil
|
||||
},
|
||||
}
|
||||
server := NewServerWithIO(handler, strings.NewReader(input), &output)
|
||||
|
||||
if err := server.Run(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if !resetCalled {
|
||||
t.Error("expected reset to be called")
|
||||
}
|
||||
|
||||
var resp ResetDoneResponse
|
||||
if err := json.Unmarshal(output.Bytes()[:bytes.IndexByte(output.Bytes(), '\n')], &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Type != "reset-done" {
|
||||
t.Errorf("expected type 'reset-done', got %q", resp.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerQuit(t *testing.T) {
|
||||
input := `{"command":"quit"}` + "\n"
|
||||
var output bytes.Buffer
|
||||
|
||||
handler := &mockHandler{}
|
||||
server := NewServerWithIO(handler, strings.NewReader(input), &output)
|
||||
|
||||
if err := server.Run(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
// No output expected for quit
|
||||
if output.Len() != 0 {
|
||||
t.Errorf("expected no output, got %q", output.String())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,411 @@
|
||||
package tsparser
|
||||
|
||||
// GetStaticTS7Metadata returns hardcoded metadata for TypeScript 7.
|
||||
// This must be kept in sync with the TypeScript compiler's SyntaxKind and
|
||||
// NodeFlags enums.
|
||||
//
|
||||
// The SyntaxKind values here correspond to the TypeScript 7 (Go port)
|
||||
// compiler. The Java extractor uses the string names (not numeric IDs)
|
||||
// to identify node kinds, so the exact numeric values only matter for
|
||||
// the metadata response.
|
||||
func GetStaticTS7Metadata() *Metadata {
|
||||
return &Metadata{
|
||||
SyntaxKinds: syntaxKinds,
|
||||
NodeFlags: nodeFlags,
|
||||
}
|
||||
}
|
||||
|
||||
// GetSyntaxKinds returns the raw SyntaxKind name→number map.
// The package-level map itself is returned, not a copy, so callers should
// treat it as read-only.
func GetSyntaxKinds() map[string]int {
	return syntaxKinds
}
|
||||
|
||||
// BuildKindToNameMap returns a number→name reverse map for SyntaxKinds.
|
||||
func BuildKindToNameMap() map[uint32]string {
|
||||
m := make(map[uint32]string, len(syntaxKinds))
|
||||
for name, num := range syntaxKinds {
|
||||
key := uint32(num)
|
||||
if existing, ok := m[key]; !ok || len(name) < len(existing) {
|
||||
m[key] = name
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// syntaxKinds maps SyntaxKind names to their numeric values in TypeScript 7.
|
||||
// Generated from microsoft/typescript-go/internal/ast/kind.go (iota enum).
|
||||
var syntaxKinds = map[string]int{
|
||||
"Unknown": 0,
|
||||
"EndOfFileToken": 1,
|
||||
"SingleLineCommentTrivia": 2,
|
||||
"MultiLineCommentTrivia": 3,
|
||||
"NewLineTrivia": 4,
|
||||
"WhitespaceTrivia": 5,
|
||||
"ShebangTrivia": 6,
|
||||
"ConflictMarkerTrivia": 7,
|
||||
"NumericLiteral": 8,
|
||||
"BigIntLiteral": 9,
|
||||
"StringLiteral": 10,
|
||||
"JsxText": 11,
|
||||
"JsxTextAllWhiteSpaces": 12,
|
||||
"RegularExpressionLiteral": 13,
|
||||
"NoSubstitutionTemplateLiteral": 14,
|
||||
"TemplateHead": 15,
|
||||
"TemplateMiddle": 16,
|
||||
"TemplateTail": 17,
|
||||
"OpenBraceToken": 18,
|
||||
"CloseBraceToken": 19,
|
||||
"OpenParenToken": 20,
|
||||
"CloseParenToken": 21,
|
||||
"OpenBracketToken": 22,
|
||||
"CloseBracketToken": 23,
|
||||
"DotToken": 24,
|
||||
"DotDotDotToken": 25,
|
||||
"SemicolonToken": 26,
|
||||
"CommaToken": 27,
|
||||
"QuestionDotToken": 28,
|
||||
"LessThanToken": 29,
|
||||
"LessThanSlashToken": 30,
|
||||
"GreaterThanToken": 31,
|
||||
"LessThanEqualsToken": 32,
|
||||
"GreaterThanEqualsToken": 33,
|
||||
"EqualsEqualsToken": 34,
|
||||
"ExclamationEqualsToken": 35,
|
||||
"EqualsEqualsEqualsToken": 36,
|
||||
"ExclamationEqualsEqualsToken": 37,
|
||||
"EqualsGreaterThanToken": 38,
|
||||
"PlusToken": 39,
|
||||
"MinusToken": 40,
|
||||
"AsteriskToken": 41,
|
||||
"AsteriskAsteriskToken": 42,
|
||||
"SlashToken": 43,
|
||||
"PercentToken": 44,
|
||||
"PlusPlusToken": 45,
|
||||
"MinusMinusToken": 46,
|
||||
"LessThanLessThanToken": 47,
|
||||
"GreaterThanGreaterThanToken": 48,
|
||||
"GreaterThanGreaterThanGreaterThanToken": 49,
|
||||
"AmpersandToken": 50,
|
||||
"BarToken": 51,
|
||||
"CaretToken": 52,
|
||||
"ExclamationToken": 53,
|
||||
"TildeToken": 54,
|
||||
"AmpersandAmpersandToken": 55,
|
||||
"BarBarToken": 56,
|
||||
"QuestionToken": 57,
|
||||
"ColonToken": 58,
|
||||
"AtToken": 59,
|
||||
"QuestionQuestionToken": 60,
|
||||
"HashToken": 62,
|
||||
"EqualsToken": 63,
|
||||
"PlusEqualsToken": 64,
|
||||
"MinusEqualsToken": 65,
|
||||
"AsteriskEqualsToken": 66,
|
||||
"AsteriskAsteriskEqualsToken": 67,
|
||||
"SlashEqualsToken": 68,
|
||||
"PercentEqualsToken": 69,
|
||||
"LessThanLessThanEqualsToken": 70,
|
||||
"GreaterThanGreaterThanEqualsToken": 71,
|
||||
"GreaterThanGreaterThanGreaterThanEqualsToken": 72,
|
||||
"AmpersandEqualsToken": 73,
|
||||
"BarEqualsToken": 74,
|
||||
"BarBarEqualsToken": 75,
|
||||
"AmpersandAmpersandEqualsToken": 76,
|
||||
"QuestionQuestionEqualsToken": 77,
|
||||
"CaretEqualsToken": 78,
|
||||
"Identifier": 79,
|
||||
"PrivateIdentifier": 80,
|
||||
"BreakKeyword": 82,
|
||||
"CaseKeyword": 83,
|
||||
"CatchKeyword": 84,
|
||||
"ClassKeyword": 85,
|
||||
"ConstKeyword": 86,
|
||||
"ContinueKeyword": 87,
|
||||
"DebuggerKeyword": 88,
|
||||
"DefaultKeyword": 89,
|
||||
"DeleteKeyword": 90,
|
||||
"DoKeyword": 91,
|
||||
"ElseKeyword": 92,
|
||||
"EnumKeyword": 93,
|
||||
"ExportKeyword": 94,
|
||||
"ExtendsKeyword": 95,
|
||||
"FalseKeyword": 96,
|
||||
"FinallyKeyword": 97,
|
||||
"ForKeyword": 98,
|
||||
"FunctionKeyword": 99,
|
||||
"IfKeyword": 100,
|
||||
"ImportKeyword": 101,
|
||||
"InKeyword": 102,
|
||||
"InstanceOfKeyword": 103,
|
||||
"NewKeyword": 104,
|
||||
"NullKeyword": 105,
|
||||
"ReturnKeyword": 106,
|
||||
"SuperKeyword": 107,
|
||||
"SwitchKeyword": 108,
|
||||
"ThisKeyword": 109,
|
||||
"ThrowKeyword": 110,
|
||||
"TrueKeyword": 111,
|
||||
"TryKeyword": 112,
|
||||
"TypeOfKeyword": 113,
|
||||
"VarKeyword": 114,
|
||||
"VoidKeyword": 115,
|
||||
"WhileKeyword": 116,
|
||||
"WithKeyword": 117,
|
||||
"ImplementsKeyword": 118,
|
||||
"InterfaceKeyword": 119,
|
||||
"LetKeyword": 120,
|
||||
"PackageKeyword": 121,
|
||||
"PrivateKeyword": 122,
|
||||
"ProtectedKeyword": 123,
|
||||
"PublicKeyword": 124,
|
||||
"StaticKeyword": 125,
|
||||
"YieldKeyword": 126,
|
||||
"AbstractKeyword": 127,
|
||||
"AccessorKeyword": 128,
|
||||
"AsKeyword": 129,
|
||||
"AssertsKeyword": 130,
|
||||
"AssertKeyword": 131,
|
||||
"AnyKeyword": 132,
|
||||
"AsyncKeyword": 133,
|
||||
"AwaitKeyword": 134,
|
||||
"BooleanKeyword": 135,
|
||||
"ConstructorKeyword": 136,
|
||||
"DeclareKeyword": 137,
|
||||
"GetKeyword": 138,
|
||||
"InferKeyword": 140,
|
||||
"IntrinsicKeyword": 141,
|
||||
"IsKeyword": 142,
|
||||
"KeyOfKeyword": 143,
|
||||
"ModuleKeyword": 144,
|
||||
"NamespaceKeyword": 145,
|
||||
"NeverKeyword": 146,
|
||||
"ReadonlyKeyword": 148,
|
||||
"RequireKeyword": 149,
|
||||
"NumberKeyword": 150,
|
||||
"ObjectKeyword": 151,
|
||||
"SetKeyword": 153,
|
||||
"StringKeyword": 154,
|
||||
"SymbolKeyword": 155,
|
||||
"TypeKeyword": 156,
|
||||
"UndefinedKeyword": 157,
|
||||
"UniqueKeyword": 158,
|
||||
"UnknownKeyword": 159,
|
||||
"FromKeyword": 161,
|
||||
"BigIntKeyword": 163,
|
||||
"OverrideKeyword": 164,
|
||||
"OfKeyword": 165,
|
||||
"DeferKeyword": 166,
|
||||
"QualifiedName": 167,
|
||||
"ComputedPropertyName": 168,
|
||||
"TypeParameter": 169,
|
||||
"Parameter": 170,
|
||||
"Decorator": 171,
|
||||
"PropertySignature": 172,
|
||||
"PropertyDeclaration": 173,
|
||||
"MethodSignature": 174,
|
||||
"MethodDeclaration": 175,
|
||||
"ClassStaticBlockDeclaration": 176,
|
||||
"Constructor": 177,
|
||||
"GetAccessor": 178,
|
||||
"SetAccessor": 179,
|
||||
"CallSignature": 180,
|
||||
"ConstructSignature": 181,
|
||||
"IndexSignature": 182,
|
||||
"TypePredicate": 183,
|
||||
"TypeReference": 184,
|
||||
"FunctionType": 185,
|
||||
"ConstructorType": 186,
|
||||
"TypeQuery": 187,
|
||||
"TypeLiteral": 188,
|
||||
"ArrayType": 189,
|
||||
"TupleType": 190,
|
||||
"OptionalType": 191,
|
||||
"RestType": 192,
|
||||
"UnionType": 193,
|
||||
"IntersectionType": 194,
|
||||
"ConditionalType": 195,
|
||||
"InferType": 196,
|
||||
"ParenthesizedType": 197,
|
||||
"ThisType": 198,
|
||||
"TypeOperator": 199,
|
||||
"IndexedAccessType": 200,
|
||||
"MappedType": 201,
|
||||
"LiteralType": 202,
|
||||
"NamedTupleMember": 203,
|
||||
"TemplateLiteralType": 204,
|
||||
"TemplateLiteralTypeSpan": 205,
|
||||
"ImportType": 206,
|
||||
"ObjectBindingPattern": 207,
|
||||
"ArrayBindingPattern": 208,
|
||||
"BindingElement": 209,
|
||||
"ArrayLiteralExpression": 210,
|
||||
"ObjectLiteralExpression": 211,
|
||||
"PropertyAccessExpression": 212,
|
||||
"ElementAccessExpression": 213,
|
||||
"CallExpression": 214,
|
||||
"NewExpression": 215,
|
||||
"TaggedTemplateExpression": 216,
|
||||
"TypeAssertionExpression": 217,
|
||||
"ParenthesizedExpression": 218,
|
||||
"FunctionExpression": 219,
|
||||
"ArrowFunction": 220,
|
||||
"DeleteExpression": 221,
|
||||
"TypeOfExpression": 222,
|
||||
"VoidExpression": 223,
|
||||
"AwaitExpression": 224,
|
||||
"PrefixUnaryExpression": 225,
|
||||
"PostfixUnaryExpression": 226,
|
||||
"BinaryExpression": 227,
|
||||
"ConditionalExpression": 228,
|
||||
"TemplateExpression": 229,
|
||||
"YieldExpression": 230,
|
||||
"SpreadElement": 231,
|
||||
"ClassExpression": 232,
|
||||
"OmittedExpression": 233,
|
||||
"ExpressionWithTypeArguments": 234,
|
||||
"AsExpression": 235,
|
||||
"NonNullExpression": 236,
|
||||
"MetaProperty": 237,
|
||||
"SatisfiesExpression": 239,
|
||||
"TemplateSpan": 240,
|
||||
"SemicolonClassElement": 241,
|
||||
"Block": 242,
|
||||
"EmptyStatement": 243,
|
||||
"VariableStatement": 244,
|
||||
"ExpressionStatement": 245,
|
||||
"IfStatement": 246,
|
||||
"DoStatement": 247,
|
||||
"WhileStatement": 248,
|
||||
"ForStatement": 249,
|
||||
"ForInStatement": 250,
|
||||
"ForOfStatement": 251,
|
||||
"ContinueStatement": 252,
|
||||
"BreakStatement": 253,
|
||||
"ReturnStatement": 254,
|
||||
"WithStatement": 255,
|
||||
"SwitchStatement": 256,
|
||||
"LabeledStatement": 257,
|
||||
"ThrowStatement": 258,
|
||||
"TryStatement": 259,
|
||||
"DebuggerStatement": 260,
|
||||
"VariableDeclaration": 261,
|
||||
"VariableDeclarationList": 262,
|
||||
"FunctionDeclaration": 263,
|
||||
"ClassDeclaration": 264,
|
||||
"InterfaceDeclaration": 265,
|
||||
"TypeAliasDeclaration": 266,
|
||||
"EnumDeclaration": 267,
|
||||
"ModuleDeclaration": 268,
|
||||
"ModuleBlock": 269,
|
||||
"CaseBlock": 270,
|
||||
"NamespaceExportDeclaration": 271,
|
||||
"ImportEqualsDeclaration": 272,
|
||||
"ImportDeclaration": 273,
|
||||
"ImportClause": 274,
|
||||
"NamespaceImport": 275,
|
||||
"NamedImports": 276,
|
||||
"ImportSpecifier": 277,
|
||||
"ExportAssignment": 278,
|
||||
"ExportDeclaration": 279,
|
||||
"NamedExports": 280,
|
||||
"NamespaceExport": 281,
|
||||
"ExportSpecifier": 282,
|
||||
"MissingDeclaration": 283,
|
||||
"ExternalModuleReference": 284,
|
||||
"JsxElement": 285,
|
||||
"JsxSelfClosingElement": 286,
|
||||
"JsxOpeningElement": 287,
|
||||
"JsxClosingElement": 288,
|
||||
"JsxFragment": 289,
|
||||
"JsxOpeningFragment": 290,
|
||||
"JsxClosingFragment": 291,
|
||||
"JsxAttribute": 292,
|
||||
"JsxAttributes": 293,
|
||||
"JsxSpreadAttribute": 294,
|
||||
"JsxExpression": 295,
|
||||
"JsxNamespacedName": 296,
|
||||
"CaseClause": 297,
|
||||
"DefaultClause": 298,
|
||||
"HeritageClause": 299,
|
||||
"CatchClause": 300,
|
||||
"ImportAttributes": 301,
|
||||
"ImportAttribute": 302,
|
||||
"PropertyAssignment": 303,
|
||||
"ShorthandPropertyAssignment": 304,
|
||||
"SpreadAssignment": 305,
|
||||
"EnumMember": 306,
|
||||
"SourceFile": 307,
|
||||
"JSDocTypeExpression": 308,
|
||||
"JSDocNameReference": 309,
|
||||
"JSDocNullableType": 312,
|
||||
"JSDocNonNullableType": 313,
|
||||
"JSDocOptionalType": 314,
|
||||
"JSDocVariadicType": 315,
|
||||
"JSDoc": 316,
|
||||
"JSDocText": 317,
|
||||
"JSDocTypeLiteral": 318,
|
||||
"JSDocSignature": 319,
|
||||
"JSDocLink": 320,
|
||||
"JSDocLinkCode": 321,
|
||||
"JSDocLinkPlain": 322,
|
||||
"JSDocTag": 323,
|
||||
"JSDocAugmentsTag": 324,
|
||||
"JSDocImplementsTag": 325,
|
||||
"JSDocDeprecatedTag": 326,
|
||||
"JSDocPublicTag": 327,
|
||||
"JSDocPrivateTag": 328,
|
||||
"JSDocProtectedTag": 329,
|
||||
"JSDocReadonlyTag": 330,
|
||||
"JSDocOverrideTag": 331,
|
||||
"JSDocCallbackTag": 332,
|
||||
"JSDocOverloadTag": 333,
|
||||
"JSDocParameterTag": 334,
|
||||
"JSDocReturnTag": 335,
|
||||
"JSDocThisTag": 336,
|
||||
"JSDocTypeTag": 337,
|
||||
"JSDocTemplateTag": 338,
|
||||
"JSDocTypedefTag": 339,
|
||||
"JSDocSeeTag": 340,
|
||||
"JSDocPropertyTag": 341,
|
||||
"JSDocThrowsTag": 342,
|
||||
"JSDocSatisfiesTag": 343,
|
||||
"JSDocImportTag": 344,
|
||||
}
|
||||
|
||||
// nodeFlags maps NodeFlags names to their numeric values sent to the Java extractor.
// The Java extractor only checks Using, NestedNamespace, and GlobalAugmentation.
//
// TS7 binary AST flag layout (differs from TS5):
// bit 0: Let, bit 1: Const, bit 2: Using, bit 3: NestedNamespace (not set in binary),
// bit 4: Namespace, bit 5: OptionalChain, bit 6: ExportContext (was GlobalAugmentation
// in TS5 at bit 11), bit 7: ContainsThis, ...
//
// GlobalAugmentation is NOT a flag in the TS7 binary format. We use a synthetic bit (30)
// that the converter sets on `declare global {}` nodes so the Java extractor can detect them.
var nodeFlags = map[string]int{
	"None":                            0,
	"Let":                             1,         // bit 0
	"Const":                           2,         // bit 1
	"Using":                           4,         // bit 2
	"AwaitUsing":                      6,         // Using | Const
	"NestedNamespace":                 8,         // bit 3 — synthetic, set by converter
	"Namespace":                       16,        // bit 4
	"OptionalChain":                   32,        // bit 5
	"ExportContext":                   64,        // bit 6
	"GlobalAugmentation":              1 << 30,   // synthetic — set by converter for `declare global {}`
	"ContainsThis":                    128,       // bit 7
	"HasImplicitReturn":               256,       // bit 8
	"HasExplicitReturn":               512,       // bit 9
	"HasAsyncFunctions":               1024,      // bit 10
	"DisallowInContext":               2048,      // bit 11
	"YieldContext":                    4096,      // bit 12
	"DecoratorContext":                8192,      // bit 13
	"AwaitContext":                    16384,     // bit 14
	"DisallowConditionalTypesContext": 32768,     // bit 15
	"ThisNodeHasError":                65536,     // bit 16
	"JavaScriptFile":                  131072,    // bit 17
	"ThisNodeOrAnySubNodesHasError":   262144,    // bit 18
	"HasAggregatedChildData":          524288,    // bit 19
	"JSDoc":                           8388608,   // bit 23
	"JsonFile":                        67108864,  // bit 26
}
|
||||
@@ -0,0 +1,59 @@
|
||||
// Package tsparser provides an interface for parsing TypeScript files and
|
||||
// implementations backed by different TypeScript compiler versions.
|
||||
//
|
||||
// The primary implementation uses the tsgo binary (TypeScript 7's Go-based
|
||||
// compiler) as a subprocess via its --api mode.
|
||||
package tsparser
|
||||
|
||||
import "io"
|
||||
|
||||
// ParseResult holds the parsed AST for a single file.
type ParseResult struct {
	// AST is the parsed AST tree, ready for JSON serialization.
	AST interface{}

	// RawData holds the raw binary-encoded source file data from tsgo.
	// This is present when using the tsgo API backend and needs to be
	// decoded into the AST format expected by the Java extractor.
	// The StandaloneParser leaves this field unset.
	RawData []byte
}
|
||||
|
||||
// Metadata holds the compiler metadata (syntax kind and node flag mappings).
type Metadata struct {
	// SyntaxKinds maps SyntaxKind names to their numeric values.
	SyntaxKinds map[string]int
	// NodeFlags maps NodeFlags names to their numeric values.
	NodeFlags map[string]int
}
|
||||
|
||||
// Parser is the interface for TypeScript parsing backends.
// StandaloneParser (one tsgo invocation per file) and TsgoParser (tsgo run
// as a subprocess in API mode) are the backends in this package.
type Parser interface {
	// Parse parses the given file and returns the AST.
	Parse(filename string) (*ParseResult, error)

	// GetMetadata returns the syntax kind and node flag mappings for
	// the underlying TypeScript compiler.
	GetMetadata() (*Metadata, error)

	// Reset discards any cached state and returns the parser to a fresh state.
	Reset() error

	// Close shuts down the parser, releasing any resources.
	Close() error
}
|
||||
|
||||
// TsgoBinaryFinder locates the tsgo binary. This is separated to allow
// different search strategies (PATH, npm package, env var, etc.).
type TsgoBinaryFinder interface {
	// FindBinary returns the path to the tsgo binary, or an error when no
	// binary could be located.
	FindBinary() (string, error)
}
|
||||
|
||||
// Config configures the parser backend.
type Config struct {
	// TsgoBinary is the explicit path to the tsgo binary.
	// If empty, the binary is found via TsgoBinaryFinder or PATH.
	TsgoBinary string

	// Stderr is where to redirect the tsgo process's stderr.
	// If nil, stderr is discarded.
	// NOTE(review): StandaloneParser.Parse does not consult this field; it
	// captures stderr itself and returns it in the placeholder AST.
	Stderr io.Writer
}
|
||||
@@ -0,0 +1,110 @@
|
||||
package tsparser
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// StandaloneParser implements the Parser interface by invoking the tsgo
// binary once per file (non-persistent). This is simpler but slower than
// the TsgoParser which keeps a persistent subprocess.
//
// This is intended as a fallback and for testing. For production use,
// prefer TsgoParser.
type StandaloneParser struct {
	// config supplies the optional explicit path to the tsgo binary.
	config Config
}
|
||||
|
||||
// NewStandaloneParser creates a parser that invokes tsgo once per file.
|
||||
func NewStandaloneParser(config Config) *StandaloneParser {
|
||||
return &StandaloneParser{config: config}
|
||||
}
|
||||
|
||||
func (p *StandaloneParser) findBinary() (string, error) {
|
||||
if p.config.TsgoBinary != "" {
|
||||
return p.config.TsgoBinary, nil
|
||||
}
|
||||
path, err := exec.LookPath("tsgo")
|
||||
if err == nil {
|
||||
return path, nil
|
||||
}
|
||||
return "", fmt.Errorf("tsgo binary not found on PATH")
|
||||
}
|
||||
|
||||
// Parse parses a single TypeScript file by running tsgo.
|
||||
// Since tsgo doesn't have a direct "dump AST" mode, this uses a
|
||||
// minimal tsconfig.json to parse the file and extract diagnostics.
|
||||
//
|
||||
// TODO: Replace with direct API call when the tsgo Go API is public.
|
||||
func (p *StandaloneParser) Parse(filename string) (*ParseResult, error) {
|
||||
binary, err := p.findBinary()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
absPath, err := filepath.Abs(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to resolve path: %w", err)
|
||||
}
|
||||
|
||||
// Create a temporary tsconfig to parse just this one file.
|
||||
tmpDir, err := os.MkdirTemp("", "tsparser-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create temp dir: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
tsconfig := map[string]interface{}{
|
||||
"compilerOptions": map[string]interface{}{
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"jsx": "preserve",
|
||||
"experimentalDecorators": true,
|
||||
"noResolve": true,
|
||||
"noEmit": true,
|
||||
},
|
||||
"files": []string{absPath},
|
||||
}
|
||||
tsconfigData, _ := json.Marshal(tsconfig)
|
||||
tsconfigPath := filepath.Join(tmpDir, "tsconfig.json")
|
||||
if err := os.WriteFile(tsconfigPath, tsconfigData, 0644); err != nil {
|
||||
return nil, fmt.Errorf("failed to write tsconfig: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command(binary, "--project", tsconfigPath, "--noEmit")
|
||||
var stderr strings.Builder
|
||||
cmd.Stderr = &stderr
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
// tsgo reports type errors via exit code, but the parse may still succeed.
|
||||
// We only care about parse errors, not type errors.
|
||||
_ = output
|
||||
}
|
||||
|
||||
// tsgo doesn't dump the AST directly. For now, return a placeholder
|
||||
// indicating the file was processed. The actual AST extraction will
|
||||
// need the Go API or a custom tsgo build.
|
||||
return &ParseResult{
|
||||
AST: map[string]interface{}{
|
||||
"kind": "SourceFile",
|
||||
"_note": "placeholder: tsgo CLI does not support AST dump; awaiting Go API",
|
||||
"_file": absPath,
|
||||
"_error": stderr.String(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetMetadata returns static TS7 metadata.
// It does not run tsgo and always returns a nil error.
func (p *StandaloneParser) GetMetadata() (*Metadata, error) {
	return GetStaticTS7Metadata(), nil
}
|
||||
|
||||
// Reset is a no-op for the standalone parser and always returns nil.
func (p *StandaloneParser) Reset() error { return nil }
|
||||
|
||||
// Close is a no-op for the standalone parser and always returns nil.
func (p *StandaloneParser) Close() error { return nil }
|
||||
483
javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go
Normal file
483
javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go
Normal file
@@ -0,0 +1,483 @@
|
||||
package tsparser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/textproto"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/astconv"
|
||||
)
|
||||
|
||||
// TsgoParser implements the Parser interface by running the tsgo binary
// as a subprocess using its --api --async (JSON-RPC) mode.
//
// The tsgo API uses LSP-style Content-Length framing with JSON-RPC 2.0.
// The API is project-based: you initialize, create a snapshot (optionally
// opening a project/tsconfig), then query source files from that snapshot.
// Source files are returned as a custom binary encoding (not JSON).
//
// This is a transitional implementation. When the typescript-go project
// exposes a public Go API, this should be replaced with direct in-process
// calls for better performance.
type TsgoParser struct {
	config Config // backend configuration passed to NewTsgoParser
	// NOTE(review): the fields below are presumably the subprocess state
	// guarded by mu — confirm against the methods that use them.
	mu      sync.Mutex
	cmd     *exec.Cmd
	stdin   io.WriteCloser
	stdout  *bufio.Reader
	started bool
	nextID  int // initialized to 1 by NewTsgoParser; presumably the next JSON-RPC request ID — confirm

	// Cached handles from the API session
	snapshotHandle string
	projectHandle  string
}
|
||||
|
||||
// NewTsgoParser creates a parser backed by the tsgo binary.
|
||||
func NewTsgoParser(config Config) *TsgoParser {
|
||||
return &TsgoParser{
|
||||
config: config,
|
||||
nextID: 1,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *TsgoParser) findBinary() (string, error) {
|
||||
if p.config.TsgoBinary != "" {
|
||||
return p.config.TsgoBinary, nil
|
||||
}
|
||||
// Look for tsgo on PATH (installed via: npm install -g @typescript/native-preview)
|
||||
path, err := exec.LookPath("tsgo")
|
||||
if err == nil {
|
||||
// The npm-installed tsgo is a Node.js wrapper script that invokes the native binary.
|
||||
// Try to resolve the native binary directly so we don't need Node.js at runtime.
|
||||
if native := resolveNativeTsgo(path); native != "" {
|
||||
return native, nil
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
return "", fmt.Errorf("tsgo binary not found on PATH; install with: npm install -g @typescript/native-preview")
|
||||
}
|
||||
|
||||
// resolveNativeTsgo attempts to find the native tsgo binary inside an npm installation.
|
||||
// The npm package @typescript/native-preview installs a Node.js wrapper at bin/tsgo
|
||||
// which delegates to a platform-specific native binary at:
|
||||
// node_modules/@typescript/native-preview-<platform>-<arch>/lib/tsgo
|
||||
func resolveNativeTsgo(wrapperPath string) string {
|
||||
// Follow symlinks to find the real wrapper location
|
||||
resolved, err := filepath.EvalSymlinks(wrapperPath)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
// The wrapper is at <prefix>/bin/tsgo.js or <prefix>/bin/tsgo
|
||||
// The native binary is at <prefix>/node_modules/@typescript/native-preview-<os>-<arch>/lib/tsgo
|
||||
pkgDir := filepath.Dir(filepath.Dir(resolved))
|
||||
platformPkg := fmt.Sprintf("@typescript/native-preview-%s-%s", runtime.GOOS, runtime.GOARCH)
|
||||
native := filepath.Join(pkgDir, "node_modules", platformPkg, "lib", "tsgo")
|
||||
if info, err := os.Stat(native); err == nil && !info.IsDir() {
|
||||
return native
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// startProcess starts the tsgo subprocess without sending any API requests.
|
||||
func (p *TsgoParser) startProcess() error {
|
||||
if p.started {
|
||||
return nil
|
||||
}
|
||||
|
||||
binary, err := p.findBinary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.cmd = exec.Command(binary, "--api", "--async")
|
||||
p.cmd.Stderr = p.config.Stderr
|
||||
if p.cmd.Stderr == nil {
|
||||
p.cmd.Stderr = os.Stderr
|
||||
}
|
||||
|
||||
stdin, err := p.cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create stdin pipe: %w", err)
|
||||
}
|
||||
p.stdin = stdin
|
||||
|
||||
stdout, err := p.cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create stdout pipe: %w", err)
|
||||
}
|
||||
p.stdout = bufio.NewReaderSize(stdout, 10*1024*1024)
|
||||
|
||||
if err := p.cmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start tsgo: %w", err)
|
||||
}
|
||||
|
||||
p.started = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// ensureInitialized starts the process and sends the initialize request.
|
||||
func (p *TsgoParser) ensureInitialized() error {
|
||||
if err := p.startProcess(); err != nil {
|
||||
return err
|
||||
}
|
||||
if p.snapshotHandle != "" {
|
||||
return nil // Already initialized
|
||||
}
|
||||
|
||||
// Send initialize request
|
||||
_, err := p.sendRequest("initialize", map[string]interface{}{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to initialize tsgo API: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// jsonRPCRequest is a JSON-RPC 2.0 request.
|
||||
type jsonRPCRequest struct {
|
||||
JSONRPC string `json:"jsonrpc"`
|
||||
ID int `json:"id"`
|
||||
Method string `json:"method"`
|
||||
Params interface{} `json:"params,omitempty"`
|
||||
}
|
||||
|
||||
// jsonRPCResponse is a JSON-RPC 2.0 response.
|
||||
type jsonRPCResponse struct {
|
||||
JSONRPC string `json:"jsonrpc"`
|
||||
ID int `json:"id"`
|
||||
Result json.RawMessage `json:"result,omitempty"`
|
||||
Error *jsonRPCError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type jsonRPCError struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// writeMessage writes an LSP-framed message (Content-Length header + body).
|
||||
func (p *TsgoParser) writeMessage(data []byte) error {
|
||||
header := fmt.Sprintf("Content-Length: %d\r\n\r\n", len(data))
|
||||
if _, err := io.WriteString(p.stdin, header); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := p.stdin.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// readMessage reads an LSP-framed message (Content-Length header + body).
|
||||
func (p *TsgoParser) readMessage() ([]byte, error) {
|
||||
tp := textproto.NewReader(p.stdout)
|
||||
header, err := tp.ReadMIMEHeader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read message header: %w", err)
|
||||
}
|
||||
|
||||
lengthStr := header.Get("Content-Length")
|
||||
if lengthStr == "" {
|
||||
return nil, fmt.Errorf("missing Content-Length header")
|
||||
}
|
||||
length, err := strconv.Atoi(lengthStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid Content-Length: %w", err)
|
||||
}
|
||||
|
||||
body := make([]byte, length)
|
||||
if _, err := io.ReadFull(p.stdout, body); err != nil {
|
||||
return nil, fmt.Errorf("failed to read message body: %w", err)
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// sendRequest sends a JSON-RPC request and returns the response. Not locked.
|
||||
func (p *TsgoParser) sendRequest(method string, params interface{}) (json.RawMessage, error) {
|
||||
id := p.nextID
|
||||
p.nextID++
|
||||
|
||||
req := jsonRPCRequest{
|
||||
JSONRPC: "2.0",
|
||||
ID: id,
|
||||
Method: method,
|
||||
Params: params,
|
||||
}
|
||||
|
||||
data, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "[tsgo] >>> %s id=%d\n", method, id)
|
||||
|
||||
if err := p.writeMessage(data); err != nil {
|
||||
return nil, fmt.Errorf("failed to write request: %w", err)
|
||||
}
|
||||
|
||||
// Read responses, skipping notifications (messages without a matching id).
|
||||
// In --async mode, tsgo may send diagnostic notifications between responses.
|
||||
for {
|
||||
respData, err := p.readMessage()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
var resp jsonRPCResponse
|
||||
if err := json.Unmarshal(respData, &resp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
// Skip notifications (id=0 means no id field was present in JSON)
|
||||
if resp.ID != id {
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.Error != nil {
|
||||
return nil, fmt.Errorf("tsgo API error %d: %s", resp.Error.Code, resp.Error.Message)
|
||||
}
|
||||
|
||||
return resp.Result, nil
|
||||
}
|
||||
}
|
||||
|
||||
// call sends a request with proper locking and initialization.
|
||||
func (p *TsgoParser) call(method string, params interface{}) (json.RawMessage, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if err := p.ensureInitialized(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return p.sendRequest(method, params)
|
||||
}
|
||||
|
||||
// updateSnapshotResponse is the response from the updateSnapshot API call.
|
||||
type updateSnapshotResponse struct {
|
||||
Snapshot string `json:"snapshot"`
|
||||
Projects []struct {
|
||||
ID string `json:"id"`
|
||||
ConfigFileName string `json:"configFileName"`
|
||||
} `json:"projects"`
|
||||
}
|
||||
|
||||
// ensureProjectOpen opens a project for the given file.
|
||||
// The tsgo API requires a tsconfig for project opening, so if none exists
|
||||
// in the file's directory, we create a temporary one.
|
||||
func (p *TsgoParser) ensureProjectOpen(filename string) error {
|
||||
if p.snapshotHandle != "" && p.projectHandle != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
dir := filepath.Dir(filename)
|
||||
base := filepath.Base(filename)
|
||||
tsconfigPath := filepath.Join(dir, "tsconfig.json")
|
||||
|
||||
// If no tsconfig exists, create a temporary one
|
||||
createdTsconfig := false
|
||||
if _, err := os.Stat(tsconfigPath); os.IsNotExist(err) {
|
||||
tsconfig := fmt.Sprintf(`{
|
||||
"compilerOptions": {
|
||||
"target": "esnext",
|
||||
"module": "esnext",
|
||||
"noEmit": true,
|
||||
"strict": false,
|
||||
"allowJs": true
|
||||
},
|
||||
"files": [%q]
|
||||
}`, base)
|
||||
if err := os.WriteFile(tsconfigPath, []byte(tsconfig), 0644); err != nil {
|
||||
return fmt.Errorf("failed to create temporary tsconfig: %w", err)
|
||||
}
|
||||
createdTsconfig = true
|
||||
}
|
||||
|
||||
result, err := p.sendRequest("updateSnapshot", map[string]interface{}{
|
||||
"openProject": tsconfigPath,
|
||||
})
|
||||
|
||||
// Clean up temporary tsconfig
|
||||
if createdTsconfig {
|
||||
os.Remove(tsconfigPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open project: %w", err)
|
||||
}
|
||||
|
||||
var resp updateSnapshotResponse
|
||||
if err := json.Unmarshal(result, &resp); err != nil {
|
||||
return fmt.Errorf("failed to parse updateSnapshot response: %w", err)
|
||||
}
|
||||
|
||||
p.snapshotHandle = resp.Snapshot
|
||||
if len(resp.Projects) > 0 {
|
||||
p.projectHandle = resp.Projects[0].ID
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse parses the given file using the tsgo API.
|
||||
//
|
||||
// The tsgo API is project-based, so for each parse request we ensure
|
||||
// a project is open, then call getSourceFile. The response is a custom
|
||||
// binary encoding of the AST (not JSON).
|
||||
//
|
||||
// When the public Go API becomes available, this should be replaced
|
||||
// with direct parser.ParseSourceFile() calls.
|
||||
func (p *TsgoParser) Parse(filename string) (*ParseResult, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if err := p.ensureInitialized(); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", filename, err)
|
||||
}
|
||||
|
||||
if err := p.ensureProjectOpen(filename); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", filename, err)
|
||||
}
|
||||
|
||||
params := map[string]interface{}{
|
||||
"file": filename,
|
||||
}
|
||||
if p.snapshotHandle != "" {
|
||||
params["snapshot"] = p.snapshotHandle
|
||||
}
|
||||
if p.projectHandle != "" {
|
||||
params["project"] = p.projectHandle
|
||||
}
|
||||
|
||||
result, err := p.sendRequest("getSourceFile", params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", filename, err)
|
||||
}
|
||||
|
||||
// The result is {"data":"<base64>"} containing a binary-encoded AST.
|
||||
var dataResp struct {
|
||||
Data string `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(result, &dataResp); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to parse getSourceFile response: %w", filename, err)
|
||||
}
|
||||
|
||||
binaryAST, err := astconv.DecodeBinaryASTFromBase64(dataResp.Data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to decode binary AST: %w", filename, err)
|
||||
}
|
||||
|
||||
// Fetch syntactic diagnostics (parse errors) from the compiler.
|
||||
diags := p.getSyntacticDiagnostics(filename)
|
||||
|
||||
kindToName := BuildKindToNameMap()
|
||||
converter := astconv.NewConverter(binaryAST, kindToName)
|
||||
converter.SetParseDiagnostics(diags)
|
||||
astObj, err := converter.Convert()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to convert AST: %w", filename, err)
|
||||
}
|
||||
|
||||
filtered := astconv.FilterWhitelist(astObj)
|
||||
|
||||
return &ParseResult{
|
||||
AST: filtered,
|
||||
RawData: []byte(dataResp.Data),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getSyntacticDiagnostics fetches parse errors from the tsgo API.
|
||||
// Only includes true parse errors (diagnostic code < 2000), not semantic-level
|
||||
// diagnostics like deprecation warnings that TS7 added (e.g., code 2880 for
|
||||
// import assertions). Returns an empty slice on error (best-effort).
|
||||
func (p *TsgoParser) getSyntacticDiagnostics(filename string) []astconv.ParseDiagnostic {
|
||||
params := map[string]interface{}{
|
||||
"file": filename,
|
||||
}
|
||||
if p.snapshotHandle != "" {
|
||||
params["snapshot"] = p.snapshotHandle
|
||||
}
|
||||
if p.projectHandle != "" {
|
||||
params["project"] = p.projectHandle
|
||||
}
|
||||
|
||||
result, err := p.sendRequest("getSyntacticDiagnostics", params)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var rawDiags []struct {
|
||||
Pos int `json:"pos"`
|
||||
End int `json:"end"`
|
||||
Code int `json:"code"`
|
||||
Category int `json:"category"`
|
||||
Text string `json:"text"`
|
||||
}
|
||||
if err := json.Unmarshal(result, &rawDiags); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
diags := make([]astconv.ParseDiagnostic, 0, len(rawDiags))
|
||||
for _, d := range rawDiags {
|
||||
// Only include genuine parse errors (codes 1000-1999).
|
||||
// Higher codes are semantic diagnostics that TS7 reports as "syntactic"
|
||||
// but which don't indicate actual parse failures.
|
||||
if d.Code < 1000 || d.Code >= 2000 {
|
||||
continue
|
||||
}
|
||||
diags = append(diags, astconv.ParseDiagnostic{
|
||||
Pos: d.Pos,
|
||||
End: d.End,
|
||||
MessageText: d.Text,
|
||||
})
|
||||
}
|
||||
return diags
|
||||
}
|
||||
|
||||
// GetMetadata returns the syntax kinds and node flags.
|
||||
func (p *TsgoParser) GetMetadata() (*Metadata, error) {
|
||||
return GetStaticTS7Metadata(), nil
|
||||
}
|
||||
|
||||
// Reset resets the parser state, killing and restarting the subprocess.
|
||||
func (p *TsgoParser) Reset() error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
p.killProcess()
|
||||
p.started = false
|
||||
p.nextID = 1
|
||||
p.snapshotHandle = ""
|
||||
p.projectHandle = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close shuts down the tsgo subprocess.
|
||||
func (p *TsgoParser) Close() error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
p.killProcess()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *TsgoParser) killProcess() {
|
||||
if !p.started {
|
||||
return
|
||||
}
|
||||
|
||||
if p.stdin != nil {
|
||||
p.stdin.Close()
|
||||
}
|
||||
if p.cmd != nil && p.cmd.Process != nil {
|
||||
p.cmd.Process.Kill()
|
||||
p.cmd.Wait() //nolint:errcheck
|
||||
}
|
||||
p.started = false
|
||||
}
|
||||
@@ -0,0 +1,302 @@
|
||||
package tsparser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTsgoInitialize(t *testing.T) {
|
||||
if _, err := exec.LookPath("tsgo"); err != nil {
|
||||
t.Skip("tsgo not found on PATH; install with: npm install -g @typescript/native-preview")
|
||||
}
|
||||
|
||||
parser := NewTsgoParser(Config{Stderr: os.Stderr})
|
||||
defer parser.Close()
|
||||
|
||||
// Test that we can start the process and send the initialize request
|
||||
parser.mu.Lock()
|
||||
err := parser.startProcess()
|
||||
if err != nil {
|
||||
parser.mu.Unlock()
|
||||
t.Fatalf("Failed to start tsgo process: %v", err)
|
||||
}
|
||||
|
||||
result, err := parser.sendRequest("initialize", map[string]interface{}{})
|
||||
parser.mu.Unlock()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to send initialize request: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Initialize response: %s", string(result))
|
||||
|
||||
// Parse the response
|
||||
var initResp struct {
|
||||
UseCaseSensitiveFileNames bool `json:"useCaseSensitiveFileNames"`
|
||||
CurrentDirectory string `json:"currentDirectory"`
|
||||
}
|
||||
if err := json.Unmarshal(result, &initResp); err != nil {
|
||||
t.Fatalf("Failed to parse initialize response: %v", err)
|
||||
}
|
||||
|
||||
if initResp.CurrentDirectory == "" {
|
||||
t.Error("Expected non-empty CurrentDirectory in initialize response")
|
||||
}
|
||||
t.Logf("Initialized: caseSensitive=%v, cwd=%s",
|
||||
initResp.UseCaseSensitiveFileNames, initResp.CurrentDirectory)
|
||||
}
|
||||
|
||||
func TestTsgoPing(t *testing.T) {
|
||||
if _, err := exec.LookPath("tsgo"); err != nil {
|
||||
t.Skip("tsgo not found on PATH")
|
||||
}
|
||||
|
||||
parser := NewTsgoParser(Config{Stderr: os.Stderr})
|
||||
defer parser.Close()
|
||||
|
||||
parser.mu.Lock()
|
||||
if err := parser.startProcess(); err != nil {
|
||||
parser.mu.Unlock()
|
||||
t.Fatalf("Failed to start tsgo: %v", err)
|
||||
}
|
||||
|
||||
result, err := parser.sendRequest("ping", nil)
|
||||
parser.mu.Unlock()
|
||||
if err != nil {
|
||||
t.Fatalf("Ping failed: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Ping response: %s", string(result))
|
||||
}
|
||||
|
||||
func TestTsgoUpdateSnapshotAndGetSourceFile(t *testing.T) {
|
||||
if _, err := exec.LookPath("tsgo"); err != nil {
|
||||
t.Skip("tsgo not found on PATH")
|
||||
}
|
||||
|
||||
// Find the sample test file
|
||||
testFile := findTestFile(t)
|
||||
testDir := filepath.Dir(testFile)
|
||||
|
||||
// Create a minimal tsconfig.json for the test file
|
||||
tsconfigPath := filepath.Join(testDir, "tsconfig.json")
|
||||
tsconfig := []byte(`{
|
||||
"compilerOptions": {
|
||||
"target": "esnext",
|
||||
"module": "esnext",
|
||||
"noEmit": true,
|
||||
"strict": false
|
||||
},
|
||||
"files": ["sample.ts"]
|
||||
}`)
|
||||
if err := os.WriteFile(tsconfigPath, tsconfig, 0644); err != nil {
|
||||
t.Fatalf("Failed to create tsconfig.json: %v", err)
|
||||
}
|
||||
defer os.Remove(tsconfigPath)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
parser := NewTsgoParser(Config{Stderr: &stderr})
|
||||
defer parser.Close()
|
||||
|
||||
parser.mu.Lock()
|
||||
defer parser.mu.Unlock()
|
||||
|
||||
// Step 1: Start and initialize
|
||||
if err := parser.startProcess(); err != nil {
|
||||
t.Fatalf("Failed to start tsgo: %v", err)
|
||||
}
|
||||
|
||||
initResult, err := parser.sendRequest("initialize", map[string]interface{}{})
|
||||
if err != nil {
|
||||
t.Fatalf("Initialize failed: %v", err)
|
||||
}
|
||||
t.Logf("Initialize: %s", string(initResult))
|
||||
|
||||
// Step 2: Update snapshot with project
|
||||
snapResult, err := parser.sendRequest("updateSnapshot", map[string]interface{}{
|
||||
"openProject": tsconfigPath,
|
||||
})
|
||||
if err != nil {
|
||||
t.Logf("Stderr output: %s", stderr.String())
|
||||
t.Fatalf("updateSnapshot failed: %v", err)
|
||||
}
|
||||
t.Logf("UpdateSnapshot: %s", string(snapResult))
|
||||
|
||||
var snapResp updateSnapshotResponse
|
||||
if err := json.Unmarshal(snapResult, &snapResp); err != nil {
|
||||
t.Fatalf("Failed to parse updateSnapshot response: %v", err)
|
||||
}
|
||||
|
||||
if snapResp.Snapshot == "" {
|
||||
t.Fatal("Expected non-empty snapshot handle")
|
||||
}
|
||||
t.Logf("Got snapshot: %s, %d projects", snapResp.Snapshot, len(snapResp.Projects))
|
||||
for i, p := range snapResp.Projects {
|
||||
t.Logf(" Project %d: id=%s config=%s", i, p.ID, p.ConfigFileName)
|
||||
}
|
||||
|
||||
if len(snapResp.Projects) == 0 {
|
||||
t.Fatal("Expected at least one project in snapshot")
|
||||
}
|
||||
|
||||
// Step 3: Get source file
|
||||
sfResult, err := parser.sendRequest("getSourceFile", map[string]interface{}{
|
||||
"snapshot": snapResp.Snapshot,
|
||||
"project": snapResp.Projects[0].ID,
|
||||
"file": testFile,
|
||||
})
|
||||
if err != nil {
|
||||
t.Logf("Stderr output: %s", stderr.String())
|
||||
t.Fatalf("getSourceFile failed: %v", err)
|
||||
}
|
||||
|
||||
// The response should contain base64-encoded binary data
|
||||
t.Logf("getSourceFile response length: %d bytes", len(sfResult))
|
||||
if len(sfResult) < 10 {
|
||||
t.Logf("getSourceFile response: %s", string(sfResult))
|
||||
} else {
|
||||
t.Logf("getSourceFile response (first 200 chars): %s", string(sfResult[:min(200, len(sfResult))]))
|
||||
}
|
||||
|
||||
if len(sfResult) == 0 || string(sfResult) == "null" {
|
||||
t.Error("Expected non-empty source file response")
|
||||
} else {
|
||||
t.Logf("Successfully retrieved source file data from tsgo API!")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTsgoGetMetadata(t *testing.T) {
|
||||
if _, err := exec.LookPath("tsgo"); err != nil {
|
||||
t.Skip("tsgo not found on PATH")
|
||||
}
|
||||
|
||||
parser := NewTsgoParser(Config{Stderr: os.Stderr})
|
||||
defer parser.Close()
|
||||
|
||||
meta, err := parser.GetMetadata()
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetadata failed: %v", err)
|
||||
}
|
||||
|
||||
if len(meta.SyntaxKinds) == 0 {
|
||||
t.Error("Expected non-empty SyntaxKinds")
|
||||
}
|
||||
if _, ok := meta.SyntaxKinds["SourceFile"]; !ok {
|
||||
t.Error("Expected SyntaxKinds to contain 'SourceFile'")
|
||||
}
|
||||
if len(meta.NodeFlags) == 0 {
|
||||
t.Error("Expected non-empty NodeFlags")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStaticMetadata(t *testing.T) {
|
||||
meta := GetStaticTS7Metadata()
|
||||
|
||||
required := []string{"SourceFile", "Identifier", "Block", "VariableStatement",
|
||||
"FunctionDeclaration", "ClassDeclaration", "InterfaceDeclaration"}
|
||||
for _, kind := range required {
|
||||
if _, ok := meta.SyntaxKinds[kind]; !ok {
|
||||
t.Errorf("Missing required SyntaxKind: %s", kind)
|
||||
}
|
||||
}
|
||||
|
||||
requiredFlags := []string{"None", "Let", "Const", "Namespace"}
|
||||
for _, flag := range requiredFlags {
|
||||
if _, ok := meta.NodeFlags[flag]; !ok {
|
||||
t.Errorf("Missing required NodeFlag: %s", flag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// findTestFile walks upwards from the working directory until it finds
// testdata/sample.ts and returns that path. The test fails when the
// filesystem root is reached without finding it.
func findTestFile(t *testing.T) string {
	t.Helper()

	current, _ := os.Getwd()
	for {
		samplePath := filepath.Join(current, "testdata", "sample.ts")
		if _, statErr := os.Stat(samplePath); statErr == nil {
			return samplePath
		}

		up := filepath.Dir(current)
		if up == current {
			// Reached the top without a match.
			t.Fatal("Could not find testdata/sample.ts")
			return ""
		}
		current = up
	}
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
||||
|
||||
func TestTsgoParse(t *testing.T) {
|
||||
if _, err := exec.LookPath("tsgo"); err != nil {
|
||||
t.Skip("tsgo not found on PATH")
|
||||
}
|
||||
|
||||
sampleFile := findTestFile(t)
|
||||
parser := NewTsgoParser(Config{Stderr: os.Stderr})
|
||||
defer parser.Close()
|
||||
|
||||
result, err := parser.Parse(sampleFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Parse failed: %v", err)
|
||||
}
|
||||
|
||||
ast, ok := result.AST.(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("Expected AST to be map[string]interface{}, got %T", result.AST)
|
||||
}
|
||||
|
||||
// Verify the root is a SourceFile
|
||||
kindVal, ok := ast["kind"]
|
||||
if !ok {
|
||||
t.Fatal("Missing 'kind' property on root node")
|
||||
}
|
||||
kindNum, ok := kindVal.(int)
|
||||
if !ok {
|
||||
t.Fatalf("Expected 'kind' to be int, got %T", kindVal)
|
||||
}
|
||||
if kindNum != 307 { // SourceFile = 307 in TS7
|
||||
t.Errorf("Expected root kind=307 (SourceFile), got %d", kindNum)
|
||||
}
|
||||
|
||||
// Verify $pos and $end
|
||||
if _, ok := ast["$pos"]; !ok {
|
||||
t.Error("Missing '$pos' property")
|
||||
}
|
||||
if _, ok := ast["$end"]; !ok {
|
||||
t.Error("Missing '$end' property")
|
||||
}
|
||||
|
||||
// Verify statements array
|
||||
stmts, ok := ast["statements"]
|
||||
if !ok {
|
||||
t.Fatal("Missing 'statements' property")
|
||||
}
|
||||
stmtsArr, ok := stmts.([]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("Expected statements to be array, got %T", stmts)
|
||||
}
|
||||
if len(stmtsArr) == 0 {
|
||||
t.Error("Expected non-empty statements array")
|
||||
}
|
||||
|
||||
// Print a nicely indented snippet for debug
|
||||
jsonBytes, err := json.MarshalIndent(ast, "", " ")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal AST: %v", err)
|
||||
}
|
||||
snippet := string(jsonBytes)
|
||||
if len(snippet) > 2000 {
|
||||
snippet = snippet[:2000] + "\n... (truncated)"
|
||||
}
|
||||
t.Logf("Parsed AST (first 2000 chars):\n%s", snippet)
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Package validation provides a Go-based test framework for comparing
|
||||
// JSON output between the Node.js and Go TypeScript parser wrappers.
|
||||
//
|
||||
// Run with: go test ./internal/validation/ -v
|
||||
//
|
||||
// This requires both the Go wrapper binary and Node.js with the
|
||||
// TypeScript wrapper available.
|
||||
package validation
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// normalizeJSON parses JSON and re-serializes it with sorted keys for
|
||||
// stable comparison.
|
||||
func normalizeJSON(data []byte) ([]byte, error) {
|
||||
var obj interface{}
|
||||
if err := json.Unmarshal(data, &obj); err != nil {
|
||||
return nil, fmt.Errorf("invalid JSON: %w", err)
|
||||
}
|
||||
return json.MarshalIndent(sortKeys(obj), "", " ")
|
||||
}
|
||||
|
||||
// sortKeys rebuilds a JSON-like value recursively: every
// map[string]interface{} and []interface{} is copied into a fresh
// container whose elements have themselves been processed, and any other
// value is returned as is. Go maps carry no ordering, so the key ordering
// of the final text comes from the JSON encoder.
func sortKeys(v interface{}) interface{} {
	if object, isObject := v.(map[string]interface{}); isObject {
		rebuilt := make(map[string]interface{}, len(object))
		for key, child := range object {
			rebuilt[key] = sortKeys(child)
		}
		return rebuilt
	}

	if array, isArray := v.([]interface{}); isArray {
		rebuilt := make([]interface{}, len(array))
		for index := range array {
			rebuilt[index] = sortKeys(array[index])
		}
		return rebuilt
	}

	return v
}
|
||||
|
||||
// findProjectRoot returns the nearest directory, starting at the working
// directory and walking upwards, that contains a go.mod file. The test is
// failed when the working directory cannot be determined or no go.mod is
// found.
func findProjectRoot(t *testing.T) string {
	t.Helper()

	current, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}

	for {
		marker := filepath.Join(current, "go.mod")
		if _, statErr := os.Stat(marker); statErr == nil {
			return current
		}

		up := filepath.Dir(current)
		if up == current {
			t.Fatal("could not find project root (no go.mod found)")
		}
		current = up
	}
}
|
||||
|
||||
// findNodeJSWrapper returns the path of the compiled Node.js TypeScript
// wrapper, <projectRoot>/../typescript/build/main.js.
//
// When the compiled file is missing but src/main.ts exists, the wrapper is
// built on the spot: "npm install" runs first if node_modules is absent,
// followed by "npm run build". An error is returned when the sources are
// missing, when an npm step fails, or when the build does not produce
// main.js.
func findNodeJSWrapper(projectRoot string) (string, error) {
	exists := func(path string) bool {
		_, err := os.Stat(path)
		return err == nil
	}

	tsDir := filepath.Join(projectRoot, "..", "typescript")
	compiled := filepath.Join(tsDir, "build", "main.js")
	if exists(compiled) {
		return compiled, nil
	}

	source := filepath.Join(tsDir, "src", "main.ts")
	if !exists(source) {
		return "", fmt.Errorf("Node.js wrapper not found at %s", source)
	}

	// Runs npm with the given arguments inside the TypeScript directory.
	npm := func(args ...string) ([]byte, error) {
		cmd := exec.Command("npm", args...)
		cmd.Dir = tsDir
		return cmd.CombinedOutput()
	}

	if !exists(filepath.Join(tsDir, "node_modules")) {
		if output, err := npm("install", "--no-audit", "--no-fund"); err != nil {
			return "", fmt.Errorf("npm install failed in %s: %v\n%s", tsDir, err, output)
		}
	}

	if output, err := npm("run", "build"); err != nil {
		return "", fmt.Errorf("npm run build failed in %s: %v\n%s", tsDir, err, output)
	}

	if !exists(compiled) {
		return "", fmt.Errorf("Node.js wrapper not found after build; expected at %s", compiled)
	}
	return compiled, nil
}
|
||||
|
||||
// parseWithNodeJSProtocol parses a file using the Node.js wrapper's protocol.
|
||||
// It starts the wrapper, sends the protocol commands, and extracts the AST response.
|
||||
func parseWithNodeJSProtocol(wrapperPath, filename string) ([]byte, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "node", "--no-warnings", wrapperPath)
|
||||
var stderr, stdout bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
cmd.Stdout = &stdout
|
||||
|
||||
stdinPipe, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start node.js wrapper: %v", err)
|
||||
}
|
||||
|
||||
commands := []string{
|
||||
fmt.Sprintf(`{"command":"parse","filename":"%s"}`, escapeJSON(filename)),
|
||||
`{"command":"quit"}`,
|
||||
}
|
||||
|
||||
for _, c := range commands {
|
||||
if _, err := io.WriteString(stdinPipe, c+"\n"); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
stdinPipe.Close()
|
||||
|
||||
err = cmd.Wait()
|
||||
if ctx.Err() != nil {
|
||||
return nil, fmt.Errorf("Node.js wrapper timed out; stderr: %s", stderr.String())
|
||||
}
|
||||
|
||||
lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(line), &resp); err != nil {
|
||||
continue
|
||||
}
|
||||
if resp["type"] == "ast" {
|
||||
return []byte(line), nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no AST response found in output; stderr: %s; stdout lines: %d",
|
||||
stderr.String(), len(lines))
|
||||
}
|
||||
|
||||
// parseWithGoProtocol parses a file using the Go wrapper's protocol.
|
||||
func parseWithGoProtocol(binaryPath, filename string) ([]byte, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, binaryPath)
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
stdinPipe, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var stdout bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start Go wrapper: %v", err)
|
||||
}
|
||||
|
||||
commands := []string{
|
||||
fmt.Sprintf(`{"command":"parse","filename":"%s"}`, escapeJSON(filename)),
|
||||
`{"command":"quit"}`,
|
||||
}
|
||||
|
||||
for _, c := range commands {
|
||||
if _, err := io.WriteString(stdinPipe, c+"\n"); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
stdinPipe.Close()
|
||||
|
||||
err = cmd.Wait()
|
||||
if ctx.Err() != nil {
|
||||
return nil, fmt.Errorf("Go wrapper timed out; stderr: %s", stderr.String())
|
||||
}
|
||||
if err != nil {
|
||||
// Non-zero exit is ok if we got output (error responses are valid)
|
||||
}
|
||||
|
||||
lines := strings.Split(strings.TrimSpace(stdout.String()), "\n")
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal([]byte(line), &resp); err != nil {
|
||||
continue
|
||||
}
|
||||
if resp["type"] == "ast" {
|
||||
return []byte(line), nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no AST response in output; stderr: %s; stdout: %q",
|
||||
stderr.String(), stdout.String())
|
||||
}
|
||||
|
||||
// escapeJSON returns s encoded as the contents of a JSON string literal,
// without the enclosing double quotes, so it can be spliced into a
// hand-written JSON template.
func escapeJSON(s string) string {
	// Marshalling a string cannot fail and always yields a quoted literal.
	quoted, _ := json.Marshal(s)
	unquoted := strings.TrimPrefix(string(quoted), `"`)
	return strings.TrimSuffix(unquoted, `"`)
}
|
||||
|
||||
// TestCompareOutputs compares the JSON output of both wrappers for test files.
|
||||
func TestCompareOutputs(t *testing.T) {
|
||||
projectRoot := findProjectRoot(t)
|
||||
|
||||
// Build the Go wrapper
|
||||
binaryPath := filepath.Join(projectRoot, "bin", "typescript-parser-wrapper")
|
||||
buildCmd := exec.Command("go", "build", "-o", binaryPath, "./cmd/typescript-parser-wrapper/")
|
||||
buildCmd.Dir = projectRoot
|
||||
if output, err := buildCmd.CombinedOutput(); err != nil {
|
||||
t.Fatalf("failed to build Go wrapper: %v\n%s", err, output)
|
||||
}
|
||||
|
||||
// Find the Node.js wrapper
|
||||
nodejsWrapper, err := findNodeJSWrapper(projectRoot)
|
||||
if err != nil {
|
||||
t.Skipf("Skipping comparison test: %v", err)
|
||||
}
|
||||
|
||||
if _, err := exec.LookPath("node"); err != nil {
|
||||
t.Skip("Skipping comparison test: node not found on PATH")
|
||||
}
|
||||
|
||||
// Gather test files
|
||||
testFiles, err := filepath.Glob(filepath.Join(projectRoot, "testdata", "*.ts"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Extractor test inputs can be included via VALIDATION_EXTRACTOR_TESTS=1
|
||||
if os.Getenv("VALIDATION_EXTRACTOR_TESTS") == "1" {
|
||||
extractorTestDir := filepath.Join(projectRoot, "..", "..", "tests", "ts", "input")
|
||||
if extractorFiles, err := filepath.Glob(filepath.Join(extractorTestDir, "*.ts")); err == nil {
|
||||
testFiles = append(testFiles, extractorFiles...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(testFiles) == 0 {
|
||||
t.Skip("No test files found")
|
||||
}
|
||||
|
||||
for _, file := range testFiles {
|
||||
basename := filepath.Base(file)
|
||||
t.Run(basename, func(t *testing.T) {
|
||||
nodejsOut, err := parseWithNodeJSProtocol(nodejsWrapper, file)
|
||||
if err != nil {
|
||||
t.Skipf("Node.js wrapper failed: %v", err)
|
||||
}
|
||||
|
||||
goOut, err := parseWithGoProtocol(binaryPath, file)
|
||||
if err != nil {
|
||||
t.Skipf("Go wrapper failed: %v", err)
|
||||
}
|
||||
|
||||
nodejsNorm, err := normalizeJSON(bytes.TrimSpace(nodejsOut))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to normalize Node.js output: %v", err)
|
||||
}
|
||||
|
||||
goNorm, err := normalizeJSON(bytes.TrimSpace(goOut))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to normalize Go output: %v", err)
|
||||
}
|
||||
|
||||
if !bytes.Equal(nodejsNorm, goNorm) {
|
||||
outDir := filepath.Join(projectRoot, "validation-output")
|
||||
os.MkdirAll(outDir, 0755)
|
||||
os.WriteFile(filepath.Join(outDir, basename+".nodejs.json"), nodejsNorm, 0644)
|
||||
os.WriteFile(filepath.Join(outDir, basename+".go.json"), goNorm, 0644)
|
||||
|
||||
// Parse both outputs and check for structural diffs (ignoring expected kind/flags differences)
|
||||
var nodejsObj, goObj map[string]interface{}
|
||||
json.Unmarshal(nodejsNorm, &nodejsObj)
|
||||
json.Unmarshal(goNorm, &goObj)
|
||||
|
||||
structural := countStructuralDiffs(nodejsObj["ast"], goObj["ast"], "root")
|
||||
if structural > 0 {
|
||||
t.Errorf("Output has %d structural diff(s) for %s (beyond expected kind/flags diffs)\n"+
|
||||
" Node.js output saved to: validation-output/%s.nodejs.json\n"+
|
||||
" Go output saved to: validation-output/%s.go.json",
|
||||
structural, basename, basename, basename)
|
||||
} else {
|
||||
t.Logf("Output for %s differs only in expected kind/flags/token numeric values (TS5 vs TS7)", basename)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeJSON checks that normalizeJSON re-serializes a compact JSON
// object as a multi-line document with keys in sorted order, both at the top
// level and inside the nested object.
//
// NOTE(review): leading whitespace inside the `expected` raw string is not
// visible in this view of the file; confirm it matches the indentation that
// normalizeJSON actually emits before relying on this literal.
func TestNormalizeJSON(t *testing.T) {
	input := `{"b":2,"a":1,"c":{"z":26,"y":25}}`
	expected := `{
"a": 1,
"b": 2,
"c": {
"y": 25,
"z": 26
}
}`
	result, err := normalizeJSON([]byte(input))
	if err != nil {
		t.Fatal(err)
	}
	// Compare as strings so a mismatch prints both documents readably.
	if string(result) != expected {
		t.Errorf("got:\n%s\nexpected:\n%s", string(result), expected)
	}
}
|
||||
|
||||
// numericValueKeys is the set of JSON object keys whose numeric values are
// allowed to differ between the TS5 and TS7 outputs (SyntaxKind/NodeFlags
// style numeric values). A missing key reads as false.
var numericValueKeys = map[string]bool{
	"flags":    true,
	"kind":     true,
	"operator": true,
	"token":    true,
}
|
||||
|
||||
// countStructuralDiffs recursively compares two JSON values and returns the
|
||||
// number of differences that are NOT expected TS5↔TS7 numeric kind/flags diffs.
|
||||
func countStructuralDiffs(a, b interface{}, path string) int {
|
||||
count := 0
|
||||
switch av := a.(type) {
|
||||
case map[string]interface{}:
|
||||
bv, ok := b.(map[string]interface{})
|
||||
if !ok {
|
||||
return 1
|
||||
}
|
||||
allKeys := map[string]bool{}
|
||||
for k := range av {
|
||||
allKeys[k] = true
|
||||
}
|
||||
for k := range bv {
|
||||
allKeys[k] = true
|
||||
}
|
||||
for k := range allKeys {
|
||||
aVal, aOk := av[k]
|
||||
bVal, bOk := bv[k]
|
||||
if !aOk || !bOk {
|
||||
count++
|
||||
continue
|
||||
}
|
||||
count += countStructuralDiffs(aVal, bVal, path+"."+k)
|
||||
}
|
||||
case []interface{}:
|
||||
bv, ok := b.([]interface{})
|
||||
if !ok {
|
||||
return 1
|
||||
}
|
||||
if len(av) != len(bv) {
|
||||
return 1
|
||||
}
|
||||
for i := range av {
|
||||
count += countStructuralDiffs(av[i], bv[i], fmt.Sprintf("%s[%d]", path, i))
|
||||
}
|
||||
default:
|
||||
if a != b {
|
||||
// Check if this is an expected numeric diff for kind/flags/token/operator
|
||||
key := lastPathComponent(path)
|
||||
if numericValueKeys[key] {
|
||||
// Both must be numbers for this to be an expected diff
|
||||
_, aNum := a.(float64)
|
||||
_, bNum := b.(float64)
|
||||
if aNum && bNum {
|
||||
return 0 // Expected TS5↔TS7 numeric diff
|
||||
}
|
||||
}
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// lastPathComponent returns the part of path after its final '.', or the
// whole path when it contains no '.'.
func lastPathComponent(path string) string {
	dot := strings.LastIndexByte(path, '.')
	if dot < 0 {
		return path
	}
	return path[dot+1:]
}
|
||||
254
javascript/extractor/lib/typescript-go/scripts/validate-output.sh
Executable file
254
javascript/extractor/lib/typescript-go/scripts/validate-output.sh
Executable file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env bash
|
||||
# validate-output.sh — Compare JSON output between the Node.js and Go
|
||||
# TypeScript parser wrappers.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/validate-output.sh [<ts-file> ...]
|
||||
#
|
||||
# Without arguments, it validates all .ts files from the extractor test input
# directory and from this project's testdata directory.
|
||||
#
|
||||
# Environment variables:
|
||||
# NODEJS_WRAPPER — Path to Node.js wrapper main.js (default: auto-detect)
|
||||
# GO_WRAPPER — Path to Go wrapper binary (default: bin/typescript-parser-wrapper, built from source if missing)
|
||||
# TIMEOUT — Seconds to wait for each parse (default: 10)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
EXTRACTOR_LIB="$(cd "$PROJECT_DIR/.." && pwd)"
|
||||
TYPESCRIPT_DIR="$EXTRACTOR_LIB/typescript"
|
||||
TIMEOUT="${TIMEOUT:-10}"
|
||||
|
||||
# Print the path of the compiled Node.js wrapper ($TYPESCRIPT_DIR/build/main.js)
# if that file exists; otherwise print an empty line.
find_nodejs_wrapper() {
    local candidate="$TYPESCRIPT_DIR/build/main.js"
    if [ ! -f "$candidate" ]; then
        echo ""
        return
    fi
    echo "$candidate"
}
|
||||
|
||||
NODEJS_WRAPPER="${NODEJS_WRAPPER:-$(find_nodejs_wrapper)}"
|
||||
|
||||
# Build and locate the Go wrapper.
# Only the default location is ever built. If GO_WRAPPER was set explicitly and
# does not exist, say so once and disable the Go side, instead of building a
# binary that would never be used and then skipping every file without
# explanation.
DEFAULT_GO_WRAPPER="$PROJECT_DIR/bin/typescript-parser-wrapper"
GO_WRAPPER="${GO_WRAPPER:-$DEFAULT_GO_WRAPPER}"
if [ ! -f "$GO_WRAPPER" ]; then
    if [ "$GO_WRAPPER" = "$DEFAULT_GO_WRAPPER" ]; then
        echo "Building Go wrapper..."
        mkdir -p "$PROJECT_DIR/bin"
        (cd "$PROJECT_DIR" && go build -o bin/typescript-parser-wrapper ./cmd/typescript-parser-wrapper/) || {
            echo "Failed to build Go wrapper."
            GO_WRAPPER=""
        }
    else
        echo "Go wrapper not found at $GO_WRAPPER (from GO_WRAPPER); Go parsing will be skipped." >&2
        GO_WRAPPER=""
    fi
fi
|
||||
|
||||
# Colors (disabled if not a terminal)
|
||||
if [ -t 1 ]; then
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
else
|
||||
RED='' GREEN='' YELLOW='' NC=''
|
||||
fi
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
SKIP=0
|
||||
|
||||
# Normalize JSON read from stdin: keys sorted, two-space indentation.
# Prints the normalized document on stdout. On any failure nothing is printed,
# the exit status is 1, and Python's own diagnostics are discarded.
normalize_json() {
    python3 -c '
import json
import sys

try:
    document = json.load(sys.stdin)
    print(json.dumps(document, sort_keys=True, indent=2))
except:
    sys.exit(1)
' 2>/dev/null
}
|
||||
|
||||
# Parse a file using the wrapper's stdin protocol.
# Usage: parse_with_protocol <cmd> <file>
#   cmd:  the shell command to start the wrapper (e.g., "node main.js" or "./wrapper").
#         It is split on whitespace into program + arguments, so paths that
#         contain spaces are not supported.
#   file: absolute path to the .ts file
#
# Sends parse + quit commands on stdin and prints the first response line that
# is a JSON object with "type" == "ast". Prints nothing if there is no such
# line. Always returns 0 so that callers running under `set -e` can treat
# empty output as "no result".
parse_with_protocol() {
    local cmd="$1"
    local file="$2"

    # Build the requests with a JSON encoder so that quotes, backslashes and
    # control characters in the file name cannot produce malformed JSON.
    local requests
    requests=$(python3 -c '
import json, sys
compact = (",", ":")
print(json.dumps({"command": "parse", "filename": sys.argv[1]}, separators=compact))
print(json.dumps({"command": "quit"}, separators=compact))
' "$file" 2>/dev/null) || return 0

    local output
    # $cmd is intentionally unquoted: it carries the program and its arguments.
    output=$(printf '%s\n' "$requests" \
        | timeout "$TIMEOUT" $cmd 2>/dev/null) || true

    # Extract the line containing the AST response in a single Python pass
    # (one interpreter start instead of one per output line). Input is read to
    # the end even after a match so the writing side never sees a closed pipe.
    printf '%s\n' "$output" | python3 -c '
import json, sys
found = False
for raw in sys.stdin.buffer:
    if found:
        continue
    try:
        resp = json.loads(raw)
    except ValueError:
        continue
    if isinstance(resp, dict) and resp.get("type") == "ast":
        sys.stdout.buffer.write(raw.rstrip(b"\n") + b"\n")
        found = True
' 2>/dev/null || true
}
|
||||
|
||||
# Parse a file with the Node.js wrapper.
# Prints the AST response line, or an empty line when no Node.js wrapper is
# configured (NODEJS_WRAPPER is empty).
parse_nodejs() {
    local file="$1"
    if [ -n "$NODEJS_WRAPPER" ]; then
        parse_with_protocol "node --no-warnings $NODEJS_WRAPPER" "$file"
    else
        echo ""
    fi
}
|
||||
|
||||
# Parse a file with the Go wrapper.
# Prints the AST response line, or an empty line when no Go wrapper is
# available (GO_WRAPPER is empty).
parse_go() {
    local file="$1"
    if [ -n "$GO_WRAPPER" ]; then
        parse_with_protocol "$GO_WRAPPER" "$file"
    else
        echo ""
    fi
}
|
||||
|
||||
# Compare the output of both wrappers for one file and update the
# PASS / FAIL / SKIP counters.
# Usage: compare_output <file>
#
# A file is skipped when either wrapper produces no AST response or output that
# is not valid JSON. It passes when the normalized outputs are identical, or
# when they differ only in numeric kind/flags/token/operator values. Otherwise
# it fails; both normalized outputs are saved under validation-output/ and the
# first lines of their diff are printed.
compare_output() {
    local file="$1"
    local basename
    basename="$(basename "$file")"

    local nodejs_out go_out
    nodejs_out=$(parse_nodejs "$file")
    go_out=$(parse_go "$file")

    if [ -z "$nodejs_out" ] && [ -z "$go_out" ]; then
        echo -e " ${YELLOW}SKIP${NC} $basename (both outputs empty)"
        SKIP=$((SKIP + 1))
        return
    fi

    if [ -z "$nodejs_out" ]; then
        echo -e " ${YELLOW}SKIP${NC} $basename (Node.js output empty)"
        SKIP=$((SKIP + 1))
        return
    fi

    if [ -z "$go_out" ]; then
        echo -e " ${YELLOW}SKIP${NC} $basename (Go output empty)"
        SKIP=$((SKIP + 1))
        return
    fi

    local nodejs_norm go_norm
    nodejs_norm=$(echo "$nodejs_out" | normalize_json) || {
        echo -e " ${YELLOW}SKIP${NC} $basename (Node.js output not valid JSON)"
        SKIP=$((SKIP + 1))
        return
    }
    go_norm=$(echo "$go_out" | normalize_json) || {
        echo -e " ${YELLOW}SKIP${NC} $basename (Go output not valid JSON)"
        SKIP=$((SKIP + 1))
        return
    }

    if [ "$nodejs_norm" = "$go_norm" ]; then
        echo -e " ${GREEN}PASS${NC} $basename (exact match)"
        PASS=$((PASS + 1))
        return
    fi

    # Check if differences are only expected TS5↔TS7 numeric kind/flags/token/operator values.
    # The two documents are handed to Python through process substitution
    # rather than as command-line arguments: a normalized AST easily exceeds
    # the operating system limit on the size of a single argument, which would
    # make the interpreter fail to start and turn every comparison into "?".
    local structural_diffs
    structural_diffs=$(python3 -c '
import json, sys

NUMERIC_VALUE_KEYS = {"kind", "flags", "token", "operator"}

def is_number(value):
    # bool is a subclass of int in Python; JSON true/false are not numbers
    return isinstance(value, (int, float)) and not isinstance(value, bool)

def count_structural(a, b, path="root"):
    count = 0
    if isinstance(a, dict) and isinstance(b, dict):
        keys = set(a) | set(b)
        for k in keys:
            # parseDiagnostics: Go always returns [], Node.js may have actual diagnostics
            if k == "parseDiagnostics":
                continue
            if k not in a or k not in b:
                count += 1
            else:
                count += count_structural(a[k], b[k], path + "." + k)
    elif isinstance(a, list) and isinstance(b, list):
        if len(a) != len(b):
            return 1
        for i in range(len(a)):
            count += count_structural(a[i], b[i], "%s[%d]" % (path, i))
    elif a != b:
        key = path.rsplit(".", 1)[-1] if "." in path else path
        if key in NUMERIC_VALUE_KEYS and is_number(a) and is_number(b):
            return 0
        count = 1
    return count

with open(sys.argv[1]) as left, open(sys.argv[2]) as right:
    a = json.load(left)
    b = json.load(right)
print(count_structural(a, b))
' <(printf '%s' "$nodejs_norm") <(printf '%s' "$go_norm") 2>/dev/null) || structural_diffs="?"

    if [ "$structural_diffs" = "0" ]; then
        echo -e " ${GREEN}PASS${NC} $basename (only expected TS5↔TS7 kind/flags diffs)"
        PASS=$((PASS + 1))
    else
        echo -e " ${RED}FAIL${NC} $basename ($structural_diffs structural diff(s))"
        FAIL=$((FAIL + 1))

        # Save outputs for inspection
        local outdir="$PROJECT_DIR/validation-output"
        mkdir -p "$outdir"
        echo "$nodejs_norm" > "$outdir/${basename}.nodejs.json"
        echo "$go_norm" > "$outdir/${basename}.go.json"

        # Show first few lines of diff
        diff <(echo "$nodejs_norm") <(echo "$go_norm") | head -30 || true
    fi
}
|
||||
|
||||
# Gather files: explicit arguments win; otherwise fall back to the extractor
# test inputs followed by this project's own test data.
files=()
if [ $# -gt 0 ]; then
    files=("$@")
else
    # Use extractor test inputs
    TEST_DIR="$EXTRACTOR_LIB/../tests/ts/input"
    if [ -d "$TEST_DIR" ]; then
        for f in "$TEST_DIR"/*.ts; do
            # An unmatched glob stays literal, so keep only real files
            if [ -f "$f" ]; then
                files+=("$f")
            fi
        done
    fi

    # Also use our own test data
    for f in "$PROJECT_DIR/testdata"/*.ts; do
        if [ -f "$f" ]; then
            files+=("$f")
        fi
    done
fi

if [ ${#files[@]} -eq 0 ]; then
    echo "No TypeScript files to validate."
    exit 0
fi

# Report the configuration in effect before running
echo "=== TypeScript Parser Wrapper Validation ==="
echo " Node.js wrapper: ${NODEJS_WRAPPER:-not found}"
echo " Go wrapper: ${GO_WRAPPER:-not built}"
echo " Files: ${#files[@]}"
echo " Timeout: ${TIMEOUT}s per file"
echo ""

for file in "${files[@]}"; do
    compare_output "$(realpath "$file")"
done

echo ""
echo "=== Results ==="
echo -e " ${GREEN}PASS: $PASS${NC} ${RED}FAIL: $FAIL${NC} ${YELLOW}SKIP: $SKIP${NC}"

# Any failing file makes the whole run fail
[ "$FAIL" -eq 0 ] || exit 1
|
||||
77
javascript/extractor/lib/typescript-go/testdata/sample.ts
vendored
Normal file
77
javascript/extractor/lib/typescript-go/testdata/sample.ts
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
// Test file for validating the TypeScript parser wrapper.
|
||||
// This file exercises various TypeScript features to ensure the AST
|
||||
// serialization produces the correct output.
|
||||
|
||||
interface Greeter {
|
||||
greet(name: string): string;
|
||||
}
|
||||
|
||||
class HelloGreeter implements Greeter {
|
||||
private prefix: string;
|
||||
|
||||
constructor(prefix: string = "Hello") {
|
||||
this.prefix = prefix;
|
||||
}
|
||||
|
||||
greet(name: string): string {
|
||||
return `${this.prefix}, ${name}!`;
|
||||
}
|
||||
}
|
||||
|
||||
// Generics
|
||||
function identity<T>(arg: T): T {
|
||||
return arg;
|
||||
}
|
||||
|
||||
// Conditional types
|
||||
type IsString<T> = T extends string ? "yes" : "no";
|
||||
|
||||
// Async/await
|
||||
async function fetchData(url: string): Promise<string> {
|
||||
const response = await fetch(url);
|
||||
return response.text();
|
||||
}
|
||||
|
||||
// Destructuring
|
||||
const { a, b, ...rest } = { a: 1, b: 2, c: 3, d: 4 };
|
||||
const [first, second, ...remaining] = [1, 2, 3, 4, 5];
|
||||
|
||||
// Enums
|
||||
enum Direction {
|
||||
Up = "UP",
|
||||
Down = "DOWN",
|
||||
Left = "LEFT",
|
||||
Right = "RIGHT",
|
||||
}
|
||||
|
||||
// Type assertions
|
||||
const value = "hello" as unknown as number;
|
||||
|
||||
// Optional chaining
|
||||
const len = value?.toString()?.length;
|
||||
|
||||
// Nullish coalescing
|
||||
const result = len ?? 0;
|
||||
|
||||
// Decorator-style function (declared only; it is not applied with `@` syntax in this file)
|
||||
function log(target: any, key: string, descriptor: PropertyDescriptor) {
|
||||
return descriptor;
|
||||
}
|
||||
|
||||
// Namespace
|
||||
namespace Validation {
|
||||
export interface StringValidator {
|
||||
isAcceptable(s: string): boolean;
|
||||
}
|
||||
}
|
||||
|
||||
// Mapped types
|
||||
type Readonly<T> = {
|
||||
readonly [P in keyof T]: T[P];
|
||||
};
|
||||
|
||||
// Template literal types
|
||||
type EventName = `on${string}`;
|
||||
|
||||
// Export
|
||||
export { HelloGreeter, Direction, fetchData };
|
||||
@@ -56,6 +56,11 @@ import ch.qos.logback.classic.Level;
|
||||
* $SEMMLE_DIST/tools/typescript-parser-wrapper/main.js}; non-standard locations can be configured
|
||||
* using the property {@value #PARSER_WRAPPER_PATH_ENV_VAR}.
|
||||
*
|
||||
* <p>Alternatively, a Go-based parser wrapper can be used by setting the environment variable
|
||||
* {@value #USE_GO_PARSER_VAR} to {@code true}. This uses the TypeScript 7 (Go-based) compiler
|
||||
* and does not require Node.js. The Go binary location can be configured using {@value
|
||||
* #GO_PARSER_WRAPPER_PATH_ENV_VAR}.
|
||||
*
|
||||
* <p>The script launches the Node.js wrapper in the Node.js runtime, looking for {@code node} on
|
||||
* the {@code PATH} by default. Non-standard locations can be configured using the property {@value
|
||||
* #TYPESCRIPT_NODE_RUNTIME_VAR}, and additional arguments can be configured using the property
|
||||
@@ -124,6 +129,23 @@ public class TypeScriptParser {
|
||||
*/
|
||||
public static final String TYPESCRIPT_NODE_FLAGS = "SEMMLE_TYPESCRIPT_NODE_FLAGS";
|
||||
|
||||
/**
|
||||
* An environment variable that, when set to {@code true}, causes the extractor to use a Go-based
|
||||
* TypeScript parser wrapper (using TypeScript 7) instead of the Node.js wrapper.
|
||||
*
|
||||
* <p>This is experimental and does not require Node.js to be installed.
|
||||
*/
|
||||
public static final String USE_GO_PARSER_VAR = "SEMMLE_TYPESCRIPT_USE_GO_PARSER";
|
||||
|
||||
/**
|
||||
* An environment variable that can be set to indicate the location of the Go TypeScript parser
|
||||
* wrapper binary.
|
||||
*
|
||||
* <p>Only used when {@value #USE_GO_PARSER_VAR} is set to {@code true}.
|
||||
* Defaults to {@code $SEMMLE_DIST/tools/typescript-parser-wrapper-go} if not set.
|
||||
*/
|
||||
public static final String GO_PARSER_WRAPPER_PATH_ENV_VAR = "SEMMLE_TYPESCRIPT_GO_PARSER_WRAPPER";
|
||||
|
||||
/**
|
||||
* Exit code for Node.js in case of a fatal error from V8. This exit code sometimes occurs
|
||||
* when the process runs out of memory.
|
||||
@@ -141,6 +163,9 @@ public class TypeScriptParser {
|
||||
|
||||
private String parserWrapperCommand;
|
||||
|
||||
/** Whether we are using the Go-based TypeScript parser instead of Node.js. */
|
||||
private boolean useGoParser = "true".equalsIgnoreCase(Env.systemEnv().get(USE_GO_PARSER_VAR));
|
||||
|
||||
/** Streams for communicating with the Node.js parser wrapper process. */
|
||||
private BufferedWriter toParserWrapper;
|
||||
|
||||
@@ -171,10 +196,19 @@ public class TypeScriptParser {
|
||||
/**
|
||||
* Verifies that Node.js and TypeScript are installed and throws an exception otherwise.
|
||||
*
|
||||
* <p>When the Go parser is enabled, this only verifies the Go binary exists.
|
||||
*
|
||||
* @param verbose if true, log the Node.js executable path, version strings, and any additional
|
||||
* arguments.
|
||||
*/
|
||||
public void verifyInstallation(boolean verbose) {
|
||||
if (useGoParser) {
|
||||
File goWrapper = getGoParserWrapper();
|
||||
if (verbose) {
|
||||
System.out.println("Using Go TypeScript parser wrapper: " + goWrapper.getAbsolutePath());
|
||||
}
|
||||
return;
|
||||
}
|
||||
verifyNodeInstallation();
|
||||
if (verbose) {
|
||||
System.out.println("Found Node.js at: " + nodeJsRuntime);
|
||||
@@ -273,8 +307,69 @@ public class TypeScriptParser {
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Start the Node.js parser wrapper process. */
|
||||
/** Start the parser wrapper process (Node.js or Go). */
|
||||
private void setupParserWrapper() {
|
||||
if (useGoParser) {
|
||||
setupGoParserWrapper();
|
||||
} else {
|
||||
setupNodeParserWrapper();
|
||||
}
|
||||
}
|
||||
|
||||
/** Start the Go-based parser wrapper process. */
|
||||
private void setupGoParserWrapper() {
|
||||
File goWrapper = getGoParserWrapper();
|
||||
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add(goWrapper.getAbsolutePath());
|
||||
|
||||
ProcessBuilder pb = new ProcessBuilder(cmd);
|
||||
parserWrapperCommand = StringUtil.glue(" ", cmd);
|
||||
|
||||
// Pass the tsgo binary location if configured
|
||||
String tsgoBinary = Env.systemEnv().get("SEMMLE_TYPESCRIPT_TSGO_BINARY");
|
||||
if (tsgoBinary != null) {
|
||||
pb.environment().put("SEMMLE_TYPESCRIPT_TSGO_BINARY", tsgoBinary);
|
||||
}
|
||||
|
||||
try {
|
||||
pb.redirectError(Redirect.INHERIT);
|
||||
parserWrapperProcess = pb.start();
|
||||
OutputStream os = parserWrapperProcess.getOutputStream();
|
||||
OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
|
||||
toParserWrapper = new BufferedWriter(osw);
|
||||
InputStream is = parserWrapperProcess.getInputStream();
|
||||
InputStreamReader isr = new InputStreamReader(is, "UTF-8");
|
||||
fromParserWrapper = new BufferedReader(isr);
|
||||
this.loadMetadata();
|
||||
} catch (IOException e) {
|
||||
throw new CatastrophicError(
|
||||
"Could not start Go TypeScript parser wrapper "
|
||||
+ "(command: " + parserWrapperCommand + ")",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the location of the Go parser wrapper binary. */
|
||||
private File getGoParserWrapper() {
|
||||
String explicitPath = Env.systemEnv().get(GO_PARSER_WRAPPER_PATH_ENV_VAR);
|
||||
File goWrapper;
|
||||
if (explicitPath != null) {
|
||||
goWrapper = new File(explicitPath);
|
||||
} else {
|
||||
goWrapper =
|
||||
new File(EnvironmentVariables.getExtractorRoot(), "tools/typescript-parser-wrapper-go");
|
||||
}
|
||||
if (!goWrapper.isFile()) {
|
||||
throw new ResourceError(
|
||||
"Could not find Go TypeScript parser wrapper: " + goWrapper + " does not exist.\n"
|
||||
+ "Set " + GO_PARSER_WRAPPER_PATH_ENV_VAR + " to the path of the Go wrapper binary.");
|
||||
}
|
||||
return goWrapper;
|
||||
}
|
||||
|
||||
/** Start the Node.js parser wrapper process. */
|
||||
private void setupNodeParserWrapper() {
|
||||
verifyNodeInstallation();
|
||||
|
||||
int mainMemoryMb =
|
||||
@@ -344,7 +439,7 @@ public class TypeScriptParser {
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a {@code request} to the Node.js parser wrapper process, and return the response it
|
||||
* Send a {@code request} to the parser wrapper process, and return the response it
|
||||
* replies with.
|
||||
*/
|
||||
private JsonObject talkToParserWrapper(JsonObject request) {
|
||||
@@ -512,7 +607,7 @@ public class TypeScriptParser {
|
||||
}
|
||||
|
||||
/**
|
||||
* Forcibly closes the Node.js process.
|
||||
* Forcibly closes the parser wrapper process (Node.js or Go).
|
||||
*
|
||||
* <p>A new process will be started the next time a request is made.
|
||||
*/
|
||||
|
||||
@@ -1 +1 @@
|
||||
**/*ql*/javascript/extractor/tests/*/input//
|
||||
**/javascript/extractor/tests/*/input//
|
||||
|
||||
@@ -10,10 +10,6 @@
|
||||
* `type, path, kind`
|
||||
* - Summaries:
|
||||
* `type, path, input, output, kind`
|
||||
* - Barriers:
|
||||
* `type, path, kind`
|
||||
* - BarrierGuards:
|
||||
* `type, path, acceptingValue, kind`
|
||||
* - Types:
|
||||
* `type1, type2, path`
|
||||
*
|
||||
@@ -46,8 +42,7 @@
|
||||
* 3. The `input` and `output` columns specify how data enters and leaves the element selected by the
|
||||
* first `(type, path)` tuple. Both strings are `.`-separated access paths
|
||||
* of the same syntax as the `path` column.
|
||||
* 4. The `acceptingValue` column of barrier guard models specifies which branch of the guard is blocking flow. It can be "true" or "false".
|
||||
* 5. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 4. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources `"remote"` indicates a default remote flow source, and for summaries
|
||||
* `"taint"` indicates a default additional taint step and `"value"` indicates a
|
||||
@@ -360,11 +355,11 @@ private predicate barrierModel(string type, string path, string kind, string mod
|
||||
|
||||
/** Holds if a barrier guard model exists for the given parameters. */
|
||||
private predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, string model
|
||||
string type, string path, string branch, string kind, string model
|
||||
) {
|
||||
// No deprecation adapter for barrier models, they were not around back then.
|
||||
exists(QlBuiltins::ExtensionId madId |
|
||||
Extensions::barrierGuardModel(type, path, acceptingValue, kind, madId) and
|
||||
Extensions::barrierGuardModel(type, path, branch, kind, madId) and
|
||||
model = "MaD:" + madId.toString()
|
||||
)
|
||||
}
|
||||
@@ -788,16 +783,16 @@ module ModelOutput {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `acceptingValue`.
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `branch`.
|
||||
*/
|
||||
cached
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue, string model) {
|
||||
exists(string type, string path, string acceptingValue_str |
|
||||
acceptingValue = true and acceptingValue_str = "true"
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch, string model) {
|
||||
exists(string type, string path, string branch_str |
|
||||
branch = true and branch_str = "true"
|
||||
or
|
||||
acceptingValue = false and acceptingValue_str = "false"
|
||||
branch = false and branch_str = "false"
|
||||
|
|
||||
barrierGuardModel(type, path, acceptingValue_str, kind, model) and
|
||||
barrierGuardModel(type, path, branch_str, kind, model) and
|
||||
result = getNodeFromPath(type, path)
|
||||
)
|
||||
}
|
||||
@@ -861,12 +856,12 @@ module ModelOutput {
|
||||
API::Node getABarrierNode(string kind) { result = getABarrierNode(kind, _) }
|
||||
|
||||
/**
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `acceptingValue`.
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `branch`.
|
||||
*
|
||||
* INTERNAL: Do not use.
|
||||
*/
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue) {
|
||||
result = getABarrierGuardNode(kind, acceptingValue, _)
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch) {
|
||||
result = getABarrierGuardNode(kind, branch, _)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -33,11 +33,11 @@ extensible predicate barrierModel(
|
||||
* of the given `kind` and `madId` is the data extension row number.
|
||||
* `path` is assumed to lead to a parameter of a call (possibly `self`), and
|
||||
* the call is guarding the parameter.
|
||||
* `acceptingValue` is either `true` or `false`, indicating which branch of
|
||||
* the guard is protecting the parameter.
|
||||
* `branch` is either `true` or `false`, indicating which branch of the guard
|
||||
* is protecting the parameter.
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, QlBuiltins::ExtensionId madId
|
||||
string type, string path, string branch, string kind, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -191,21 +191,3 @@ class RouteHandlerLimitedByRateLimiterFlexible extends RateLimitingMiddleware in
|
||||
private class FastifyRateLimiter extends RateLimitingMiddleware {
|
||||
FastifyRateLimiter() { this = DataFlow::moduleImport("fastify-rate-limit") }
|
||||
}
|
||||
|
||||
/**
 * An options object with a `rateLimit` config passed to a Fastify shorthand route method,
 * such as `fastify.post('/path', { config: { rateLimit: { ... } } }, handler)`.
 *
 * The `rateLimit` property is recognized either nested under `config` or directly on the
 * options object.
 */
private class FastifyPerRouteRateLimit extends RateLimitingMiddleware {
  FastifyPerRouteRateLimit() {
    exists(Fastify::RouteSetup setup |
      // Calls to `route` and `addHook` are excluded; only calls with at least three
      // arguments are considered, and the value must flow to the second argument.
      not setup.getMethodName() = ["route", "addHook"] and
      setup.getNumArgument() >= 3 and
      this.flowsTo(setup.getArgument(1))
    |
      // `{ config: { rateLimit: ... } }`
      exists(this.getAPropertySource("config").getAPropertySource("rateLimit"))
      or
      // `{ rateLimit: ... }`
      exists(this.getAPropertySource("rateLimit"))
    )
  }
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* The query `js/missing-rate-limiting` now takes Fastify per-route
|
||||
rate limiting into account.
|
||||
@@ -9,4 +9,3 @@
|
||||
| tst.js:64:25:64:63 | functio ... req); } | This route handler performs $@, but is not rate-limited. | tst.js:64:46:64:60 | verifyUser(req) | authorization |
|
||||
| tst.js:76:25:76:53 | catchAs ... ndler1) | This route handler performs $@, but is not rate-limited. | tst.js:14:40:14:46 | login() | authorization |
|
||||
| tst.js:88:24:88:40 | expensiveHandler1 | This route handler performs $@, but is not rate-limited. | tst.js:14:40:14:46 | login() | authorization |
|
||||
| tst.js:112:28:112:44 | expensiveHandler1 | This route handler performs $@, but is not rate-limited. | tst.js:14:40:14:46 | login() | authorization |
|
||||
|
||||
@@ -88,25 +88,3 @@ const fastifyApp = require('fastify')();
|
||||
fastifyApp.get('/foo', expensiveHandler1); // $ Alert
|
||||
fastifyApp.register(require('fastify-rate-limit'));
|
||||
fastifyApp.get('/bar', expensiveHandler1);
|
||||
|
||||
// Fastify per-route rate limiting via config.rateLimit
|
||||
const fastifyApp2 = require('fastify')();
|
||||
fastifyApp2.register(require('@fastify/rate-limit'));
|
||||
|
||||
fastifyApp2.post('/login', {
|
||||
config: {
|
||||
rateLimit: {
|
||||
max: 3,
|
||||
timeWindow: '1 minute'
|
||||
}
|
||||
}
|
||||
}, expensiveHandler1); // OK - has per-route rateLimit config
|
||||
|
||||
fastifyApp2.post('/signup', {
|
||||
rateLimit: {
|
||||
max: 5,
|
||||
timeWindow: '1 minute'
|
||||
}
|
||||
}, expensiveHandler1); // OK - has per-route rateLimit directly in options
|
||||
|
||||
fastifyApp2.post('/other', expensiveHandler1); // $ Alert - no rate limiting
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
/**
|
||||
* Provides a reusable data-flow configuration for tracking class instances
|
||||
* through global data-flow with full path support.
|
||||
*
|
||||
* This module is designed for quality queries that check whether instances
|
||||
* of certain classes reach operations that require a specific interface
|
||||
* (e.g., `__contains__`, `__iter__`, `__hash__`).
|
||||
*
|
||||
* The configuration uses two flow states:
|
||||
* - `TrackingClass`: tracking a reference to the class itself
|
||||
* - `TrackingInstance`: tracking an instance of the class
|
||||
*
|
||||
* At instantiation points (e.g., `cls()`), the state transitions from
|
||||
* `TrackingClass` to `TrackingInstance`. Sinks are only matched in the
|
||||
* `TrackingInstance` state.
|
||||
*/
|
||||
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/** A flow state for tracking class references and their instances. */
|
||||
abstract class ClassInstanceFlowState extends string {
|
||||
bindingset[this]
|
||||
ClassInstanceFlowState() { any() }
|
||||
}
|
||||
|
||||
/** A state signifying that the tracked value is a reference to the class itself. */
|
||||
class TrackingClass extends ClassInstanceFlowState {
|
||||
TrackingClass() { this = "TrackingClass" }
|
||||
}
|
||||
|
||||
/** A state signifying that the tracked value is an instance of the class. */
|
||||
class TrackingInstance extends ClassInstanceFlowState {
|
||||
TrackingInstance() { this = "TrackingInstance" }
|
||||
}
|
||||
|
||||
/**
|
||||
* Signature module for parameterizing `ClassInstanceFlow` per query.
|
||||
*/
|
||||
signature module ClassInstanceFlowSig {
|
||||
/** Holds if `cls` is a class whose instances should be tracked to sinks. */
|
||||
predicate isRelevantClass(Class cls);
|
||||
|
||||
/** Holds if `sink` is a location where reaching instances indicate a violation. */
|
||||
predicate isInstanceSink(DataFlow::Node sink);
|
||||
|
||||
/**
|
||||
* Holds if an `isinstance` check against `checkedType` should act as a barrier,
|
||||
* suppressing alerts when the instance has been verified to have the expected interface.
|
||||
*/
|
||||
predicate isGuardType(DataFlow::Node checkedType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a global data-flow configuration for tracking instances of
|
||||
* relevant classes from their definition to violation sinks.
|
||||
*/
|
||||
module ClassInstanceFlow<ClassInstanceFlowSig Sig> {
|
||||
/**
|
||||
* Holds if `guard` is an `isinstance` call checking `node` against a type
|
||||
* that should suppress the alert.
|
||||
*/
|
||||
private predicate isinstanceGuard(DataFlow::GuardNode guard, ControlFlowNode node, boolean branch) {
|
||||
exists(DataFlow::CallCfgNode isinstance_call |
|
||||
isinstance_call = API::builtin("isinstance").getACall() and
|
||||
isinstance_call.getArg(0).asCfgNode() = node and
|
||||
(
|
||||
Sig::isGuardType(isinstance_call.getArg(1))
|
||||
or
|
||||
// Also handle tuples of types: isinstance(x, (T1, T2))
|
||||
Sig::isGuardType(DataFlow::exprNode(isinstance_call.getArg(1).asExpr().(Tuple).getAnElt()))
|
||||
) and
|
||||
guard = isinstance_call.asCfgNode() and
|
||||
branch = true
|
||||
)
|
||||
}
|
||||
|
||||
private module Config implements DataFlow::StateConfigSig {
|
||||
class FlowState = ClassInstanceFlowState;
|
||||
|
||||
predicate isSource(DataFlow::Node source, FlowState state) {
|
||||
exists(ClassExpr ce |
|
||||
Sig::isRelevantClass(ce.getInnerScope()) and
|
||||
source.asExpr() = ce and
|
||||
state instanceof TrackingClass
|
||||
)
|
||||
}
|
||||
|
||||
predicate isSink(DataFlow::Node sink, FlowState state) {
|
||||
Sig::isInstanceSink(sink) and
|
||||
state instanceof TrackingInstance
|
||||
}
|
||||
|
||||
predicate isBarrier(DataFlow::Node node) {
|
||||
node = DataFlow::BarrierGuard<isinstanceGuard/3>::getABarrierNode()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `call` is inside a branch that is guarded by a condition
|
||||
* depending on a parameter of the enclosing function. In such cases,
|
||||
* the instantiation is contextual — it only happens for certain argument
|
||||
* values — and we cannot determine from the call site whether it will
|
||||
* actually execute.
|
||||
*/
|
||||
private predicate parameterGuardedCall(CallNode call) {
|
||||
exists(ConditionBlock guard, DataFlow::ParameterNode param, DataFlow::Node guardSubExpr |
|
||||
guard.controls(call.getBasicBlock(), _) and
|
||||
param.getScope() = call.getScope() and
|
||||
guardSubExpr.asCfgNode() = guard.getLastNode().getAChild*() and
|
||||
DataFlow::localFlow(param, guardSubExpr)
|
||||
)
|
||||
}
|
||||
|
||||
predicate isAdditionalFlowStep(
|
||||
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
|
||||
) {
|
||||
// Instantiation: class reference at the call function position
|
||||
// flows to the call result as an instance.
|
||||
stateFrom instanceof TrackingClass and
|
||||
stateTo instanceof TrackingInstance and
|
||||
exists(CallNode call |
|
||||
nodeFrom.asCfgNode() = call.getFunction() and
|
||||
nodeTo.asCfgNode() = call and
|
||||
// Exclude decorator applications, where the result is a proxy
|
||||
// rather than a typical instance.
|
||||
not call.getNode() = any(FunctionExpr fe).getADecoratorCall() and
|
||||
// Exclude instantiations guarded by parameter-dependent conditions,
|
||||
// since we cannot determine from the call site whether the guard
|
||||
// will be satisfied.
|
||||
not parameterGuardedCall(call)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
module Flow = DataFlow::GlobalWithState<Config>;
|
||||
}
|
||||
@@ -10,10 +10,6 @@
|
||||
* `type, path, kind`
|
||||
* - Summaries:
|
||||
* `type, path, input, output, kind`
|
||||
* - Barriers:
|
||||
* `type, path, kind`
|
||||
* - BarrierGuards:
|
||||
* `type, path, acceptingValue, kind`
|
||||
* - Types:
|
||||
* `type1, type2, path`
|
||||
*
|
||||
@@ -46,8 +42,7 @@
|
||||
* 3. The `input` and `output` columns specify how data enters and leaves the element selected by the
|
||||
* first `(type, path)` tuple. Both strings are `.`-separated access paths
|
||||
* of the same syntax as the `path` column.
|
||||
* 4. The `acceptingValue` column of barrier guard models specifies which branch of the guard is blocking flow. It can be "true" or "false".
|
||||
* 5. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 4. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources `"remote"` indicates a default remote flow source, and for summaries
|
||||
* `"taint"` indicates a default additional taint step and `"value"` indicates a
|
||||
@@ -360,11 +355,11 @@ private predicate barrierModel(string type, string path, string kind, string mod
|
||||
|
||||
/** Holds if a barrier guard model exists for the given parameters. */
|
||||
private predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, string model
|
||||
string type, string path, string branch, string kind, string model
|
||||
) {
|
||||
// No deprecation adapter for barrier models, they were not around back then.
|
||||
exists(QlBuiltins::ExtensionId madId |
|
||||
Extensions::barrierGuardModel(type, path, acceptingValue, kind, madId) and
|
||||
Extensions::barrierGuardModel(type, path, branch, kind, madId) and
|
||||
model = "MaD:" + madId.toString()
|
||||
)
|
||||
}
|
||||
@@ -788,16 +783,16 @@ module ModelOutput {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `acceptingValue`.
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `branch`.
|
||||
*/
|
||||
cached
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue, string model) {
|
||||
exists(string type, string path, string acceptingValue_str |
|
||||
acceptingValue = true and acceptingValue_str = "true"
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch, string model) {
|
||||
exists(string type, string path, string branch_str |
|
||||
branch = true and branch_str = "true"
|
||||
or
|
||||
acceptingValue = false and acceptingValue_str = "false"
|
||||
branch = false and branch_str = "false"
|
||||
|
|
||||
barrierGuardModel(type, path, acceptingValue_str, kind, model) and
|
||||
barrierGuardModel(type, path, branch_str, kind, model) and
|
||||
result = getNodeFromPath(type, path)
|
||||
)
|
||||
}
|
||||
@@ -861,12 +856,12 @@ module ModelOutput {
|
||||
API::Node getABarrierNode(string kind) { result = getABarrierNode(kind, _) }
|
||||
|
||||
/**
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `acceptingValue`.
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `branch`.
|
||||
*
|
||||
* INTERNAL: Do not use.
|
||||
*/
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue) {
|
||||
result = getABarrierGuardNode(kind, acceptingValue, _)
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch) {
|
||||
result = getABarrierGuardNode(kind, branch, _)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -33,11 +33,11 @@ extensible predicate barrierModel(
|
||||
* of the given `kind` and `madId` is the data extension row number.
|
||||
* `path` is assumed to lead to a parameter of a call (possibly `self`), and
|
||||
* the call is guarding the parameter.
|
||||
* `acceptingValue` is either `true` or `false`, indicating which branch of
|
||||
* the guard is protecting the parameter.
|
||||
* `branch` is either `true` or `false`, indicating which branch of the guard
|
||||
* is protecting the parameter.
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, QlBuiltins::ExtensionId madId
|
||||
string type, string path, string branch, string kind, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* @name Membership test with a non-container
|
||||
* @description A membership test, such as 'item in sequence', with a non-container on the right hand side will raise a 'TypeError'.
|
||||
* @kind path-problem
|
||||
* @kind problem
|
||||
* @tags quality
|
||||
* reliability
|
||||
* correctness
|
||||
@@ -12,47 +12,25 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch
|
||||
private import semmle.python.dataflow.new.internal.ClassInstanceFlow
|
||||
private import semmle.python.ApiGraphs
|
||||
private import LegacyPointsTo
|
||||
|
||||
predicate rhs_in_expr(Expr rhs, Compare cmp) {
|
||||
exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs |
|
||||
predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) {
|
||||
exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() |
|
||||
op instanceof In or op instanceof NotIn
|
||||
)
|
||||
}
|
||||
|
||||
module ContainsNonContainerSig implements ClassInstanceFlowSig {
|
||||
predicate isRelevantClass(Class cls) {
|
||||
not DuckTyping::isContainer(cls) and
|
||||
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and
|
||||
not exists(CallNode setattr_call |
|
||||
setattr_call.getFunction().(NameNode).getId() = "setattr" and
|
||||
setattr_call.getArg(0).(NameNode).getId() = cls.getName() and
|
||||
setattr_call.getScope() = cls.getScope()
|
||||
)
|
||||
}
|
||||
|
||||
predicate isInstanceSink(DataFlow::Node sink) { rhs_in_expr(sink.asExpr(), _) }
|
||||
|
||||
predicate isGuardType(DataFlow::Node checkedType) {
|
||||
checkedType =
|
||||
API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"])
|
||||
.getAValueReachableFromSource()
|
||||
}
|
||||
}
|
||||
|
||||
module ContainsNonContainerFlow = ClassInstanceFlow<ContainsNonContainerSig>;
|
||||
|
||||
import ContainsNonContainerFlow::Flow::PathGraph
|
||||
|
||||
from
|
||||
ContainsNonContainerFlow::Flow::PathNode source, ContainsNonContainerFlow::Flow::PathNode sink,
|
||||
ClassExpr ce
|
||||
ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin
|
||||
where
|
||||
ContainsNonContainerFlow::Flow::flowPath(source, sink) and
|
||||
source.getNode().asExpr() = ce
|
||||
select sink.getNode(), source, sink,
|
||||
"This test may raise an Exception as the $@ may be of non-container class $@.", source.getNode(),
|
||||
"target", ce.getInnerScope(), ce.getInnerScope().getName()
|
||||
rhs_in_expr(non_seq, cmp) and
|
||||
non_seq.pointsTo(_, v, origin) and
|
||||
v.getClass() = cls and
|
||||
not Types::failedInference(cls, _) and
|
||||
not cls.hasAttribute("__contains__") and
|
||||
not cls.hasAttribute("__iter__") and
|
||||
not cls.hasAttribute("__getitem__") and
|
||||
not cls = ClassValue::nonetype() and
|
||||
not cls = Value::named("types.MappingProxyType")
|
||||
select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin,
|
||||
"target", cls, cls.getName()
|
||||
|
||||
@@ -12,10 +12,10 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import LegacyPointsTo
|
||||
|
||||
predicate originIsLocals(ControlFlowNode n) {
|
||||
API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n
|
||||
predicate originIsLocals(ControlFlowNodeWithPointsTo n) {
|
||||
n.pointsTo(_, _, Value::named("locals").getACall())
|
||||
}
|
||||
|
||||
predicate modification_of_locals(ControlFlowNode f) {
|
||||
@@ -37,8 +37,5 @@ where
|
||||
// in module level scope `locals() == globals()`
|
||||
// see https://docs.python.org/3/library/functions.html#locals
|
||||
// FP report in https://github.com/github/codeql/issues/6674
|
||||
not a.getScope() instanceof Module and
|
||||
// in class level scope `locals()` reflects the class namespace,
|
||||
// so modifications do take effect.
|
||||
not a.getScope() instanceof Class
|
||||
not a.getScope() instanceof ModuleScope
|
||||
select a, "Modification of the locals() dictionary will have no effect on the local variables."
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import LegacyPointsTo
|
||||
|
||||
predicate typing_import(ImportingStmt is) {
|
||||
exists(Module m |
|
||||
@@ -34,7 +34,11 @@ predicate unique_yield(Stmt s) {
|
||||
/** Holds if `contextlib.suppress` may be used in the same scope as `s` */
|
||||
predicate suppression_in_scope(Stmt s) {
|
||||
exists(With w |
|
||||
w.getContextExpr() = API::moduleImport("contextlib").getMember("suppress").getACall().asExpr() and
|
||||
w.getContextExpr()
|
||||
.(Call)
|
||||
.getFunc()
|
||||
.(ExprWithPointsTo)
|
||||
.pointsTo(Value::named("contextlib.suppress")) and
|
||||
w.getScope() = s.getScope()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -12,49 +12,11 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch
|
||||
private import semmle.python.dataflow.new.internal.Builtins
|
||||
private import semmle.python.ApiGraphs
|
||||
private import LegacyPointsTo
|
||||
|
||||
/**
|
||||
* Holds if `cls` is a user-defined exception class, i.e. it transitively
|
||||
* extends one of the builtin exception base classes.
|
||||
*/
|
||||
predicate isUserDefinedExceptionClass(Class cls) {
|
||||
cls.getABase() =
|
||||
API::builtin(["BaseException", "Exception"]).getAValueReachableFromSource().asExpr()
|
||||
or
|
||||
isUserDefinedExceptionClass(getADirectSuperclass(cls))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of a builtin exception class.
|
||||
*/
|
||||
string getBuiltinExceptionName() {
|
||||
result = Builtins::getBuiltinName() and
|
||||
(
|
||||
result.matches("%Error") or
|
||||
result.matches("%Exception") or
|
||||
result.matches("%Warning") or
|
||||
result =
|
||||
["GeneratorExit", "KeyboardInterrupt", "StopIteration", "StopAsyncIteration", "SystemExit"]
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `call` is an instantiation of an exception class.
|
||||
*/
|
||||
predicate isExceptionInstantiation(Call call) {
|
||||
exists(Class cls |
|
||||
classTracker(cls).asExpr() = call.getFunc() and
|
||||
isUserDefinedExceptionClass(cls)
|
||||
)
|
||||
or
|
||||
call.getFunc() = API::builtin(getBuiltinExceptionName()).getAValueReachableFromSource().asExpr()
|
||||
}
|
||||
|
||||
from Call call
|
||||
from Call call, ClassValue ex
|
||||
where
|
||||
isExceptionInstantiation(call) and
|
||||
call.getFunc().(ExprWithPointsTo).pointsTo(ex) and
|
||||
ex.getASuperType() = ClassValue::exception() and
|
||||
exists(ExprStmt s | s.getValue() = call)
|
||||
select call, "Instantiating an exception, but not raising it, has no effect."
|
||||
|
||||
@@ -12,12 +12,10 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import LegacyPointsTo
|
||||
|
||||
from CallNode call, string name
|
||||
where
|
||||
name = ["exit", "quit"] and
|
||||
call = API::builtin(name).getACall().asCfgNode()
|
||||
where call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::siteQuitter(name))
|
||||
select call,
|
||||
"The '" + name +
|
||||
"' site.Quitter object may not exist if the 'site' module is not loaded or is modified."
|
||||
|
||||
@@ -1,22 +1,2 @@
|
||||
edges
|
||||
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | provenance | |
|
||||
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | provenance | |
|
||||
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | provenance | |
|
||||
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
|
||||
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | |
|
||||
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | provenance | Config |
|
||||
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | expressions_test.py:88:5:88:7 | ControlFlowNode for seq | provenance | |
|
||||
nodes
|
||||
| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | semmle.label | ControlFlowNode for ClassExpr |
|
||||
| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
|
||||
| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
|
||||
| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter |
|
||||
| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | semmle.label | ControlFlowNode for XIter() |
|
||||
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
|
||||
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
|
||||
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq |
|
||||
subpaths
|
||||
#select
|
||||
| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
|
||||
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
|
||||
| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter |
|
||||
| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter |
|
||||
| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter |
|
||||
|
||||
@@ -279,62 +279,3 @@ def useofapply():
|
||||
def apply(f):
|
||||
pass
|
||||
apply(foo)([1])
|
||||
|
||||
# Class used as a decorator: the runtime value at attribute access is the
|
||||
# function's return value, not the decorator class instance.
|
||||
class cached_property(object):
|
||||
def __init__(self, func):
|
||||
self.func = func
|
||||
def __get__(self, obj, cls):
|
||||
val = self.func(obj)
|
||||
setattr(obj, self.func.__name__, val)
|
||||
return val
|
||||
|
||||
class MyForm(object):
|
||||
@cached_property
|
||||
def changed_data(self):
|
||||
return [1, 2, 3]
|
||||
|
||||
def test_decorator_class(form):
|
||||
f = MyForm()
|
||||
# OK: cached_property is a descriptor; the actual runtime value is a list.
|
||||
if "name" in f.changed_data:
|
||||
pass
|
||||
|
||||
# Class with dynamically added methods via setattr: we cannot statically
|
||||
# determine its full interface, so we should not flag it.
|
||||
class DynamicProxy(object):
|
||||
def __init__(self, args):
|
||||
self._args = args
|
||||
|
||||
for method_name in ["__contains__", "__iter__", "__len__"]:
|
||||
def wrapper(self, *args, __method_name=method_name):
|
||||
pass
|
||||
setattr(DynamicProxy, method_name, wrapper)
|
||||
|
||||
def test_dynamic_methods():
|
||||
proxy = DynamicProxy(())
|
||||
# OK: __contains__ is added dynamically via setattr.
|
||||
if "name" in proxy:
|
||||
pass
|
||||
|
||||
# isinstance guard should suppress non-container warning
|
||||
def guarded_contains(x):
|
||||
obj = XIter()
|
||||
if isinstance(obj, dict):
|
||||
if x in obj: # OK: guarded by isinstance
|
||||
pass
|
||||
|
||||
def guarded_contains_tuple(x):
|
||||
obj = XIter()
|
||||
if isinstance(obj, (list, dict, set)):
|
||||
if x in obj: # OK: guarded by isinstance with tuple of types
|
||||
pass
|
||||
|
||||
# Negated isinstance guard: early return when NOT a container
|
||||
def guarded_contains_negated(x):
|
||||
obj = XIter()
|
||||
if not isinstance(obj, dict):
|
||||
return
|
||||
if x in obj: # OK: guarded by negated isinstance + early return
|
||||
pass
|
||||
|
||||
@@ -174,9 +174,3 @@ def assert_ok(seq):
|
||||
# False positive. ODASA-8042. Fixed in PR #2401.
|
||||
class false_positive:
|
||||
e = (x for x in [])
|
||||
|
||||
# In class-level scope `locals()` reflects the class namespace,
|
||||
# so modifications do take effect.
|
||||
class MyClass:
|
||||
locals()['x'] = 43 # OK
|
||||
y = x
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Build a local Python extractor pack from source.
|
||||
#
|
||||
# Usage with the CodeQL CLI (run from the repository root):
|
||||
#
|
||||
# codeql database create <db> -l python -s <src> --search-path .
|
||||
# codeql test run --search-path . python/ql/test/<test-dir>
|
||||
#
|
||||
set -eux
|
||||
|
||||
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
|
||||
platform="linux64"
|
||||
elif [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
platform="osx64"
|
||||
else
|
||||
echo "Unknown OS"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
# Build the tsg-python Rust binary
|
||||
(cd extractor/tsg-python && cargo build --release)
|
||||
tsg_bin="extractor/tsg-python/target/release/tsg-python"
|
||||
|
||||
# Generate python3src.zip from the Python extractor source.
|
||||
# make_zips.py creates the zip in the source directory and then copies it to the
|
||||
# given output directory. We use a temporary directory to avoid a same-file copy
|
||||
# error, then move the zip back.
|
||||
tmpdir=$(mktemp -d)
|
||||
trap 'rm -rf "$tmpdir"' EXIT
|
||||
(cd extractor && python3 make_zips.py "$tmpdir")
|
||||
cp "$tmpdir/python3src.zip" extractor/python3src.zip
|
||||
|
||||
# Assemble the extractor pack
|
||||
rm -rf extractor-pack
|
||||
mkdir -p extractor-pack/tools/${platform}
|
||||
|
||||
# Root-level metadata and schema files
|
||||
cp codeql-extractor.yml extractor-pack/
|
||||
cp ql/lib/semmlecode.python.dbscheme extractor-pack/
|
||||
cp ql/lib/semmlecode.python.dbscheme.stats extractor-pack/
|
||||
|
||||
# Python extractor engine files (into tools/)
|
||||
cp extractor/python_tracer.py extractor-pack/tools/
|
||||
cp extractor/index.py extractor-pack/tools/
|
||||
cp extractor/setup.py extractor-pack/tools/
|
||||
cp extractor/convert_setup.py extractor-pack/tools/
|
||||
cp extractor/get_venv_lib.py extractor-pack/tools/
|
||||
cp extractor/imp.py extractor-pack/tools/
|
||||
cp extractor/LICENSE-PSF.md extractor-pack/tools/
|
||||
cp extractor/python3src.zip extractor-pack/tools/
|
||||
cp -r extractor/data extractor-pack/tools/
|
||||
|
||||
# Shell tool scripts (autobuild, pre-finalize, lgtm-scripts)
|
||||
cp tools/autobuild.sh extractor-pack/tools/
|
||||
cp tools/autobuild.cmd extractor-pack/tools/
|
||||
cp tools/pre-finalize.sh extractor-pack/tools/
|
||||
cp tools/pre-finalize.cmd extractor-pack/tools/
|
||||
cp -r tools/lgtm-scripts extractor-pack/tools/
|
||||
|
||||
# Downgrades
|
||||
cp -r downgrades extractor-pack/
|
||||
|
||||
# Platform-specific Rust binary
|
||||
cp "${tsg_bin}" extractor-pack/tools/${platform}/tsg-python
|
||||
@@ -10,10 +10,6 @@
|
||||
* `type, path, kind`
|
||||
* - Summaries:
|
||||
* `type, path, input, output, kind`
|
||||
* - Barriers:
|
||||
* `type, path, kind`
|
||||
* - BarrierGuards:
|
||||
* `type, path, acceptingValue, kind`
|
||||
* - Types:
|
||||
* `type1, type2, path`
|
||||
*
|
||||
@@ -46,8 +42,7 @@
|
||||
* 3. The `input` and `output` columns specify how data enters and leaves the element selected by the
|
||||
* first `(type, path)` tuple. Both strings are `.`-separated access paths
|
||||
* of the same syntax as the `path` column.
|
||||
* 4. The `acceptingValue` column of barrier guard models specifies which branch of the guard is blocking flow. It can be "true" or "false".
|
||||
* 5. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 4. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources `"remote"` indicates a default remote flow source, and for summaries
|
||||
* `"taint"` indicates a default additional taint step and `"value"` indicates a
|
||||
@@ -360,11 +355,11 @@ private predicate barrierModel(string type, string path, string kind, string mod
|
||||
|
||||
/** Holds if a barrier guard model exists for the given parameters. */
|
||||
private predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, string model
|
||||
string type, string path, string branch, string kind, string model
|
||||
) {
|
||||
// No deprecation adapter for barrier models, they were not around back then.
|
||||
exists(QlBuiltins::ExtensionId madId |
|
||||
Extensions::barrierGuardModel(type, path, acceptingValue, kind, madId) and
|
||||
Extensions::barrierGuardModel(type, path, branch, kind, madId) and
|
||||
model = "MaD:" + madId.toString()
|
||||
)
|
||||
}
|
||||
@@ -788,16 +783,16 @@ module ModelOutput {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `acceptingValue`.
|
||||
* Holds if a barrier model contributed `barrier` with the given `kind` for the given `branch`.
|
||||
*/
|
||||
cached
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue, string model) {
|
||||
exists(string type, string path, string acceptingValue_str |
|
||||
acceptingValue = true and acceptingValue_str = "true"
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch, string model) {
|
||||
exists(string type, string path, string branch_str |
|
||||
branch = true and branch_str = "true"
|
||||
or
|
||||
acceptingValue = false and acceptingValue_str = "false"
|
||||
branch = false and branch_str = "false"
|
||||
|
|
||||
barrierGuardModel(type, path, acceptingValue_str, kind, model) and
|
||||
barrierGuardModel(type, path, branch_str, kind, model) and
|
||||
result = getNodeFromPath(type, path)
|
||||
)
|
||||
}
|
||||
@@ -861,12 +856,12 @@ module ModelOutput {
|
||||
API::Node getABarrierNode(string kind) { result = getABarrierNode(kind, _) }
|
||||
|
||||
/**
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `acceptingValue`.
|
||||
* Holds if an external model contributed `barrier-guard` with the given `kind` and `branch`.
|
||||
*
|
||||
* INTERNAL: Do not use.
|
||||
*/
|
||||
API::Node getABarrierGuardNode(string kind, boolean acceptingValue) {
|
||||
result = getABarrierGuardNode(kind, acceptingValue, _)
|
||||
API::Node getABarrierGuardNode(string kind, boolean branch) {
|
||||
result = getABarrierGuardNode(kind, branch, _)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -33,11 +33,11 @@ extensible predicate barrierModel(
|
||||
* of the given `kind` and `madId` is the data extension row number.
|
||||
* `path` is assumed to lead to a parameter of a call (possibly `self`), and
|
||||
* the call is guarding the parameter.
|
||||
* `acceptingValue` is either `true` or `false`, indicating which branch of
|
||||
* the guard is protecting the parameter.
|
||||
* `branch` is either `true` or `false`, indicating which branch of the guard
|
||||
* is protecting the parameter.
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string type, string path, string acceptingValue, string kind, QlBuiltins::ExtensionId madId
|
||||
string type, string path, string branch, string kind, QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
/**
|
||||
|
||||
@@ -45,16 +45,13 @@ extensible predicate additionalExternalFile(string relativePath);
|
||||
|
||||
/** A file. */
|
||||
class File extends Container, Impl::File {
|
||||
pragma[nomagic]
|
||||
private predicate isAdditionalExternalFile() { additionalExternalFile(this.getRelativePath()) }
|
||||
|
||||
/**
|
||||
* Holds if this file was extracted from the source code of the target project
|
||||
* (rather than another location such as inside a dependency).
|
||||
*/
|
||||
predicate fromSource() {
|
||||
exists(ExtractorStep s | s.getAction() = "Extract" and s.getFile() = this) and
|
||||
not this.isAdditionalExternalFile()
|
||||
not additionalExternalFile(this.getRelativePath())
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1183,12 +1183,12 @@ private module Cached {
|
||||
exists(
|
||||
FlowSummaryImpl::Public::BarrierGuardElement b,
|
||||
FlowSummaryImpl::Private::SummaryComponentStack stack,
|
||||
FlowSummaryImpl::Public::AcceptingValue acceptingValue, string kind, string model
|
||||
FlowSummaryImpl::Public::AcceptingValue acceptingvalue, string kind, string model
|
||||
|
|
||||
FlowSummaryImpl::Private::barrierGuardSpec(b, stack, acceptingValue, kind, model) and
|
||||
FlowSummaryImpl::Private::barrierGuardSpec(b, stack, acceptingvalue, kind, model) and
|
||||
e = FlowSummaryImpl::StepsInput::getSinkNode(b, stack.headOfSingleton()).asExpr() and
|
||||
kmp = TMkPair(kind, model) and
|
||||
gv = convertAcceptingValue(acceptingValue) and
|
||||
gv = convertAcceptingValue(acceptingvalue) and
|
||||
g = b.getCall()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -9,13 +9,6 @@
|
||||
* `path; input; kind; provenance`
|
||||
* - Summaries:
|
||||
* `path; input; output; kind; provenance`
|
||||
* - Barriers:
|
||||
* `path; output; kind; provenance`
|
||||
* - BarrierGuards:
|
||||
* `path; input; acceptingValue; kind; provenance`
|
||||
* - Neutrals:
|
||||
* `path; kind; provenance`
|
||||
* A neutral is used to indicate that a callable is neutral with respect to flow (no summary), source (is not a source) or sink (is not a sink).
|
||||
*
|
||||
* The interpretation of a row is similar to API-graphs with a left-to-right
|
||||
* reading.
|
||||
@@ -41,15 +34,12 @@
|
||||
* - `Field[i]`: the `i`th element of a tuple.
|
||||
* - `Reference`: the referenced value.
|
||||
* - `Future`: the value being computed asynchronously.
|
||||
* 3. The `acceptingValue` column of barrier guard models specifies which branch of the
|
||||
* guard is blocking flow. It can be "true" or "false". In the future
|
||||
* "no-exception", "not-zero", "null", "not-null" may be supported.
|
||||
* 4. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* 3. The `kind` column is a tag that can be referenced from QL to determine to
|
||||
* which classes the interpreted elements should be added. For example, for
|
||||
* sources `"remote"` indicates a default remote flow source, and for summaries
|
||||
* `"taint"` indicates a default additional taint step and `"value"` indicates a
|
||||
* globally applicable value-preserving step.
|
||||
* 5. The `provenance` column is mainly used internally, and should be set to `"manual"` for
|
||||
* 4. The `provenance` column is mainly used internally, and should be set to `"manual"` for
|
||||
* all custom models.
|
||||
*/
|
||||
|
||||
@@ -124,12 +114,11 @@ extensible predicate barrierModel(
|
||||
* extension row number.
|
||||
*
|
||||
* The value referred to by `input` is assumed to lead to an argument of a call
|
||||
* (possibly `self`), and the call is guarding the argument.
|
||||
* `acceptingValue` is either `true` or `false`, indicating which branch of
|
||||
* the guard is protecting the parameter.
|
||||
* (possibly `self`), and the call is guarding the argument. `branch` is either `true`
|
||||
* or `false`, indicating which branch of the guard is protecting the argument.
|
||||
*/
|
||||
extensible predicate barrierGuardModel(
|
||||
string path, string input, string acceptingValue, string kind, string provenance,
|
||||
string path, string input, string branch, string kind, string provenance,
|
||||
QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
@@ -164,9 +153,9 @@ predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) {
|
||||
model = "Barrier: " + path + "; " + output + "; " + kind
|
||||
)
|
||||
or
|
||||
exists(string path, string input, string acceptingValue, string kind |
|
||||
barrierGuardModel(path, input, acceptingValue, kind, _, madId) and
|
||||
model = "Barrier guard: " + path + "; " + input + "; " + acceptingValue + "; " + kind
|
||||
exists(string path, string input, string branch, string kind |
|
||||
barrierGuardModel(path, input, branch, kind, _, madId) and
|
||||
model = "Barrier guard: " + path + "; " + input + "; " + branch + "; " + kind
|
||||
)
|
||||
}
|
||||
|
||||
@@ -276,10 +265,10 @@ private class FlowBarrierGuardFromModel extends FlowBarrierGuard::Range {
|
||||
}
|
||||
|
||||
override predicate isBarrierGuard(
|
||||
string input, string acceptingValue, string kind, Provenance provenance, string model
|
||||
string input, string branch, string kind, Provenance provenance, string model
|
||||
) {
|
||||
exists(QlBuiltins::ExtensionId madId |
|
||||
barrierGuardModel(path, input, acceptingValue, kind, provenance, madId) and
|
||||
barrierGuardModel(path, input, branch, kind, provenance, madId) and
|
||||
model = "MaD:" + madId.toString()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -388,11 +388,11 @@ module Make<
|
||||
|
||||
/**
|
||||
* Holds if this element is a flow barrier guard of kind `kind`, for data
|
||||
* flowing in as described by `input`, when `this` evaluates to `acceptingValue`.
|
||||
* flowing in as described by `input`, when `this` evaluates to `branch`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
abstract predicate isBarrierGuard(
|
||||
string input, string acceptingValue, string kind, Provenance provenance, string model
|
||||
string input, string branch, string kind, Provenance provenance, string model
|
||||
);
|
||||
}
|
||||
|
||||
@@ -764,10 +764,10 @@ module Make<
|
||||
}
|
||||
|
||||
private predicate isRelevantBarrierGuard(
|
||||
BarrierGuardElement e, string input, string acceptingValue, string kind,
|
||||
Provenance provenance, string model
|
||||
BarrierGuardElement e, string input, string branch, string kind, Provenance provenance,
|
||||
string model
|
||||
) {
|
||||
e.isBarrierGuard(input, acceptingValue, kind, provenance, model) and
|
||||
e.isBarrierGuard(input, branch, kind, provenance, model) and
|
||||
(
|
||||
provenance.isManual()
|
||||
or
|
||||
@@ -1588,11 +1588,11 @@ module Make<
|
||||
* Holds if `barrierGuard` is a relevant barrier guard element with input specification `inSpec`.
|
||||
*/
|
||||
predicate barrierGuardSpec(
|
||||
BarrierGuardElement barrierGuard, SummaryComponentStack inSpec, string acceptingValue,
|
||||
string kind, string model
|
||||
BarrierGuardElement barrierGuard, SummaryComponentStack inSpec, string branch, string kind,
|
||||
string model
|
||||
) {
|
||||
exists(string input |
|
||||
isRelevantBarrierGuard(barrierGuard, input, acceptingValue, kind, _, model) and
|
||||
isRelevantBarrierGuard(barrierGuard, input, branch, kind, _, model) and
|
||||
External::interpretSpec(input, inSpec)
|
||||
)
|
||||
}
|
||||
@@ -2189,10 +2189,10 @@ module Make<
|
||||
not exists(interpretComponent(c))
|
||||
}
|
||||
|
||||
/** Holds if `acceptingValue` is not a valid barrier guard accepting-value. */
|
||||
bindingset[acceptingValue]
|
||||
predicate invalidAcceptingValue(string acceptingValue) {
|
||||
not acceptingValue instanceof AcceptingValue
|
||||
/** Holds if `acceptingvalue` is not a valid barrier guard accepting-value. */
|
||||
bindingset[acceptingvalue]
|
||||
predicate invalidAcceptingValue(string acceptingvalue) {
|
||||
not acceptingvalue instanceof AcceptingValue
|
||||
}
|
||||
|
||||
/** Holds if `provenance` is not a valid provenance value. */
|
||||
@@ -2242,10 +2242,10 @@ module Make<
|
||||
|
||||
/**
|
||||
* Holds if an external barrier guard specification exists for `n` with input
|
||||
* specification `input`, accepting value `acceptingValue`, and kind `kind`.
|
||||
* specification `input`, accepting value `acceptingvalue`, and kind `kind`.
|
||||
*/
|
||||
predicate barrierGuardElement(
|
||||
Element n, string input, AcceptingValue acceptingValue, string kind,
|
||||
Element n, string input, AcceptingValue acceptingvalue, string kind,
|
||||
Provenance provenance, string model
|
||||
);
|
||||
|
||||
@@ -2371,11 +2371,11 @@ module Make<
|
||||
}
|
||||
|
||||
private predicate barrierGuardElementRef(
|
||||
InterpretNode ref, SourceSinkAccessPath input, AcceptingValue acceptingValue, string kind,
|
||||
InterpretNode ref, SourceSinkAccessPath input, AcceptingValue acceptingvalue, string kind,
|
||||
string model
|
||||
) {
|
||||
exists(SourceOrSinkElement e |
|
||||
barrierGuardElement(e, input, acceptingValue, kind, _, model) and
|
||||
barrierGuardElement(e, input, acceptingvalue, kind, _, model) and
|
||||
if inputNeedsReferenceExt(input.getToken(0))
|
||||
then e = ref.getCallTarget()
|
||||
else e = ref.asElement()
|
||||
@@ -2518,10 +2518,10 @@ module Make<
|
||||
* given kind in a MaD flow model.
|
||||
*/
|
||||
predicate isBarrierGuardNode(
|
||||
InterpretNode node, AcceptingValue acceptingValue, string kind, string model
|
||||
InterpretNode node, AcceptingValue acceptingvalue, string kind, string model
|
||||
) {
|
||||
exists(InterpretNode ref, SourceSinkAccessPath input |
|
||||
barrierGuardElementRef(ref, input, acceptingValue, kind, model) and
|
||||
barrierGuardElementRef(ref, input, acceptingvalue, kind, model) and
|
||||
interpretInput(input, input.getNumToken(), ref, node)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ signature module ExtensionsSig {
|
||||
*/
|
||||
predicate barrierGuardModel(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance,
|
||||
string input, string acceptingvalue, string kind, string provenance,
|
||||
QlBuiltins::ExtensionId madId
|
||||
);
|
||||
|
||||
@@ -142,14 +142,14 @@ module ModelsAsData<ExtensionsSig Extensions, InputSig Input> {
|
||||
or
|
||||
exists(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance
|
||||
string input, string acceptingvalue, string kind, string provenance
|
||||
|
|
||||
Extensions::barrierGuardModel(namespace, type, subtypes, name, signature, ext, input,
|
||||
acceptingValue, kind, provenance, madId)
|
||||
acceptingvalue, kind, provenance, madId)
|
||||
|
|
||||
model =
|
||||
"Barrier Guard: " + namespace + "; " + type + "; " + subtypes + "; " + name + "; " +
|
||||
signature + "; " + ext + "; " + input + "; " + acceptingValue + "; " + kind + "; " +
|
||||
signature + "; " + ext + "; " + input + "; " + acceptingvalue + "; " + kind + "; " +
|
||||
provenance
|
||||
)
|
||||
or
|
||||
@@ -241,12 +241,12 @@ module ModelsAsData<ExtensionsSig Extensions, InputSig Input> {
|
||||
/** Holds if a barrier guard model exists for the given parameters. */
|
||||
predicate barrierGuardModel(
|
||||
string namespace, string type, boolean subtypes, string name, string signature, string ext,
|
||||
string input, string acceptingValue, string kind, string provenance, string model
|
||||
string input, string acceptingvalue, string kind, string provenance, string model
|
||||
) {
|
||||
exists(string namespaceOrGroup, QlBuiltins::ExtensionId madId |
|
||||
namespace = getNamespace(namespaceOrGroup) and
|
||||
Extensions::barrierGuardModel(namespaceOrGroup, type, subtypes, name, signature, ext, input,
|
||||
acceptingValue, kind, provenance, madId) and
|
||||
acceptingvalue, kind, provenance, madId) and
|
||||
model = "MaD:" + madId.toString()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ module SourceSinkInterpretationInput implements
|
||||
}
|
||||
|
||||
predicate barrierGuardElement(
|
||||
Element n, string input, Public::AcceptingValue acceptingValue, string kind,
|
||||
Element n, string input, Public::AcceptingValue acceptingvalue, string kind,
|
||||
Public::Provenance provenance, string model
|
||||
) {
|
||||
none()
|
||||
|
||||
Reference in New Issue
Block a user