mirror of
https://github.com/github/codeql.git
synced 2026-05-14 19:29:28 +02:00
TypeScript-Go wrapper: binary AST decoder, JSON converter, and tokenizer
Implement the core components for translating tsgo's binary AST format into the JSON format expected by the Java extractor: - decoder.go: Binary AST format parser with random-access node accessors (kind, pos, end, flags, children, strings, extended data) - converter.go: Walks decoded AST and produces JSON matching Node.js wrapper output (augmented `$pos`, `$end`, `$lineStarts`, `$tokens`, isTypeOnly, HeritageClause token, TypeOperator operator) - childprops.go: Maps ~100 SyntaxKind names to ordered child property name lists for correct bitmask-to-property assignment - scanner.go: TypeScript tokenizer producing a `$tokens` array with rescan support for regex, template, and greater-than disambiguation Update metadata.go with correct TS7 SyntaxKind iota values and export metadata functions. Wire decoder+converter through TsgoParser.Parse(). Validation test passes: all 421 diffs are expected TS5-vs-TS7 numeric kind/flags/token/operator value differences. Zero structural diffs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,210 @@
|
||||
package astconv
|
||||
|
||||
// childProps maps SyntaxKind string names to ordered lists of child property names.
// The order corresponds to the bitmask order in the binary encoder. When a node
// uses the Children data type (top 2 bits = 0b00), the low byte is a bitmask
// indicating which of these properties are present. Children are consumed in order.
//
// These names must match the property names expected by the Java extractor.
// Derived from microsoft/typescript-go/internal/api/encoder/encoder.go.
var childProps = map[string][]string{
	// Multi-child nodes with property mask

	// Names, type parameters, and statements.
	"QualifiedName":           {"left", "right"},
	"TypeParameter":           {"modifiers", "name", "constraint", "default"},
	"IfStatement":             {"expression", "thenStatement", "elseStatement"},
	"DoStatement":             {"statement", "expression"},
	"WhileStatement":          {"expression", "statement"},
	"ForStatement":            {"initializer", "condition", "incrementor", "statement"},
	"ForInStatement":          {"awaitModifier", "initializer", "expression", "statement"},
	"ForOfStatement":          {"awaitModifier", "initializer", "expression", "statement"},
	"WithStatement":           {"expression", "statement"},
	"SwitchStatement":         {"expression", "caseBlock"},
	"CaseClause":              {"expression", "statements"},
	"DefaultClause":           {"expression", "statements"},
	"TryStatement":            {"tryBlock", "catchClause", "finallyBlock"},
	"CatchClause":             {"variableDeclaration", "block"},
	"LabeledStatement":        {"label", "statement"},
	"VariableStatement":       {"modifiers", "declarationList"},
	"VariableDeclarationList": {"declarations"},
	"VariableDeclaration":     {"name", "exclamationToken", "type", "initializer"},
	"Parameter":               {"modifiers", "dotDotDotToken", "name", "questionToken", "type", "initializer"},
	"BindingElement":          {"dotDotDotToken", "propertyName", "name", "initializer"},

	// Declarations.
	"FunctionDeclaration":        {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
	"InterfaceDeclaration":       {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
	"TypeAliasDeclaration":       {"modifiers", "name", "typeParameters", "type"},
	"EnumMember":                 {"name", "initializer"},
	"EnumDeclaration":            {"modifiers", "name", "members"},
	"ModuleDeclaration":          {"modifiers", "name", "body"},
	"ImportEqualsDeclaration":    {"modifiers", "name", "moduleReference"},
	"ImportDeclaration":          {"modifiers", "importClause", "moduleSpecifier", "attributes"},
	"JSImportDeclaration":        {"modifiers", "importClause", "moduleSpecifier", "attributes"},
	"ImportSpecifier":            {"propertyName", "name"},
	"ImportClause":               {"name", "namedBindings"},
	"ExportAssignment":           {"modifiers", "expression"},
	"JSExportAssignment":         {"modifiers", "expression"},
	"NamespaceExportDeclaration": {"modifiers", "name"},
	"ExportDeclaration":          {"modifiers", "exportClause", "moduleSpecifier", "attributes"},
	"ExportSpecifier":            {"propertyName", "name"},

	// Signatures and class members.
	"CallSignature":       {"typeParameters", "parameters", "type"},
	"ConstructSignature":  {"typeParameters", "parameters", "type"},
	"Constructor":         {"modifiers", "typeParameters", "parameters", "type", "body"},
	"GetAccessor":         {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
	"SetAccessor":         {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
	"IndexSignature":      {"modifiers", "parameters", "type"},
	"MethodSignature":     {"modifiers", "name", "questionToken", "typeParameters", "parameters", "type"},
	"MethodDeclaration":   {"modifiers", "asteriskToken", "name", "questionToken", "typeParameters", "parameters", "type", "body"},
	"PropertySignature":   {"modifiers", "name", "questionToken", "type", "initializer"},
	"PropertyDeclaration": {"modifiers", "name", "questionToken", "type", "initializer"},

	// Expressions.
	"BinaryExpression":            {"left", "operatorToken", "right"},
	"YieldExpression":             {"asteriskToken", "expression"},
	"ArrowFunction":               {"modifiers", "typeParameters", "parameters", "type", "equalsGreaterThanToken", "body"},
	"FunctionExpression":          {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
	"AsExpression":                {"expression", "type"},
	"SatisfiesExpression":         {"expression", "type"},
	"ConditionalExpression":       {"condition", "questionToken", "whenTrue", "colonToken", "whenFalse"},
	"PropertyAccessExpression":    {"expression", "questionDotToken", "name"},
	"ElementAccessExpression":     {"expression", "questionDotToken", "argumentExpression"},
	"CallExpression":              {"expression", "questionDotToken", "typeArguments", "arguments"},
	"NewExpression":               {"expression", "typeArguments", "arguments"},
	"TemplateExpression":          {"head", "templateSpans"},
	"TemplateSpan":                {"expression", "literal"},
	"TaggedTemplateExpression":    {"tag", "questionDotToken", "typeArguments", "template"},
	"PropertyAssignment":          {"modifiers", "name", "questionToken", "initializer"},
	"ShorthandPropertyAssignment": {"modifiers", "name", "questionToken", "equalsToken", "objectAssignmentInitializer"},
	"TypeAssertionExpression":     {"type", "expression"},

	// Type nodes.
	"ConditionalType":             {"checkType", "extendsType", "trueType", "falseType"},
	"IndexedAccessType":           {"objectType", "indexType"},
	"TypeReference":               {"typeName", "typeArguments"},
	"ExpressionWithTypeArguments": {"expression", "typeArguments"},
	"TypePredicate":               {"assertsModifier", "parameterName", "type"},
	"ImportType":                  {"argument", "attributes", "qualifier", "typeArguments"},
	"ImportAttribute":             {"name", "value"},
	"TypeQuery":                   {"exprName", "typeArguments"},
	"MappedType":                  {"readonlyToken", "typeParameter", "nameType", "questionToken", "type", "members"},
	"NamedTupleMember":            {"dotDotDotToken", "name", "questionToken", "type"},
	"FunctionType":                {"typeParameters", "parameters", "type"},
	"ConstructorType":             {"modifiers", "typeParameters", "parameters", "type"},
	"TemplateLiteralType":         {"head", "templateSpans"},
	"TemplateLiteralTypeSpan":     {"type", "literal"},

	// JSX.
	"JsxElement":            {"openingElement", "children", "closingElement"},
	"JsxNamespacedName":     {"name", "namespace"},
	"JsxOpeningElement":     {"tagName", "typeArguments", "attributes"},
	"JsxSelfClosingElement": {"tagName", "typeArguments", "attributes"},
	"JsxFragment":           {"openingFragment", "children", "closingFragment"},
	"JsxAttribute":          {"name", "initializer"},
	"JsxExpression":         {"dotDotDotToken", "expression"},

	// JSDoc.
	"JSDoc":              {"comment", "tags"},
	"JSDocTypeTag":       {"tagName", "typeExpression", "comment"},
	"JSDocTag":           {"tagName", "comment"},
	"JSDocTemplateTag":   {"tagName", "constraint", "typeParameters", "comment"},
	"JSDocReturnTag":     {"tagName", "typeExpression", "comment"},
	"JSDocPublicTag":     {"tagName", "comment"},
	"JSDocPrivateTag":    {"tagName", "comment"},
	"JSDocProtectedTag":  {"tagName", "comment"},
	"JSDocReadonlyTag":   {"tagName", "comment"},
	"JSDocOverrideTag":   {"tagName", "comment"},
	"JSDocDeprecatedTag": {"tagName", "comment"},
	"JSDocSeeTag":        {"tagName", "nameExpression", "comment"},
	"JSDocImplementsTag": {"tagName", "className", "comment"},
	"JSDocAugmentsTag":   {"tagName", "className", "comment"},
	"JSDocSatisfiesTag":  {"tagName", "typeExpression", "comment"},
	"JSDocThrowsTag":     {"tagName", "typeExpression", "comment"},
	"JSDocThisTag":       {"tagName", "typeExpression", "comment"},
	"JSDocImportTag":     {"tagName", "importClause", "moduleSpecifier", "attributes", "comment"},
	"JSDocCallbackTag":   {"tagName", "typeExpression", "fullName", "comment"},
	"JSDocOverloadTag":   {"tagName", "typeExpression", "comment"},
	"JSDocTypedefTag":    {"tagName", "typeExpression", "name", "comment"},
	"JSDocSignature":     {"typeParameters", "parameters", "type"},

	// Classes.
	"ClassStaticBlockDeclaration": {"modifiers", "body"},
	"ClassDeclaration":            {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
	"ClassExpression":             {"modifiers", "name", "typeParameters", "heritageClauses", "members"},

	// JSDocParameterTag and JSDocPropertyTag have order-dependent children
	// (handled specially in the converter based on isNameFirst defined bit).
	// Default order (isNameFirst=false):
	"JSDocParameterTag": {"tagName", "typeExpression", "name", "comment"},
	"JSDocPropertyTag":  {"tagName", "typeExpression", "name", "comment"},
}
|
||||
|
||||
// singleChildProp maps node kinds that have exactly one Node child to
// the property name for that child. The converter assigns the node's sole
// child (if present) directly under this property name.
var singleChildProp = map[string]string{
	// Statements wrapping a single expression or label.
	"ReturnStatement":         "expression",
	"ThrowStatement":          "expression",
	"ExpressionStatement":     "expression",
	"BreakStatement":          "label",
	"ContinueStatement":       "label",
	// Expression wrappers.
	"ParenthesizedExpression": "expression",
	"ComputedPropertyName":    "expression",
	"Decorator":               "expression",
	"SpreadElement":           "expression",
	"SpreadAssignment":        "expression",
	"DeleteExpression":        "expression",
	"TypeOfExpression":        "expression",
	"VoidExpression":          "expression",
	"AwaitExpression":         "expression",
	"NonNullExpression":       "expression",
	"ExternalModuleReference": "expression",
	// Import/export name holders.
	"NamespaceImport":         "name",
	"NamespaceExport":         "name",
	"JsxClosingElement":       "tagName",
	// Type nodes wrapping a single type or name.
	"ArrayType":               "elementType",
	"LiteralType":             "literal",
	"InferType":               "typeParameter",
	"OptionalType":            "type",
	"RestType":                "type",
	"ParenthesizedType":       "type",
	"JSDocTypeExpression":     "type",
	"JSDocNonNullableType":    "type",
	"JSDocNullableType":       "type",
	"JSDocVariadicType":       "type",
	"JSDocOptionalType":       "type",
	"JSDocNameReference":      "name",
}
|
||||
|
||||
// singleNodeListProp maps node kinds that have exactly one NodeList child
// to the property name for that child. The converter emits the NodeList as a
// JSON array under this property; an absent list becomes an empty array.
var singleNodeListProp = map[string]string{
	"Block":                   "statements",
	"ArrayLiteralExpression":  "elements",
	"ObjectLiteralExpression": "properties",
	"UnionType":               "types",
	"IntersectionType":        "types",
	"TupleType":               "elements",
	"NamedImports":            "elements",
	"NamedExports":            "elements",
	"ModuleBlock":             "statements",
	"CaseBlock":               "clauses",
	"TypeLiteral":             "members",
	"JsxAttributes":           "properties",
	"ArrayBindingPattern":     "elements",
	"ObjectBindingPattern":    "elements",
	"HeritageClause":          "types",
	"JSDocTypeLiteral":        "jsDocPropertyTags",
}
|
||||
|
||||
// operandKinds are node kinds where the single child is called "operand"
// and the operator is encoded in the defined bits (bits 24-29 of the data
// field) rather than as a child token node.
var operandKinds = map[string]bool{
	"PrefixUnaryExpression":  true,
	"PostfixUnaryExpression": true,
}
|
||||
|
||||
// GetChildProperties returns the ordered child property names for the given
|
||||
// SyntaxKind name. Returns nil if the kind has no registered child properties
|
||||
// (leaf node, single-child, or NodeList-child).
|
||||
func GetChildProperties(kindName string) []string {
|
||||
return childProps[kindName]
|
||||
}
|
||||
|
||||
// GetSingleChildProperty returns the property name for a single-child node.
|
||||
// Returns "" if the kind is not a single-child node.
|
||||
func GetSingleChildProperty(kindName string) string {
|
||||
return singleChildProp[kindName]
|
||||
}
|
||||
|
||||
// GetSingleNodeListProperty returns the property name for a single-NodeList-child node.
|
||||
// Returns "" if the kind is not a single-NodeList-child node.
|
||||
func GetSingleNodeListProperty(kindName string) string {
|
||||
return singleNodeListProp[kindName]
|
||||
}
|
||||
@@ -0,0 +1,652 @@
|
||||
package astconv
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"sort"
	"strings"
)
|
||||
|
||||
// Converter transforms a BinaryAST into the JSON format expected by the
// Java extractor.
type Converter struct {
	ast *BinaryAST
	// kindNames maps numeric SyntaxKind values to their string names.
	kindNames map[uint32]string
	// sourceText caches ast.SourceText(); used for $lineStarts / $pos
	// augmentation, $tokens scanning, and keyword inference (TypeOperator,
	// HeritageClause).
	sourceText string
}
|
||||
|
||||
// NewConverter creates a Converter for the given binary AST.
|
||||
// kindToName maps numeric SyntaxKind values to their string names.
|
||||
func NewConverter(ast *BinaryAST, kindToName map[uint32]string) *Converter {
|
||||
return &Converter{
|
||||
ast: ast,
|
||||
kindNames: kindToName,
|
||||
sourceText: ast.SourceText(),
|
||||
}
|
||||
}
|
||||
|
||||
// Convert transforms the binary AST into a JSON-serializable map.
|
||||
// The root node is at index 1.
|
||||
func (c *Converter) Convert() (map[string]interface{}, error) {
|
||||
if c.ast.NodeCount() < 2 {
|
||||
return nil, fmt.Errorf("no nodes to convert")
|
||||
}
|
||||
return c.convertNode(1)
|
||||
}
|
||||
|
||||
// ConvertJSON is a convenience method that converts to JSON bytes.
|
||||
func (c *Converter) ConvertJSON() (json.RawMessage, error) {
|
||||
obj, err := c.Convert()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return json.Marshal(obj)
|
||||
}
|
||||
|
||||
func (c *Converter) convertNode(i int) (map[string]interface{}, error) {
|
||||
kind := c.ast.Kind(i)
|
||||
kindName := c.kindNames[kind]
|
||||
if kindName == "" {
|
||||
kindName = fmt.Sprintf("Unknown_%d", kind)
|
||||
}
|
||||
|
||||
node := map[string]interface{}{
|
||||
"kind": int(kind),
|
||||
"flags": int(c.ast.Flags(i)),
|
||||
"$pos": c.augmentPos(int(c.ast.Pos(i)), true),
|
||||
"$end": int(c.ast.End(i)),
|
||||
}
|
||||
|
||||
dataType := c.ast.DataType(i)
|
||||
|
||||
switch dataType {
|
||||
case nodeDataTypeString:
|
||||
c.handleStringNode(i, kindName, node)
|
||||
|
||||
case nodeDataTypeExtended:
|
||||
if err := c.handleExtendedNode(i, kindName, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
default: // nodeDataTypeChildren
|
||||
if err := c.handleChildrenNode(i, kindName, node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Add defined-bits-based properties
|
||||
c.addDefinedBitProperties(i, kindName, node)
|
||||
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// handleStringNode handles nodes with a string property (Identifier, StringLiteral, etc.)
|
||||
func (c *Converter) handleStringNode(i int, kindName string, node map[string]interface{}) {
|
||||
strIdx := c.ast.StringIndex(i)
|
||||
text := c.ast.GetString(strIdx)
|
||||
|
||||
switch kindName {
|
||||
case "Identifier", "PrivateIdentifier":
|
||||
node["escapedText"] = text
|
||||
default:
|
||||
node["text"] = text
|
||||
}
|
||||
}
|
||||
|
||||
// handleExtendedNode handles SourceFile and template literal nodes.
|
||||
func (c *Converter) handleExtendedNode(i int, kindName string, node map[string]interface{}) error {
|
||||
extOff := c.ast.ExtOffset(i)
|
||||
|
||||
switch kindName {
|
||||
case "SourceFile":
|
||||
return c.handleSourceFile(i, extOff, node)
|
||||
case "TemplateHead", "TemplateMiddle", "TemplateTail":
|
||||
c.handleTemplateLiteral(extOff, node)
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unknown extended data node kind: %s", kindName)
|
||||
}
|
||||
}
|
||||
|
||||
// handleSourceFile extracts SourceFile-specific data from extended data:
// fileName, full source text, $lineStarts, statements, and the scanned
// $tokens array. Returns an error only when converting a child fails.
func (c *Converter) handleSourceFile(i int, extOff uint32, node map[string]interface{}) error {
	// SourceFile extended data layout:
	// [0-4] textIdx, [4-8] fileNameIdx, [8-12] pathIdx,
	// [12-16] languageVariant, [16-20] scriptKind,
	// [20-24] referencedFiles, [24-28] typeReferenceDirectives, [28-32] libReferenceDirectives
	// [32-36] imports, [36-40] moduleAugmentations, [40-44] ambientModuleNames
	// [44-48] externalModuleIndicator

	// Only fileNameIdx (offset +4) is consumed here; the other slots are
	// currently unused by the Java extractor.
	fileNameIdx := c.ast.ExtUint32(extOff + 4)
	node["fileName"] = c.ast.GetString(fileNameIdx)

	// Add source text
	if c.sourceText != "" {
		node["text"] = c.sourceText
		node["$lineStarts"] = computeLineStarts(c.sourceText)
	}

	// Add empty parseDiagnostics array (expected by Java extractor)
	node["parseDiagnostics"] = []interface{}{}

	// Add children (statements + EndOfFile)
	children := c.ast.Children(i)
	for _, ci := range children {
		if c.ast.IsNodeList(ci) {
			arr, err := c.convertNodeList(ci)
			if err != nil {
				return err
			}
			// NOTE: if multiple NodeList children existed, the last one would
			// win; a SourceFile is expected to carry a single statements list.
			node["statements"] = arr
		}
		// Skip EndOfFile token — the Java extractor doesn't use it
	}

	// Generate $tokens by scanning the source text.
	// Rescan events tell the scanner where regex / template / ">" sequences
	// need context-sensitive re-tokenization.
	if c.sourceText != "" {
		events := c.collectRescanEvents(i)
		scanner := NewScanner(c.sourceText, events)
		rawTokens := scanner.ScanAll()
		tokenArr := make([]interface{}, len(rawTokens))
		for ti, tok := range rawTokens {
			tokenArr[ti] = map[string]interface{}{
				"kind":     tok.Kind,
				// tokenPos is NOT trivia-skipped (skipTrivia=false): the
				// scanner already reports the token's own start offset.
				"tokenPos": c.augmentPos(tok.TokenPos, false),
				"text":     tok.Text,
			}
		}
		node["$tokens"] = tokenArr
	}

	return nil
}
|
||||
|
||||
// handleTemplateLiteral extracts template literal data from extended data.
|
||||
func (c *Converter) handleTemplateLiteral(extOff uint32, node map[string]interface{}) {
|
||||
textIdx := c.ast.ExtUint32(extOff)
|
||||
rawTextIdx := c.ast.ExtUint32(extOff + 4)
|
||||
node["text"] = c.ast.GetString(textIdx)
|
||||
node["rawText"] = c.ast.GetString(rawTextIdx)
|
||||
}
|
||||
|
||||
// handleChildrenNode handles nodes with child properties determined by a bitmask.
|
||||
func (c *Converter) handleChildrenNode(i int, kindName string, node map[string]interface{}) error {
|
||||
children := c.ast.Children(i)
|
||||
|
||||
// Check for single-child nodes
|
||||
if prop := GetSingleChildProperty(kindName); prop != "" {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node[prop] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for single NodeList child nodes
|
||||
if prop := GetSingleNodeListProperty(kindName); prop != "" {
|
||||
if len(children) > 0 && c.ast.IsNodeList(children[0]) {
|
||||
arr, err := c.convertNodeList(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node[prop] = arr
|
||||
} else if len(children) > 0 {
|
||||
// Some single-NodeList nodes may not have a NodeList child
|
||||
// (e.g., JSDocTypeLiteral). Fall through to multi-child handling.
|
||||
} else {
|
||||
node[prop] = []interface{}{}
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check for operator-in-definedBits nodes (PrefixUnaryExpression, PostfixUnaryExpression)
|
||||
if operandKinds[kindName] {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["operand"] = child
|
||||
}
|
||||
node["operator"] = int(c.ast.DefinedBits(i))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Multi-child nodes with property mask
|
||||
props := GetChildProperties(kindName)
|
||||
if props != nil {
|
||||
return c.assignChildProperties(i, kindName, props, children, node)
|
||||
}
|
||||
|
||||
// Token/keyword nodes with no children — nothing to add
|
||||
if len(children) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MetaProperty: keywordToken + name
|
||||
if kindName == "MetaProperty" {
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["name"] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TypeOperator: operator keyword kind inferred from source text + type child
|
||||
if kindName == "TypeOperator" {
|
||||
// Operator (keyof/unique/readonly) is not in the binary encoding.
|
||||
pos := int(c.ast.Pos(i))
|
||||
if c.sourceText != "" && pos < len(c.sourceText) {
|
||||
text := c.sourceText[pos:]
|
||||
// Skip leading trivia
|
||||
for len(text) > 0 && (text[0] == ' ' || text[0] == '\t' || text[0] == '\n' || text[0] == '\r') {
|
||||
text = text[1:]
|
||||
}
|
||||
if len(text) >= 5 && text[:5] == "keyof" {
|
||||
node["operator"] = int(c.kindForName("KeyOfKeyword"))
|
||||
} else if len(text) >= 6 && text[:6] == "unique" {
|
||||
node["operator"] = int(c.kindForName("UniqueKeyword"))
|
||||
} else if len(text) >= 8 && text[:8] == "readonly" {
|
||||
node["operator"] = int(c.kindForName("ReadonlyKeyword"))
|
||||
}
|
||||
}
|
||||
if len(children) > 0 {
|
||||
child, err := c.convertNode(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["type"] = child
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MissingDeclaration: optional modifiers child
|
||||
if kindName == "MissingDeclaration" {
|
||||
if len(children) > 0 && c.ast.IsNodeList(children[0]) {
|
||||
arr, err := c.convertNodeList(children[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node["modifiers"] = arr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unknown node kind with children — emit them as a generic "children" array
|
||||
arr := make([]interface{}, 0, len(children))
|
||||
for _, ci := range children {
|
||||
if c.ast.IsNodeList(ci) {
|
||||
nlArr, err := c.convertNodeList(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, item := range nlArr {
|
||||
arr = append(arr, item)
|
||||
}
|
||||
} else {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
arr = append(arr, child)
|
||||
}
|
||||
}
|
||||
if len(arr) > 0 {
|
||||
node["children"] = arr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// assignChildProperties distributes children to named properties based on
|
||||
// the bitmask in the node's data field.
|
||||
func (c *Converter) assignChildProperties(nodeIdx int, kindName string, props []string, children []int, node map[string]interface{}) error {
|
||||
mask := c.ast.ChildMask(nodeIdx)
|
||||
definedBits := c.ast.DefinedBits(nodeIdx)
|
||||
|
||||
// Special handling for JSDocParameterTag/JSDocPropertyTag where
|
||||
// child order depends on isNameFirst
|
||||
if (kindName == "JSDocParameterTag" || kindName == "JSDocPropertyTag") && definedBits&2 != 0 {
|
||||
// isNameFirst=true: order is tagName, name, typeExpression, comment
|
||||
props = []string{"tagName", "name", "typeExpression", "comment"}
|
||||
}
|
||||
|
||||
childIdx := 0
|
||||
for bit, prop := range props {
|
||||
if bit < 8 && mask != 0 && mask&(1<<uint(bit)) == 0 {
|
||||
// Property not present per bitmask. For array properties,
|
||||
// emit an empty array (the Java extractor expects them).
|
||||
if isArrayProperty(prop) {
|
||||
node[prop] = []interface{}{}
|
||||
}
|
||||
continue
|
||||
}
|
||||
// If mask is 0 (single-child or no disambiguation needed), consume sequentially
|
||||
if mask == 0 && bit > 0 && childIdx >= len(children) {
|
||||
break
|
||||
}
|
||||
if childIdx >= len(children) {
|
||||
break
|
||||
}
|
||||
|
||||
ci := children[childIdx]
|
||||
childIdx++
|
||||
|
||||
if c.ast.IsNodeList(ci) {
|
||||
arr, err := c.convertNodeList(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
node[prop] = arr
|
||||
} else {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Remap TS7 "postfixToken" (questionToken property) to the correct name
|
||||
// based on the actual token kind. TS7 uses a single PostfixToken
|
||||
// for what TS5 had as separate questionToken/exclamationToken.
|
||||
if prop == "questionToken" {
|
||||
childKind := c.ast.Kind(ci)
|
||||
exclamationKind := c.kindForName("ExclamationToken")
|
||||
if exclamationKind != 0 && childKind == exclamationKind {
|
||||
prop = "exclamationToken"
|
||||
}
|
||||
}
|
||||
node[prop] = child
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// arrayProperties lists the property names that must appear as empty JSON
// arrays, rather than being omitted, when absent from the binary AST.
var arrayProperties = map[string]bool{
	"arguments":  true,
	"elements":   true,
	"properties": true,
	"members":    true,
}

// isArrayProperty reports whether prop must be emitted as an empty array
// (not omitted) when the binary AST marks it absent.
func isArrayProperty(prop string) bool {
	_, present := arrayProperties[prop]
	return present
}
|
||||
|
||||
// convertNodeList converts a NodeList into a JSON array.
|
||||
func (c *Converter) convertNodeList(i int) ([]interface{}, error) {
|
||||
children := c.ast.Children(i)
|
||||
arr := make([]interface{}, 0, len(children))
|
||||
for _, ci := range children {
|
||||
child, err := c.convertNode(ci)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arr = append(arr, child)
|
||||
}
|
||||
return arr, nil
|
||||
}
|
||||
|
||||
// addDefinedBitProperties adds properties derived from the defined bits
|
||||
// (bits 24-29 of the data field) that aren't part of the child tree.
|
||||
func (c *Converter) addDefinedBitProperties(i int, kindName string, node map[string]interface{}) {
|
||||
definedBits := c.ast.DefinedBits(i)
|
||||
|
||||
switch kindName {
|
||||
case "ImportSpecifier", "ImportEqualsDeclaration", "ExportSpecifier", "ExportDeclaration":
|
||||
node["isTypeOnly"] = definedBits&1 != 0
|
||||
case "ImportClause":
|
||||
node["isTypeOnly"] = definedBits&1 != 0
|
||||
if definedBits&2 != 0 {
|
||||
node["phaseModifier"] = "defer"
|
||||
}
|
||||
case "ImportType":
|
||||
if definedBits&1 != 0 {
|
||||
node["isTypeOf"] = true
|
||||
}
|
||||
case "ExportAssignment", "JSExportAssignment":
|
||||
if definedBits&1 != 0 {
|
||||
node["isExportEquals"] = true
|
||||
}
|
||||
case "VariableDeclarationList":
|
||||
// Determine $declarationKind from defined bits
|
||||
if definedBits&2 != 0 {
|
||||
node["$declarationKind"] = "const"
|
||||
} else if definedBits&1 != 0 {
|
||||
node["$declarationKind"] = "let"
|
||||
} else {
|
||||
node["$declarationKind"] = "var"
|
||||
}
|
||||
case "ImportAttributes":
|
||||
if definedBits&2 != 0 {
|
||||
node["token"] = c.kindForName("AssertKeyword")
|
||||
} else {
|
||||
node["token"] = c.kindForName("WithKeyword")
|
||||
}
|
||||
case "HeritageClause":
|
||||
// Token (extends/implements) is not in the binary encoding.
|
||||
// Infer from source text, skipping leading trivia.
|
||||
pos := int(c.ast.Pos(i))
|
||||
if c.sourceText != "" && pos < len(c.sourceText) {
|
||||
text := c.sourceText[pos:]
|
||||
// Skip whitespace/newlines
|
||||
for len(text) > 0 && (text[0] == ' ' || text[0] == '\t' || text[0] == '\n' || text[0] == '\r') {
|
||||
text = text[1:]
|
||||
}
|
||||
if len(text) >= 10 && text[:10] == "implements" {
|
||||
node["token"] = int(c.kindForName("ImplementsKeyword"))
|
||||
} else {
|
||||
node["token"] = int(c.kindForName("ExtendsKeyword"))
|
||||
}
|
||||
}
|
||||
case "JSDocParameterTag", "JSDocPropertyTag":
|
||||
if definedBits&1 != 0 {
|
||||
node["isBracketed"] = true
|
||||
}
|
||||
if definedBits&2 != 0 {
|
||||
node["isNameFirst"] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// augmentPos replicates the Node.js wrapper's $pos augmentation:
// if skipTrivia is true, advances past leading whitespace and comments.
// Returns pos unchanged when skipTrivia is false, the source is empty, or
// pos is already at/past the end of the source.
func (c *Converter) augmentPos(pos int, skipTrivia bool) int {
	if !skipTrivia || c.sourceText == "" || pos >= len(c.sourceText) {
		return pos
	}
	// Skip whitespace and comments (matching the regex /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g)
	i := pos
	n := len(c.sourceText)
	for i < n {
		ch := c.sourceText[i]
		// Common ASCII whitespace; NOTE(review): narrower than JS \s (no
		// Unicode spaces) — presumed sufficient for the extractor's inputs.
		if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == '\v' {
			i++
			continue
		}
		if ch == '/' && i+1 < n {
			next := c.sourceText[i+1]
			if next == '/' {
				// Single-line comment — skip to end of line
				i += 2
				for i < n && c.sourceText[i] != '\n' {
					i++
				}
				continue
			}
			if next == '*' {
				// Multi-line comment — skip to */
				// NOTE: an unterminated /* leaves i near the end of the text;
				// the outer loop then stops at the first non-trivia byte.
				i += 2
				for i+1 < n {
					if c.sourceText[i] == '*' && c.sourceText[i+1] == '/' {
						i += 2
						break
					}
					i++
				}
				continue
			}
		}
		// First non-trivia character: this is the augmented position.
		break
	}
	return i
}
|
||||
|
||||
// computeLineStarts returns the byte offset at which each line of text starts.
// Line 0 starts at offset 0; a new line starts after each '\n' and after each
// lone '\r', with a "\r\n" pair counted as a single line terminator.
func computeLineStarts(text string) []int {
	starts := []int{0}
	i := 0
	for i < len(text) {
		switch text[i] {
		case '\n':
			i++
			starts = append(starts, i)
		case '\r':
			i++
			if i < len(text) && text[i] == '\n' {
				i++ // consume the \n of a \r\n pair
			}
			starts = append(starts, i)
		default:
			i++
		}
	}
	return starts
}
|
||||
|
||||
// kindForName returns the numeric kind for a given string name.
|
||||
// This is the reverse of kindNames. Returns 0 if not found.
|
||||
func (c *Converter) kindForName(name string) uint32 {
|
||||
for k, v := range c.kindNames {
|
||||
if v == name {
|
||||
return k
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// collectRescanEvents walks the AST to find positions that need rescanning.
|
||||
// This matches the Node.js wrapper's rescan logic in ast_extractor.ts.
|
||||
func (c *Converter) collectRescanEvents(root int) []RescanEvent {
|
||||
var events []RescanEvent
|
||||
c.walkForRescan(root, &events)
|
||||
// Sort by position
|
||||
sortRescanEvents(events)
|
||||
return events
|
||||
}
|
||||
|
||||
func (c *Converter) walkForRescan(i int, events *[]RescanEvent) {
|
||||
if i <= 0 || i >= c.ast.NodeCount() {
|
||||
return
|
||||
}
|
||||
if c.ast.IsNodeList(i) {
|
||||
for _, ci := range c.ast.Children(i) {
|
||||
c.walkForRescan(ci, events)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
kind := c.ast.Kind(i)
|
||||
kindName := c.kindNames[kind]
|
||||
|
||||
// RegularExpressionLiteral needs rescan (scanner sees / as SlashToken)
|
||||
if kindName == "RegularExpressionLiteral" {
|
||||
pos := c.augmentPos(int(c.ast.Pos(i)), true)
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "regex"})
|
||||
}
|
||||
|
||||
// TemplateMiddle and TemplateTail need rescan (scanner sees } as CloseBraceToken)
|
||||
if kindName == "TemplateMiddle" || kindName == "TemplateTail" {
|
||||
pos := c.augmentPos(int(c.ast.Pos(i)), true)
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "template"})
|
||||
}
|
||||
|
||||
// BinaryExpression with >>= or >>> etc. needs rescan (scanner may see > separately)
|
||||
if kindName == "BinaryExpression" {
|
||||
children := c.ast.Children(i)
|
||||
if len(children) >= 3 {
|
||||
// BinaryExpression children: left, operatorToken, right
|
||||
opKind := c.kindNames[c.ast.Kind(children[1])]
|
||||
switch opKind {
|
||||
case "GreaterThanEqualsToken", "GreaterThanGreaterThanEqualsToken",
|
||||
"GreaterThanGreaterThanGreaterThanEqualsToken",
|
||||
"GreaterThanGreaterThanGreaterThanToken", "GreaterThanGreaterThanToken":
|
||||
pos := c.augmentPos(int(c.ast.Pos(children[1])), true)
|
||||
*events = append(*events, RescanEvent{Pos: pos, Kind: "greater"})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse into children
|
||||
for _, ci := range c.ast.Children(i) {
|
||||
c.walkForRescan(ci, events)
|
||||
}
|
||||
}
|
||||
|
||||
func sortRescanEvents(events []RescanEvent) {
|
||||
// Simple insertion sort — events are typically few
|
||||
for i := 1; i < len(events); i++ {
|
||||
key := events[i]
|
||||
j := i - 1
|
||||
for j >= 0 && events[j].Pos > key.Pos {
|
||||
events[j+1] = events[j]
|
||||
j--
|
||||
}
|
||||
events[j+1] = key
|
||||
}
|
||||
}
|
||||
|
||||
// FilterWhitelist removes properties from the converted AST that are not
|
||||
// in the property whitelist. This is applied recursively.
|
||||
func FilterWhitelist(obj map[string]interface{}) map[string]interface{} {
|
||||
result := make(map[string]interface{}, len(obj))
|
||||
for k, v := range obj {
|
||||
if !IsAllowedProperty(k) {
|
||||
continue
|
||||
}
|
||||
switch val := v.(type) {
|
||||
case map[string]interface{}:
|
||||
result[k] = FilterWhitelist(val)
|
||||
case []interface{}:
|
||||
result[k] = filterWhitelistArray(val)
|
||||
default:
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func filterWhitelistArray(arr []interface{}) []interface{} {
|
||||
result := make([]interface{}, len(arr))
|
||||
for i, v := range arr {
|
||||
if obj, ok := v.(map[string]interface{}); ok {
|
||||
result[i] = FilterWhitelist(obj)
|
||||
} else {
|
||||
result[i] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// BuildKindToNameMap builds a reverse mapping from numeric kind to string name
// from a SyntaxKinds metadata map (name → number).
//
// When several names map to the same number, the shortest name wins, with
// length ties broken lexicographically. The explicit tie-break makes the
// result deterministic: Go map iteration order is randomized, so the previous
// "first one seen wins" rule for equal-length names could differ between runs.
func BuildKindToNameMap(syntaxKinds map[string]int) map[uint32]string {
	result := make(map[uint32]string, len(syntaxKinds))
	for name, num := range syntaxKinds {
		key := uint32(num)
		existing, ok := result[key]
		// Prefer shorter names; for equal lengths, the lexicographically
		// smaller one, so the output is independent of iteration order.
		if !ok || len(name) < len(existing) || (len(name) == len(existing) && name < existing) {
			result[key] = name
		}
	}
	return result
}
|
||||
|
||||
// StripKindPrefix removes the "Kind" prefix from names if present (for TS7
// Go-style names such as "KindIdentifier" → "Identifier"). Names without the
// prefix are returned unchanged.
func StripKindPrefix(name string) string {
	// TrimPrefix is the idiomatic form of HasPrefix + manual slicing and
	// already returns the input unchanged when the prefix is absent.
	return strings.TrimPrefix(name, "Kind")
}
|
||||
@@ -0,0 +1,221 @@
|
||||
// Package astconv decodes the binary AST format produced by the tsgo API
|
||||
// and converts it to the JSON format expected by the Java extractor.
|
||||
//
|
||||
// The binary format is documented in microsoft/typescript-go/internal/api/encoder/encoder.go.
|
||||
// Each source file is encoded as:
|
||||
//
|
||||
// Header (44 bytes) | String offsets | String data | Extended data | Structured data | Nodes (28 bytes each)
|
||||
//
|
||||
// Nodes are in a flat array with parent/next-sibling indices. The first node (index 0)
|
||||
// is a nil sentinel. The root node is at index 1.
|
||||
package astconv
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Binary format constants matching microsoft/typescript-go/internal/api/encoder.
const (
	// Each node record is seven little-endian uint32 fields.
	nodeSize = 28 // 7 × uint32

	// Byte offsets of the fields within a node record.
	nodeOffsetKind = 0
	nodeOffsetPos = 4
	nodeOffsetEnd = 8
	nodeOffsetNext = 12
	nodeOffsetParent = 16
	nodeOffsetData = 20
	nodeOffsetFlags = 24

	// Byte offsets of the uint32 section-offset fields within the file header.
	headerSize = 44
	headerOffsetMetadata = 0
	headerOffsetStringOff = 24
	headerOffsetStringData = 28
	headerOffsetExtData = 32
	headerOffsetStructData = 36
	headerOffsetNodes = 40

	// Protocol version byte; read from data[headerOffsetMetadata+3] by the decoder.
	protocolVersion uint8 = 5

	// Top two bits of a node's data field select how the rest of the field
	// is interpreted (child bitmask, string index, or extended-data offset).
	nodeDataTypeChildren uint32 = 0x00_00_00_00
	nodeDataTypeString uint32 = 0x40_00_00_00
	nodeDataTypeExtended uint32 = 0x80_00_00_00

	nodeDataTypeMask uint32 = 0xC0_00_00_00
	nodeDataChildMask uint32 = 0x00_00_00_FF // low byte: child property bitmask
	nodeDataStringMask uint32 = 0x00_FF_FF_FF // low 24 bits: string index / extended-data offset

	// SyntaxKindNodeList is the special kind value used for NodeList nodes.
	SyntaxKindNodeList uint32 = 0xFF_FF_FF_FF
)
|
||||
|
||||
// BinaryAST provides random access to nodes in a binary-encoded TypeScript AST.
// All accessors read little-endian uint32 fields directly out of raw, so the
// node section is never copied or pre-decoded.
type BinaryAST struct {
	raw []byte // the full encoded buffer, including header and all sections
	strOff uint32 // byte offset to string offset pairs
	strData uint32 // byte offset to string data
	extData uint32 // byte offset to extended node data
	structOff uint32 // byte offset to structured data
	nodeOff uint32 // byte offset to nodes section
	nodeCount int // number of node records, including the nil sentinel at index 0
	// Single Go string covering all data from strData onward.
	// String offsets index into this, so substrings are zero-alloc.
	allStrData string
}
|
||||
|
||||
// DecodeBinaryAST parses the binary header and returns a BinaryAST for
|
||||
// random-access to nodes and strings.
|
||||
func DecodeBinaryAST(data []byte) (*BinaryAST, error) {
|
||||
if len(data) < headerSize {
|
||||
return nil, fmt.Errorf("data too short: %d bytes (need %d)", len(data), headerSize)
|
||||
}
|
||||
|
||||
version := data[headerOffsetMetadata+3]
|
||||
if version != protocolVersion {
|
||||
return nil, fmt.Errorf("unsupported protocol version %d (expected %d)", version, protocolVersion)
|
||||
}
|
||||
|
||||
b := &BinaryAST{
|
||||
raw: data,
|
||||
strOff: le32(data, headerOffsetStringOff),
|
||||
strData: le32(data, headerOffsetStringData),
|
||||
extData: le32(data, headerOffsetExtData),
|
||||
structOff: le32(data, headerOffsetStructData),
|
||||
nodeOff: le32(data, headerOffsetNodes),
|
||||
}
|
||||
|
||||
dataLen := uint32(len(data))
|
||||
if b.strOff > dataLen || b.strData > dataLen || b.extData > dataLen || b.nodeOff > dataLen {
|
||||
return nil, fmt.Errorf("invalid header offsets exceed data length %d", dataLen)
|
||||
}
|
||||
|
||||
b.nodeCount = (len(data) - int(b.nodeOff)) / nodeSize
|
||||
if b.nodeCount < 2 {
|
||||
return nil, fmt.Errorf("no nodes in AST (count=%d, need at least 2)", b.nodeCount)
|
||||
}
|
||||
|
||||
// The official decoder uses data[strData:] for zero-alloc substring slicing.
|
||||
b.allStrData = string(data[b.strData:])
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// DecodeBinaryASTFromBase64 decodes a base64-encoded binary AST, as returned
|
||||
// by tsgo's getSourceFile API in JSON ({"data":"<base64>"}).
|
||||
func DecodeBinaryASTFromBase64(b64 string) (*BinaryAST, error) {
|
||||
data, err := base64.StdEncoding.DecodeString(b64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("base64 decode failed: %w", err)
|
||||
}
|
||||
return DecodeBinaryAST(data)
|
||||
}
|
||||
|
||||
// NodeCount returns the total number of nodes (including the nil sentinel at index 0).
func (b *BinaryAST) NodeCount() int { return b.nodeCount }

// Node field accessors — all read uint32 from the nodes section.
// None of them bounds-check the node index; an out-of-range i makes the
// underlying le32 read return 0 rather than panic.

// nf reads the uint32 field at byte offset `offset` within node record i.
func (b *BinaryAST) nf(i, offset int) uint32 {
	return le32(b.raw, int(b.nodeOff)+i*nodeSize+offset)
}

// Kind returns the SyntaxKind of node i.
func (b *BinaryAST) Kind(i int) uint32 { return b.nf(i, nodeOffsetKind) }

// Pos returns the start position (UTF-16 offset) of node i.
func (b *BinaryAST) Pos(i int) uint32 { return b.nf(i, nodeOffsetPos) }

// End returns the end position (UTF-16 offset) of node i.
func (b *BinaryAST) End(i int) uint32 { return b.nf(i, nodeOffsetEnd) }

// Next returns the index of the next sibling of node i, or 0 if none.
func (b *BinaryAST) Next(i int) uint32 { return b.nf(i, nodeOffsetNext) }

// Parent returns the index of the parent of node i, or 0 if none.
func (b *BinaryAST) Parent(i int) uint32 { return b.nf(i, nodeOffsetParent) }

// Data returns the raw 32-bit data field of node i.
func (b *BinaryAST) Data(i int) uint32 { return b.nf(i, nodeOffsetData) }

// Flags returns the NodeFlags of node i.
func (b *BinaryAST) Flags(i int) uint32 { return b.nf(i, nodeOffsetFlags) }

// DataType returns the top 2 bits of the data field (Children, String, or Extended).
// The result is already masked with nodeDataTypeMask.
func (b *BinaryAST) DataType(i int) uint32 { return b.Data(i) & nodeDataTypeMask }

// DefinedBits returns bits 24-29 of the data field (6 bits of per-node-type flags).
func (b *BinaryAST) DefinedBits(i int) uint8 { return uint8((b.Data(i) >> 24) & 0x3F) }

// ChildMask returns the low byte of the data field (child property bitmask).
func (b *BinaryAST) ChildMask(i int) uint8 { return uint8(b.Data(i) & nodeDataChildMask) }

// StringIndex returns the 24-bit string table index from the data field.
func (b *BinaryAST) StringIndex(i int) uint32 { return b.Data(i) & nodeDataStringMask }

// ExtOffset returns the 24-bit offset into the extended data section from the
// data field. It deliberately reuses nodeDataStringMask: string indices and
// extended-data offsets occupy the same low-24-bit slot.
func (b *BinaryAST) ExtOffset(i int) uint32 { return b.Data(i) & nodeDataStringMask }

// NodeListLen returns the number of children for a NodeList node (stored in data field).
func (b *BinaryAST) NodeListLen(i int) uint32 { return b.Data(i) }

// IsNodeList returns true if node i is a NodeList.
func (b *BinaryAST) IsNodeList(i int) bool { return b.Kind(i) == SyntaxKindNodeList }
|
||||
|
||||
// GetString reads a string from the string table at the given offset index.
// The index comes from a String-type node's data field (24-bit value).
//
// The offsets section is read with a 4-byte stride: entry idx yields a
// (start, end) pair where end is the uint32 stored 4 bytes later — i.e.
// consecutive entries share boundaries, as in an offsets array. start/end
// index allStrData, so they are positions relative to the string-data
// section. No bounds checking is done here: le32 yields 0 for out-of-range
// reads, but a corrupt table where start > end would panic on the slice —
// inputs are assumed to be well-formed encoder output (validated offsets
// come from DecodeBinaryAST).
func (b *BinaryAST) GetString(idx uint32) string {
	offBase := int(b.strOff) + int(idx)*4
	start := le32(b.raw, offBase)
	end := le32(b.raw, offBase+4)
	return b.allStrData[start:end]
}

// ExtUint32 reads a uint32 from the extended data section at the given byte offset.
// Out-of-range offsets return 0 (le32's behavior) rather than panicking.
func (b *BinaryAST) ExtUint32(off uint32) uint32 {
	return le32(b.raw, int(b.extData)+int(off))
}
|
||||
|
||||
// Children returns the indices of all direct children of node i.
|
||||
// Children are identified by having parent == i. The first child is at i+1
|
||||
// (if its parent is i), and subsequent children are found via Next pointers.
|
||||
func (b *BinaryAST) Children(i int) []int {
|
||||
if i+1 >= b.nodeCount {
|
||||
return nil
|
||||
}
|
||||
firstChild := i + 1
|
||||
if b.Parent(firstChild) != uint32(i) {
|
||||
return nil
|
||||
}
|
||||
children := []int{firstChild}
|
||||
next := int(b.Next(firstChild))
|
||||
for next != 0 {
|
||||
children = append(children, next)
|
||||
next = int(b.Next(next))
|
||||
}
|
||||
return children
|
||||
}
|
||||
|
||||
// SourceText returns the source file text, extracted from the SourceFile's
|
||||
// extended data. Returns "" if the root node is not a SourceFile or if
|
||||
// the extended data is missing.
|
||||
func (b *BinaryAST) SourceText() string {
|
||||
if b.nodeCount < 2 {
|
||||
return ""
|
||||
}
|
||||
// Root is at index 1. Check if it has extended data type.
|
||||
if b.DataType(1)&nodeDataTypeMask != nodeDataTypeExtended {
|
||||
return ""
|
||||
}
|
||||
extOff := b.ExtOffset(1)
|
||||
textIdx := b.ExtUint32(extOff)
|
||||
return b.GetString(textIdx)
|
||||
}
|
||||
|
||||
// le32 decodes the little-endian uint32 at the given byte offset. Any read
// that is not fully inside data — negative offset or fewer than four bytes
// remaining — yields 0 instead of panicking.
func le32(data []byte, offset int) uint32 {
	if offset < 0 || len(data)-offset < 4 {
		return 0
	}
	return binary.LittleEndian.Uint32(data[offset:])
}
|
||||
@@ -0,0 +1,842 @@
|
||||
package astconv
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TS7 SyntaxKind values for tokens (from microsoft/typescript-go internal/ast/kind.go).
// The numeric values must stay in lockstep with tsgo's kind.go — do not
// renumber. Gaps in the sequence (e.g. 7, 11–12, 61) correspond to kinds
// this tokenizer never emits.
const (
	KindUnknown = 0
	KindEndOfFile = 1
	KindSingleLineCommentTrivia = 2
	KindMultiLineCommentTrivia = 3
	KindNewLineTrivia = 4
	KindWhitespaceTrivia = 5
	KindConflictMarkerTrivia = 6
	KindNumericLiteral = 8
	KindBigIntLiteral = 9
	KindStringLiteral = 10
	KindRegularExpressionLiteral = 13
	KindNoSubstitutionTemplateLiteral = 14
	KindTemplateHead = 15
	KindTemplateMiddle = 16
	KindTemplateTail = 17
	KindOpenBraceToken = 18
	KindCloseBraceToken = 19
	KindOpenParenToken = 20
	KindCloseParenToken = 21
	KindOpenBracketToken = 22
	KindCloseBracketToken = 23
	KindDotToken = 24
	KindDotDotDotToken = 25
	KindSemicolonToken = 26
	KindCommaToken = 27
	KindQuestionDotToken = 28
	KindLessThanToken = 29
	KindLessThanSlashToken = 30
	KindGreaterThanToken = 31
	KindLessThanEqualsToken = 32
	KindGreaterThanEqualsToken = 33
	KindEqualsEqualsToken = 34
	KindExclamationEqualsToken = 35
	KindEqualsEqualsEqualsToken = 36
	KindExclamationEqualsEqualsToken = 37
	KindEqualsGreaterThanToken = 38
	KindPlusToken = 39
	KindMinusToken = 40
	KindAsteriskToken = 41
	KindAsteriskAsteriskToken = 42
	KindSlashToken = 43
	KindPercentToken = 44
	KindPlusPlusToken = 45
	KindMinusMinusToken = 46
	KindLessThanLessThanToken = 47
	KindGreaterThanGreaterThanToken = 48
	KindGreaterThanGreaterThanGreaterThanToken = 49
	KindAmpersandToken = 50
	KindBarToken = 51
	KindCaretToken = 52
	KindExclamationToken = 53
	KindTildeToken = 54
	KindAmpersandAmpersandToken = 55
	KindBarBarToken = 56
	KindQuestionToken = 57
	KindColonToken = 58
	KindAtToken = 59
	KindQuestionQuestionToken = 60
	KindHashToken = 62
	KindEqualsToken = 63
	KindPlusEqualsToken = 64
	KindMinusEqualsToken = 65
	KindAsteriskEqualsToken = 66
	KindAsteriskAsteriskEqualsToken = 67
	KindSlashEqualsToken = 68
	KindPercentEqualsToken = 69
	KindLessThanLessThanEqualsToken = 70
	KindGreaterThanGreaterThanEqualsToken = 71
	KindGreaterThanGreaterThanGreaterThanEqualsToken = 72
	KindAmpersandEqualsToken = 73
	KindBarEqualsToken = 74
	KindBarBarEqualsToken = 75
	KindAmpersandAmpersandEqualsToken = 76
	KindQuestionQuestionEqualsToken = 77
	KindCaretEqualsToken = 78
	KindIdentifier = 79
	KindPrivateIdentifier = 80
)
|
||||
|
||||
// Token represents a single token from the scanner: its numeric SyntaxKind,
// its starting byte offset, and the raw source text it covers. The JSON
// field names match what the Java extractor consumes.
type Token struct {
	Kind int `json:"kind"`
	TokenPos int `json:"tokenPos"`
	Text string `json:"text"`
}

// RescanEvent tells the scanner to rescan at a given position. Events are
// derived from the AST and must be sorted by Pos before being handed to
// NewScanner, because the scanner consumes them strictly in order.
type RescanEvent struct {
	Pos int
	Kind string // "regex", "template", "greater"
}
||||
|
||||
// Scanner tokenizes TypeScript source text. It is byte-oriented: pos indexes
// bytes of text, and multi-byte UTF-8 sequences are only decoded when
// classifying identifier characters.
type Scanner struct {
	text string // the full source text being tokenized
	pos int // current byte offset into text
	events []RescanEvent // rescan positions, expected sorted ascending
	evIdx int // index of the next unconsumed rescan event
}
|
||||
|
||||
// NewScanner creates a scanner for the given source text.
|
||||
// rescanEvents should be sorted by position. They inform the scanner
|
||||
// about positions where regex literals, template tokens, or greater-than
|
||||
// rescanning is needed (matching the Node.js wrapper behavior).
|
||||
func NewScanner(text string, rescanEvents []RescanEvent) *Scanner {
|
||||
return &Scanner{
|
||||
text: text,
|
||||
pos: 0,
|
||||
events: rescanEvents,
|
||||
evIdx: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// ScanAll produces all tokens from the source text.
|
||||
func (s *Scanner) ScanAll() []Token {
|
||||
var tokens []Token
|
||||
for {
|
||||
tok := s.scan()
|
||||
tokens = append(tokens, tok)
|
||||
if tok.Kind == KindEndOfFile {
|
||||
break
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
// peek returns the byte at the current position, or 0 at end of input.
func (s *Scanner) peek() byte {
	if s.pos >= len(s.text) {
		return 0
	}
	return s.text[s.pos]
}

// peekAt returns the byte at pos+offset, or 0 when that lands past the end.
func (s *Scanner) peekAt(offset int) byte {
	p := s.pos + offset
	if p >= len(s.text) {
		return 0
	}
	return s.text[p]
}

// advance moves past the current byte.
func (s *Scanner) advance() {
	s.pos++
}

// nextRescanPos returns the position of the next unconsumed rescan event,
// or MaxInt when none remain (so it can never equal a real token position).
func (s *Scanner) nextRescanPos() int {
	if s.evIdx < len(s.events) {
		return s.events[s.evIdx].Pos
	}
	return int(^uint(0) >> 1) // MaxInt
}

// nextRescanKind returns the kind of the next unconsumed rescan event, or "".
func (s *Scanner) nextRescanKind() string {
	if s.evIdx < len(s.events) {
		return s.events[s.evIdx].Kind
	}
	return ""
}

// consumeRescan marks the next pending rescan event as handled.
func (s *Scanner) consumeRescan() {
	if s.evIdx < len(s.events) {
		s.evIdx++
	}
}
|
||||
|
||||
func (s *Scanner) scan() Token {
|
||||
if s.pos >= len(s.text) {
|
||||
return Token{Kind: KindEndOfFile, TokenPos: s.pos, Text: ""}
|
||||
}
|
||||
|
||||
tokenPos := s.pos
|
||||
ch := s.peek()
|
||||
|
||||
// Whitespace (not newlines)
|
||||
if ch == ' ' || ch == '\t' || ch == '\f' || ch == '\v' {
|
||||
for s.pos < len(s.text) {
|
||||
c := s.text[s.pos]
|
||||
if c == ' ' || c == '\t' || c == '\f' || c == '\v' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return Token{Kind: KindWhitespaceTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
// Newlines
|
||||
if ch == '\n' {
|
||||
s.advance()
|
||||
return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: "\n"}
|
||||
}
|
||||
if ch == '\r' {
|
||||
s.advance()
|
||||
if s.peek() == '\n' {
|
||||
s.advance()
|
||||
}
|
||||
return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
// Check for rescan event at this position
|
||||
if tokenPos == s.nextRescanPos() {
|
||||
kind := s.nextRescanKind()
|
||||
s.consumeRescan()
|
||||
switch kind {
|
||||
case "regex":
|
||||
return s.scanRegExp(tokenPos)
|
||||
case "template":
|
||||
return s.scanTemplatePart(tokenPos, true)
|
||||
case "greater":
|
||||
return s.scanGreater(tokenPos)
|
||||
}
|
||||
}
|
||||
|
||||
switch ch {
|
||||
case '/':
|
||||
next := s.peekAt(1)
|
||||
if next == '/' {
|
||||
return s.scanSingleLineComment(tokenPos)
|
||||
}
|
||||
if next == '*' {
|
||||
return s.scanMultiLineComment(tokenPos)
|
||||
}
|
||||
if next == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindSlashEqualsToken, TokenPos: tokenPos, Text: "/="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindSlashToken, TokenPos: tokenPos, Text: "/"}
|
||||
|
||||
case '\'', '"':
|
||||
return s.scanString(tokenPos, ch)
|
||||
|
||||
case '`':
|
||||
return s.scanTemplatePart(tokenPos, false)
|
||||
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
return s.scanNumber(tokenPos)
|
||||
|
||||
case '{':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenBraceToken, TokenPos: tokenPos, Text: "{"}
|
||||
case '}':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseBraceToken, TokenPos: tokenPos, Text: "}"}
|
||||
case '(':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenParenToken, TokenPos: tokenPos, Text: "("}
|
||||
case ')':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseParenToken, TokenPos: tokenPos, Text: ")"}
|
||||
case '[':
|
||||
s.advance()
|
||||
return Token{Kind: KindOpenBracketToken, TokenPos: tokenPos, Text: "["}
|
||||
case ']':
|
||||
s.advance()
|
||||
return Token{Kind: KindCloseBracketToken, TokenPos: tokenPos, Text: "]"}
|
||||
case ';':
|
||||
s.advance()
|
||||
return Token{Kind: KindSemicolonToken, TokenPos: tokenPos, Text: ";"}
|
||||
case ',':
|
||||
s.advance()
|
||||
return Token{Kind: KindCommaToken, TokenPos: tokenPos, Text: ","}
|
||||
case '~':
|
||||
s.advance()
|
||||
return Token{Kind: KindTildeToken, TokenPos: tokenPos, Text: "~"}
|
||||
case '@':
|
||||
s.advance()
|
||||
return Token{Kind: KindAtToken, TokenPos: tokenPos, Text: "@"}
|
||||
|
||||
case '.':
|
||||
if s.peekAt(1) == '.' && s.peekAt(2) == '.' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindDotDotDotToken, TokenPos: tokenPos, Text: "..."}
|
||||
}
|
||||
// .123 numeric literal
|
||||
if s.peekAt(1) >= '0' && s.peekAt(1) <= '9' {
|
||||
return s.scanNumber(tokenPos)
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindDotToken, TokenPos: tokenPos, Text: "."}
|
||||
|
||||
case ':':
|
||||
s.advance()
|
||||
return Token{Kind: KindColonToken, TokenPos: tokenPos, Text: ":"}
|
||||
|
||||
case '?':
|
||||
if s.peekAt(1) == '.' && !(s.peekAt(2) >= '0' && s.peekAt(2) <= '9') {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindQuestionDotToken, TokenPos: tokenPos, Text: "?."}
|
||||
}
|
||||
if s.peekAt(1) == '?' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindQuestionQuestionEqualsToken, TokenPos: tokenPos, Text: "??="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindQuestionQuestionToken, TokenPos: tokenPos, Text: "??"}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindQuestionToken, TokenPos: tokenPos, Text: "?"}
|
||||
|
||||
case '!':
|
||||
if s.peekAt(1) == '=' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindExclamationEqualsEqualsToken, TokenPos: tokenPos, Text: "!=="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindExclamationEqualsToken, TokenPos: tokenPos, Text: "!="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindExclamationToken, TokenPos: tokenPos, Text: "!"}
|
||||
|
||||
case '=':
|
||||
if s.peekAt(1) == '=' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindEqualsEqualsEqualsToken, TokenPos: tokenPos, Text: "==="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindEqualsEqualsToken, TokenPos: tokenPos, Text: "=="}
|
||||
}
|
||||
if s.peekAt(1) == '>' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindEqualsGreaterThanToken, TokenPos: tokenPos, Text: "=>"}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindEqualsToken, TokenPos: tokenPos, Text: "="}
|
||||
|
||||
case '+':
|
||||
if s.peekAt(1) == '+' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPlusPlusToken, TokenPos: tokenPos, Text: "++"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPlusEqualsToken, TokenPos: tokenPos, Text: "+="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindPlusToken, TokenPos: tokenPos, Text: "+"}
|
||||
|
||||
case '-':
|
||||
if s.peekAt(1) == '-' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMinusMinusToken, TokenPos: tokenPos, Text: "--"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMinusEqualsToken, TokenPos: tokenPos, Text: "-="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindMinusToken, TokenPos: tokenPos, Text: "-"}
|
||||
|
||||
case '*':
|
||||
if s.peekAt(1) == '*' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindAsteriskAsteriskEqualsToken, TokenPos: tokenPos, Text: "**="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAsteriskAsteriskToken, TokenPos: tokenPos, Text: "**"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAsteriskEqualsToken, TokenPos: tokenPos, Text: "*="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindAsteriskToken, TokenPos: tokenPos, Text: "*"}
|
||||
|
||||
case '%':
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindPercentEqualsToken, TokenPos: tokenPos, Text: "%="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindPercentToken, TokenPos: tokenPos, Text: "%"}
|
||||
|
||||
case '<':
|
||||
if s.peekAt(1) == '<' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindLessThanLessThanEqualsToken, TokenPos: tokenPos, Text: "<<="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanLessThanToken, TokenPos: tokenPos, Text: "<<"}
|
||||
}
|
||||
if s.peekAt(1) == '/' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanSlashToken, TokenPos: tokenPos, Text: "</"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindLessThanEqualsToken, TokenPos: tokenPos, Text: "<="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindLessThanToken, TokenPos: tokenPos, Text: "<"}
|
||||
|
||||
case '>':
|
||||
return s.scanGreater(tokenPos)
|
||||
|
||||
case '&':
|
||||
if s.peekAt(1) == '&' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindAmpersandAmpersandEqualsToken, TokenPos: tokenPos, Text: "&&="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAmpersandAmpersandToken, TokenPos: tokenPos, Text: "&&"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindAmpersandEqualsToken, TokenPos: tokenPos, Text: "&="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindAmpersandToken, TokenPos: tokenPos, Text: "&"}
|
||||
|
||||
case '|':
|
||||
if s.peekAt(1) == '|' {
|
||||
if s.peekAt(2) == '=' {
|
||||
s.pos += 3
|
||||
return Token{Kind: KindBarBarEqualsToken, TokenPos: tokenPos, Text: "||="}
|
||||
}
|
||||
s.pos += 2
|
||||
return Token{Kind: KindBarBarToken, TokenPos: tokenPos, Text: "||"}
|
||||
}
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindBarEqualsToken, TokenPos: tokenPos, Text: "|="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindBarToken, TokenPos: tokenPos, Text: "|"}
|
||||
|
||||
case '^':
|
||||
if s.peekAt(1) == '=' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindCaretEqualsToken, TokenPos: tokenPos, Text: "^="}
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindCaretToken, TokenPos: tokenPos, Text: "^"}
|
||||
|
||||
case '#':
|
||||
// Could be private identifier
|
||||
if s.peekAt(1) == '!' && tokenPos == 0 {
|
||||
// Shebang — scan to end of line
|
||||
return s.scanSingleLineComment(tokenPos)
|
||||
}
|
||||
if isIdentStart(s.peekAt(1)) {
|
||||
return s.scanPrivateIdentifier(tokenPos)
|
||||
}
|
||||
s.advance()
|
||||
return Token{Kind: KindHashToken, TokenPos: tokenPos, Text: "#"}
|
||||
}
|
||||
|
||||
// Identifier or keyword
|
||||
if isIdentStartByte(ch) {
|
||||
return s.scanIdentifierOrKeyword(tokenPos)
|
||||
}
|
||||
|
||||
// Handle multi-byte Unicode identifier starts
|
||||
r, size := utf8.DecodeRuneInString(s.text[s.pos:])
|
||||
if r != utf8.RuneError && isIdentStartRune(r) {
|
||||
return s.scanIdentifierOrKeyword(tokenPos)
|
||||
}
|
||||
|
||||
// Unknown character
|
||||
s.pos += size
|
||||
return Token{Kind: KindUnknown, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanSingleLineComment(start int) Token {
|
||||
s.pos += 2 // skip //
|
||||
for s.pos < len(s.text) && s.text[s.pos] != '\n' && s.text[s.pos] != '\r' {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindSingleLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanMultiLineComment(start int) Token {
|
||||
s.pos += 2 // skip /*
|
||||
for s.pos < len(s.text)-1 {
|
||||
if s.text[s.pos] == '*' && s.text[s.pos+1] == '/' {
|
||||
s.pos += 2
|
||||
return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
// Unterminated
|
||||
s.pos = len(s.text)
|
||||
return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanString(start int, quote byte) Token {
|
||||
s.advance() // skip opening quote
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == quote {
|
||||
s.advance()
|
||||
return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '\n' || ch == '\r' {
|
||||
// Unterminated string
|
||||
break
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanTemplatePart(start int, isRescan bool) Token {
|
||||
if isRescan {
|
||||
// We're at a '}' that needs to be rescanned as TemplateMiddle or TemplateTail
|
||||
s.advance() // skip }
|
||||
} else {
|
||||
s.advance() // skip `
|
||||
}
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == '`' {
|
||||
s.advance()
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateTail, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNoSubstitutionTemplateLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '$' && s.peekAt(1) == '{' {
|
||||
s.pos += 2
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateMiddle, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindTemplateHead, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
// Unterminated
|
||||
if isRescan {
|
||||
return Token{Kind: KindTemplateTail, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNoSubstitutionTemplateLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanRegExp(start int) Token {
|
||||
s.advance() // skip /
|
||||
inCharClass := false
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '\\' {
|
||||
s.pos += 2
|
||||
continue
|
||||
}
|
||||
if ch == '[' {
|
||||
inCharClass = true
|
||||
s.pos++
|
||||
continue
|
||||
}
|
||||
if ch == ']' {
|
||||
inCharClass = false
|
||||
s.pos++
|
||||
continue
|
||||
}
|
||||
if ch == '/' && !inCharClass {
|
||||
s.advance() // skip closing /
|
||||
// Scan flags
|
||||
for s.pos < len(s.text) && isIdentChar(s.text[s.pos]) {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
if ch == '\n' || ch == '\r' {
|
||||
break
|
||||
}
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanGreater(start int) Token {
|
||||
s.advance() // skip >
|
||||
if s.peek() == '>' {
|
||||
s.advance()
|
||||
if s.peek() == '>' {
|
||||
s.advance()
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanGreaterThanGreaterThanEqualsToken, TokenPos: start, Text: ">>>="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanGreaterThanGreaterThanToken, TokenPos: start, Text: ">>>"}
|
||||
}
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanGreaterThanEqualsToken, TokenPos: start, Text: ">>="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanGreaterThanToken, TokenPos: start, Text: ">>"}
|
||||
}
|
||||
if s.peek() == '=' {
|
||||
s.advance()
|
||||
return Token{Kind: KindGreaterThanEqualsToken, TokenPos: start, Text: ">="}
|
||||
}
|
||||
return Token{Kind: KindGreaterThanToken, TokenPos: start, Text: ">"}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanNumber(start int) Token {
|
||||
if s.peek() == '0' {
|
||||
next := s.peekAt(1)
|
||||
if next == 'x' || next == 'X' {
|
||||
s.pos += 2
|
||||
s.scanHexDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
if next == 'b' || next == 'B' {
|
||||
s.pos += 2
|
||||
s.scanBinaryDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
if next == 'o' || next == 'O' {
|
||||
s.pos += 2
|
||||
s.scanOctalDigits()
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
}
|
||||
|
||||
s.scanDecimalDigits()
|
||||
if s.peek() == '.' {
|
||||
s.advance()
|
||||
s.scanDecimalDigits()
|
||||
}
|
||||
if s.peek() == 'e' || s.peek() == 'E' {
|
||||
s.advance()
|
||||
if s.peek() == '+' || s.peek() == '-' {
|
||||
s.advance()
|
||||
}
|
||||
s.scanDecimalDigits()
|
||||
}
|
||||
return s.finishBigIntOrNumber(start)
|
||||
}
|
||||
|
||||
func (s *Scanner) finishBigIntOrNumber(start int) Token {
|
||||
if s.peek() == 'n' {
|
||||
s.advance()
|
||||
return Token{Kind: KindBigIntLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
return Token{Kind: KindNumericLiteral, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanDecimalDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '9') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanHexDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanBinaryDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if ch == '0' || ch == '1' || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanOctalDigits() {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if (ch >= '0' && ch <= '7') || ch == '_' {
|
||||
s.pos++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanIdentifierOrKeyword(start int) Token {
|
||||
for s.pos < len(s.text) {
|
||||
ch := s.text[s.pos]
|
||||
if isIdentChar(ch) {
|
||||
s.pos++
|
||||
} else if ch >= 0x80 {
|
||||
r, size := utf8.DecodeRuneInString(s.text[s.pos:])
|
||||
if r != utf8.RuneError && (unicode.IsLetter(r) || unicode.IsDigit(r) || r == '\u200C' || r == '\u200D') {
|
||||
s.pos += size
|
||||
} else {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
text := s.text[start:s.pos]
|
||||
if kind, ok := keywordKinds[text]; ok {
|
||||
return Token{Kind: kind, TokenPos: start, Text: text}
|
||||
}
|
||||
return Token{Kind: KindIdentifier, TokenPos: start, Text: text}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanPrivateIdentifier(start int) Token {
|
||||
s.advance() // skip #
|
||||
for s.pos < len(s.text) && isIdentChar(s.text[s.pos]) {
|
||||
s.pos++
|
||||
}
|
||||
return Token{Kind: KindPrivateIdentifier, TokenPos: start, Text: s.text[start:s.pos]}
|
||||
}
|
||||
|
||||
// isIdentStartByte reports whether the ASCII byte ch can begin an
// identifier: a latin letter, underscore, or dollar sign.
func isIdentStartByte(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	case ch == '_', ch == '$':
		return true
	}
	return false
}

// isIdentStart reports whether ch can begin an identifier. It is an alias
// for isIdentStartByte, kept for call-site readability.
func isIdentStart(ch byte) bool {
	return isIdentStartByte(ch)
}

// isIdentStartRune reports whether the (possibly non-ASCII) rune r can
// begin an identifier: any unicode letter, underscore, or dollar sign.
func isIdentStartRune(r rune) bool {
	if r == '_' || r == '$' {
		return true
	}
	return unicode.IsLetter(r)
}

// isIdentChar reports whether the ASCII byte ch can continue an
// identifier: an identifier start byte or a decimal digit.
func isIdentChar(ch byte) bool {
	if ch >= '0' && ch <= '9' {
		return true
	}
	return isIdentStartByte(ch)
}
|
||||
|
||||
// keywordKinds maps keyword text to TS7 SyntaxKind values.
// These start at KindBreakKeyword = 82 and follow the iota order of the
// tsgo SyntaxKind enum, so the numeric values here must stay in lockstep
// with that enum (and with the *Keyword entries in metadata.go).
var keywordKinds = map[string]int{
	// Reserved words (always keywords)
	"break":      82,
	"case":       83,
	"catch":      84,
	"class":      85,
	"const":      86,
	"continue":   87,
	"debugger":   88,
	"default":    89,
	"delete":     90,
	"do":         91,
	"else":       92,
	"enum":       93,
	"export":     94,
	"extends":    95,
	"false":      96,
	"finally":    97,
	"for":        98,
	"function":   99,
	"if":         100,
	"import":     101,
	"in":         102,
	"instanceof": 103,
	"new":        104,
	"null":       105,
	"return":     106,
	"super":      107,
	"switch":     108,
	"this":       109,
	"throw":      110,
	"true":       111,
	"try":        112,
	"typeof":     113,
	"var":        114,
	"void":       115,
	"while":      116,
	"with":       117,
	// Strict mode reserved words
	"implements": 118,
	"interface":  119,
	"let":        120,
	"package":    121,
	"private":    122,
	"protected":  123,
	"public":     124,
	"static":     125,
	"yield":      126,
	// Contextual keywords
	"abstract":    127,
	"accessor":    128,
	"as":          129,
	"asserts":     130,
	"assert":      131,
	"any":         132,
	"async":       133,
	"await":       134,
	"boolean":     135,
	"constructor": 136,
	"declare":     137,
	"get":         138,
	"immediate":   139, // NOTE(review): tsgo-specific slot between get/infer — confirm against kind.go
	"infer":       140,
	"intrinsic":   141,
	"is":          142,
	"keyof":       143,
	"module":      144,
	"namespace":   145,
	"never":       146,
	"out":         147,
	"readonly":    148,
	"require":     149,
	"number":      150,
	"object":      151,
	"satisfies":   152,
	"set":         153,
	"string":      154,
	"symbol":      155,
	"type":        156,
	"undefined":   157,
	"unique":      158,
	"unknown":     159,
	"using":       160,
	"from":        161,
	"global":      162,
	"bigint":      163,
	"override":    164,
	"of":          165,
	"defer":       166,
}
|
||||
@@ -1,279 +1,335 @@
|
||||
package tsparser
|
||||
|
||||
// getStaticTS7Metadata returns hardcoded metadata for TypeScript 7.
|
||||
// GetStaticTS7Metadata returns hardcoded metadata for TypeScript 7.
|
||||
// This must be kept in sync with the TypeScript compiler's SyntaxKind and
|
||||
// NodeFlags enums. Eventually this should be obtained dynamically from
|
||||
// the tsgo API.
|
||||
// NodeFlags enums.
|
||||
//
|
||||
// The SyntaxKind values here correspond to the TypeScript 7 (Go port)
|
||||
// compiler. The Java extractor uses the string names (not numeric IDs)
|
||||
// to identify node kinds, so the exact numeric values only matter for
|
||||
// the metadata response.
|
||||
func getStaticTS7Metadata() *Metadata {
|
||||
func GetStaticTS7Metadata() *Metadata {
|
||||
return &Metadata{
|
||||
SyntaxKinds: syntaxKinds,
|
||||
NodeFlags: nodeFlags,
|
||||
}
|
||||
}
|
||||
|
||||
// GetSyntaxKinds returns the raw SyntaxKind name→number map.
// Note: this returns the package-level map itself (not a copy), so callers
// must treat the result as read-only.
func GetSyntaxKinds() map[string]int {
	return syntaxKinds
}
|
||||
|
||||
// BuildKindToNameMap returns a number→name reverse map for SyntaxKinds.
|
||||
func BuildKindToNameMap() map[uint32]string {
|
||||
m := make(map[uint32]string, len(syntaxKinds))
|
||||
for name, num := range syntaxKinds {
|
||||
key := uint32(num)
|
||||
if existing, ok := m[key]; !ok || len(name) < len(existing) {
|
||||
m[key] = name
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// syntaxKinds maps SyntaxKind names to their numeric values in TypeScript 7.
|
||||
// This is a subset covering the kinds most commonly seen in parsed ASTs.
|
||||
// The full set should be generated from the TypeScript source.
|
||||
// Generated from microsoft/typescript-go/internal/ast/kind.go (iota enum).
|
||||
var syntaxKinds = map[string]int{
|
||||
"Unknown": 0,
|
||||
"EndOfFileToken": 1,
|
||||
"SingleLineCommentTrivia": 2,
|
||||
"MultiLineCommentTrivia": 3,
|
||||
"NewLineTrivia": 4,
|
||||
"WhitespaceTrivia": 5,
|
||||
"NumericLiteral": 9,
|
||||
"BigIntLiteral": 10,
|
||||
"StringLiteral": 11,
|
||||
"RegularExpressionLiteral": 14,
|
||||
"NoSubstitutionTemplateLiteral": 15,
|
||||
"TemplateHead": 16,
|
||||
"TemplateMiddle": 17,
|
||||
"TemplateTail": 18,
|
||||
"OpenBraceToken": 19,
|
||||
"CloseBraceToken": 20,
|
||||
"OpenParenToken": 21,
|
||||
"CloseParenToken": 22,
|
||||
"OpenBracketToken": 23,
|
||||
"CloseBracketToken": 24,
|
||||
"DotToken": 25,
|
||||
"DotDotDotToken": 26,
|
||||
"SemicolonToken": 27,
|
||||
"CommaToken": 28,
|
||||
"QuestionDotToken": 29,
|
||||
"LessThanToken": 30,
|
||||
"GreaterThanToken": 31,
|
||||
"EqualsToken": 64,
|
||||
"PlusToken": 40,
|
||||
"MinusToken": 41,
|
||||
"AsteriskToken": 42,
|
||||
"SlashToken": 44,
|
||||
"ExclamationToken": 54,
|
||||
"QuestionToken": 58,
|
||||
"ColonToken": 59,
|
||||
"AtToken": 60,
|
||||
"EqualsGreaterThanToken": 39,
|
||||
"Identifier": 80,
|
||||
"BreakKeyword": 83,
|
||||
"CaseKeyword": 84,
|
||||
"CatchKeyword": 85,
|
||||
"ClassKeyword": 86,
|
||||
"ConstKeyword": 87,
|
||||
"ContinueKeyword": 88,
|
||||
"DebuggerKeyword": 89,
|
||||
"DefaultKeyword": 90,
|
||||
"DeleteKeyword": 91,
|
||||
"DoKeyword": 92,
|
||||
"ElseKeyword": 93,
|
||||
"EnumKeyword": 94,
|
||||
"ExportKeyword": 95,
|
||||
"ExtendsKeyword": 96,
|
||||
"FalseKeyword": 97,
|
||||
"FinallyKeyword": 98,
|
||||
"ForKeyword": 99,
|
||||
"FunctionKeyword": 100,
|
||||
"IfKeyword": 101,
|
||||
"ImportKeyword": 102,
|
||||
"InKeyword": 103,
|
||||
"InstanceOfKeyword": 104,
|
||||
"NewKeyword": 105,
|
||||
"NullKeyword": 106,
|
||||
"ReturnKeyword": 107,
|
||||
"SuperKeyword": 108,
|
||||
"SwitchKeyword": 109,
|
||||
"ThisKeyword": 110,
|
||||
"ThrowKeyword": 111,
|
||||
"TrueKeyword": 112,
|
||||
"TryKeyword": 113,
|
||||
"TypeOfKeyword": 114,
|
||||
"VarKeyword": 115,
|
||||
"VoidKeyword": 116,
|
||||
"WhileKeyword": 117,
|
||||
"WithKeyword": 118,
|
||||
"ImplementsKeyword": 119,
|
||||
"InterfaceKeyword": 120,
|
||||
"LetKeyword": 121,
|
||||
"PackageKeyword": 122,
|
||||
"PrivateKeyword": 123,
|
||||
"ProtectedKeyword": 124,
|
||||
"PublicKeyword": 125,
|
||||
"StaticKeyword": 126,
|
||||
"YieldKeyword": 127,
|
||||
"AbstractKeyword": 128,
|
||||
"AccessorKeyword": 129,
|
||||
"AsKeyword": 130,
|
||||
"AsyncKeyword": 134,
|
||||
"AwaitKeyword": 135,
|
||||
"ConstructorKeyword": 137,
|
||||
"DeclareKeyword": 138,
|
||||
"GetKeyword": 139,
|
||||
"InferKeyword": 140,
|
||||
"IsKeyword": 142,
|
||||
"KeyOfKeyword": 143,
|
||||
"ModuleKeyword": 144,
|
||||
"NamespaceKeyword": 145,
|
||||
"NeverKeyword": 146,
|
||||
"ReadonlyKeyword": 148,
|
||||
"RequireKeyword": 149,
|
||||
"NumberKeyword": 150,
|
||||
"ObjectKeyword": 151,
|
||||
"SetKeyword": 152,
|
||||
"StringKeyword": 153,
|
||||
"SymbolKeyword": 154,
|
||||
"TypeKeyword": 155,
|
||||
"UndefinedKeyword": 157,
|
||||
"UniqueKeyword": 158,
|
||||
"FromKeyword": 161,
|
||||
"OfKeyword": 165,
|
||||
"QualifiedName": 166,
|
||||
"ComputedPropertyName": 167,
|
||||
"TypeParameter": 168,
|
||||
"Parameter": 169,
|
||||
"Decorator": 170,
|
||||
"PropertySignature": 171,
|
||||
"PropertyDeclaration": 172,
|
||||
"MethodSignature": 173,
|
||||
"MethodDeclaration": 174,
|
||||
"ClassStaticBlockDeclaration": 175,
|
||||
"Constructor": 176,
|
||||
"GetAccessor": 177,
|
||||
"SetAccessor": 178,
|
||||
"CallSignature": 179,
|
||||
"ConstructSignature": 180,
|
||||
"IndexSignature": 181,
|
||||
"TypePredicate": 182,
|
||||
"TypeReference": 183,
|
||||
"FunctionType": 184,
|
||||
"ConstructorType": 185,
|
||||
"TypeQuery": 186,
|
||||
"TypeLiteral": 187,
|
||||
"ArrayType": 188,
|
||||
"TupleType": 189,
|
||||
"OptionalType": 190,
|
||||
"RestType": 191,
|
||||
"UnionType": 192,
|
||||
"IntersectionType": 193,
|
||||
"ConditionalType": 194,
|
||||
"InferType": 195,
|
||||
"ParenthesizedType": 196,
|
||||
"ThisType": 197,
|
||||
"TypeOperator": 198,
|
||||
"IndexedAccessType": 199,
|
||||
"MappedType": 200,
|
||||
"LiteralType": 201,
|
||||
"NamedTupleMember": 202,
|
||||
"TemplateLiteralType": 203,
|
||||
"TemplateLiteralTypeSpan": 204,
|
||||
"ImportType": 205,
|
||||
"ObjectBindingPattern": 206,
|
||||
"ArrayBindingPattern": 207,
|
||||
"BindingElement": 208,
|
||||
"ArrayLiteralExpression": 209,
|
||||
"ObjectLiteralExpression": 210,
|
||||
"PropertyAccessExpression": 211,
|
||||
"ElementAccessExpression": 212,
|
||||
"CallExpression": 213,
|
||||
"NewExpression": 214,
|
||||
"TaggedTemplateExpression": 215,
|
||||
"TypeAssertionExpression": 216,
|
||||
"ParenthesizedExpression": 217,
|
||||
"FunctionExpression": 218,
|
||||
"ArrowFunction": 219,
|
||||
"DeleteExpression": 220,
|
||||
"TypeOfExpression": 221,
|
||||
"VoidExpression": 222,
|
||||
"AwaitExpression": 223,
|
||||
"PrefixUnaryExpression": 224,
|
||||
"PostfixUnaryExpression": 225,
|
||||
"BinaryExpression": 226,
|
||||
"ConditionalExpression": 227,
|
||||
"TemplateExpression": 228,
|
||||
"YieldExpression": 229,
|
||||
"SpreadElement": 230,
|
||||
"ClassExpression": 231,
|
||||
"ExpressionWithTypeArguments": 233,
|
||||
"AsExpression": 234,
|
||||
"NonNullExpression": 235,
|
||||
"MetaProperty": 236,
|
||||
"SyntheticExpression": 237,
|
||||
"SatisfiesExpression": 238,
|
||||
"TemplateSpan": 239,
|
||||
"SemicolonClassElement": 240,
|
||||
"Block": 241,
|
||||
"EmptyStatement": 242,
|
||||
"VariableStatement": 243,
|
||||
"ExpressionStatement": 244,
|
||||
"IfStatement": 245,
|
||||
"DoStatement": 246,
|
||||
"WhileStatement": 247,
|
||||
"ForStatement": 248,
|
||||
"ForInStatement": 249,
|
||||
"ForOfStatement": 250,
|
||||
"ContinueStatement": 251,
|
||||
"BreakStatement": 252,
|
||||
"ReturnStatement": 253,
|
||||
"WithStatement": 254,
|
||||
"SwitchStatement": 255,
|
||||
"LabeledStatement": 256,
|
||||
"ThrowStatement": 257,
|
||||
"TryStatement": 258,
|
||||
"DebuggerStatement": 259,
|
||||
"VariableDeclaration": 260,
|
||||
"VariableDeclarationList": 261,
|
||||
"FunctionDeclaration": 262,
|
||||
"ClassDeclaration": 263,
|
||||
"InterfaceDeclaration": 264,
|
||||
"TypeAliasDeclaration": 265,
|
||||
"EnumDeclaration": 266,
|
||||
"ModuleDeclaration": 267,
|
||||
"ModuleBlock": 268,
|
||||
"CaseBlock": 269,
|
||||
"NamespaceExportDeclaration": 270,
|
||||
"ImportEqualsDeclaration": 271,
|
||||
"ImportDeclaration": 272,
|
||||
"ImportClause": 273,
|
||||
"NamespaceImport": 274,
|
||||
"NamedImports": 275,
|
||||
"ImportSpecifier": 276,
|
||||
"ExportAssignment": 277,
|
||||
"ExportDeclaration": 278,
|
||||
"NamedExports": 279,
|
||||
"NamespaceExport": 280,
|
||||
"ExportSpecifier": 281,
|
||||
"ExternalModuleReference": 283,
|
||||
"CaseClause": 295,
|
||||
"DefaultClause": 296,
|
||||
"HeritageClause": 297,
|
||||
"CatchClause": 298,
|
||||
"ImportAttributes": 302,
|
||||
"ImportAttribute": 303,
|
||||
"PropertyAssignment": 304,
|
||||
"ShorthandPropertyAssignment": 305,
|
||||
"SpreadAssignment": 306,
|
||||
"EnumMember": 307,
|
||||
"SourceFile": 316,
|
||||
"NotEmittedStatement": 354,
|
||||
"CommaListExpression": 360,
|
||||
"SyntaxList": 362,
|
||||
"JSDocTypeExpression": 316,
|
||||
"JSDocComment": 327,
|
||||
"JsxElement": 284,
|
||||
"JsxSelfClosingElement": 285,
|
||||
"JsxOpeningElement": 286,
|
||||
"JsxClosingElement": 287,
|
||||
"JsxFragment": 288,
|
||||
"JsxOpeningFragment": 289,
|
||||
"JsxClosingFragment": 290,
|
||||
"JsxAttribute": 291,
|
||||
"JsxAttributes": 292,
|
||||
"JsxSpreadAttribute": 293,
|
||||
"JsxExpression": 294,
|
||||
"JsxText": 12,
|
||||
"JsxTextAllWhiteSpaces": 13,
|
||||
"Unknown": 0,
|
||||
"EndOfFile": 1,
|
||||
"NumericLiteral": 8,
|
||||
"BigIntLiteral": 9,
|
||||
"StringLiteral": 10,
|
||||
"JsxText": 11,
|
||||
"JsxTextAllWhiteSpaces": 12,
|
||||
"RegularExpressionLiteral": 13,
|
||||
"NoSubstitutionTemplateLiteral": 14,
|
||||
"TemplateHead": 15,
|
||||
"TemplateMiddle": 16,
|
||||
"TemplateTail": 17,
|
||||
"OpenBraceToken": 18,
|
||||
"CloseBraceToken": 19,
|
||||
"OpenParenToken": 20,
|
||||
"CloseParenToken": 21,
|
||||
"OpenBracketToken": 22,
|
||||
"CloseBracketToken": 23,
|
||||
"DotToken": 24,
|
||||
"DotDotDotToken": 25,
|
||||
"SemicolonToken": 26,
|
||||
"CommaToken": 27,
|
||||
"QuestionDotToken": 28,
|
||||
"LessThanToken": 29,
|
||||
"GreaterThanToken": 31,
|
||||
"EqualsGreaterThanToken": 38,
|
||||
"PlusToken": 39,
|
||||
"MinusToken": 40,
|
||||
"AsteriskToken": 41,
|
||||
"SlashToken": 43,
|
||||
"PlusPlusToken": 45,
|
||||
"MinusMinusToken": 46,
|
||||
"ExclamationToken": 53,
|
||||
"TildeToken": 54,
|
||||
"QuestionToken": 57,
|
||||
"ColonToken": 58,
|
||||
"AtToken": 59,
|
||||
"EqualsToken": 63,
|
||||
"Identifier": 79,
|
||||
"PrivateIdentifier": 80,
|
||||
"BreakKeyword": 82,
|
||||
"CaseKeyword": 83,
|
||||
"CatchKeyword": 84,
|
||||
"ClassKeyword": 85,
|
||||
"ConstKeyword": 86,
|
||||
"ContinueKeyword": 87,
|
||||
"DebuggerKeyword": 88,
|
||||
"DefaultKeyword": 89,
|
||||
"DeleteKeyword": 90,
|
||||
"DoKeyword": 91,
|
||||
"ElseKeyword": 92,
|
||||
"EnumKeyword": 93,
|
||||
"ExportKeyword": 94,
|
||||
"ExtendsKeyword": 95,
|
||||
"FalseKeyword": 96,
|
||||
"FinallyKeyword": 97,
|
||||
"ForKeyword": 98,
|
||||
"FunctionKeyword": 99,
|
||||
"IfKeyword": 100,
|
||||
"ImportKeyword": 101,
|
||||
"InKeyword": 102,
|
||||
"InstanceOfKeyword": 103,
|
||||
"NewKeyword": 104,
|
||||
"NullKeyword": 105,
|
||||
"ReturnKeyword": 106,
|
||||
"SuperKeyword": 107,
|
||||
"SwitchKeyword": 108,
|
||||
"ThisKeyword": 109,
|
||||
"ThrowKeyword": 110,
|
||||
"TrueKeyword": 111,
|
||||
"TryKeyword": 112,
|
||||
"TypeOfKeyword": 113,
|
||||
"VarKeyword": 114,
|
||||
"VoidKeyword": 115,
|
||||
"WhileKeyword": 116,
|
||||
"WithKeyword": 117,
|
||||
"ImplementsKeyword": 118,
|
||||
"InterfaceKeyword": 119,
|
||||
"LetKeyword": 120,
|
||||
"PackageKeyword": 121,
|
||||
"PrivateKeyword": 122,
|
||||
"ProtectedKeyword": 123,
|
||||
"PublicKeyword": 124,
|
||||
"StaticKeyword": 125,
|
||||
"YieldKeyword": 126,
|
||||
"AbstractKeyword": 127,
|
||||
"AccessorKeyword": 128,
|
||||
"AsKeyword": 129,
|
||||
"AssertsKeyword": 130,
|
||||
"AssertKeyword": 131,
|
||||
"AnyKeyword": 132,
|
||||
"AsyncKeyword": 133,
|
||||
"AwaitKeyword": 134,
|
||||
"BooleanKeyword": 135,
|
||||
"ConstructorKeyword": 136,
|
||||
"DeclareKeyword": 137,
|
||||
"GetKeyword": 138,
|
||||
"InferKeyword": 140,
|
||||
"IntrinsicKeyword": 141,
|
||||
"IsKeyword": 142,
|
||||
"KeyOfKeyword": 143,
|
||||
"ModuleKeyword": 144,
|
||||
"NamespaceKeyword": 145,
|
||||
"NeverKeyword": 146,
|
||||
"ReadonlyKeyword": 148,
|
||||
"RequireKeyword": 149,
|
||||
"NumberKeyword": 150,
|
||||
"ObjectKeyword": 151,
|
||||
"SetKeyword": 153,
|
||||
"StringKeyword": 154,
|
||||
"SymbolKeyword": 155,
|
||||
"TypeKeyword": 156,
|
||||
"UndefinedKeyword": 157,
|
||||
"UniqueKeyword": 158,
|
||||
"UnknownKeyword": 159,
|
||||
"FromKeyword": 161,
|
||||
"BigIntKeyword": 163,
|
||||
"OverrideKeyword": 164,
|
||||
"OfKeyword": 165,
|
||||
"DeferKeyword": 166,
|
||||
"QualifiedName": 167,
|
||||
"ComputedPropertyName": 168,
|
||||
"TypeParameter": 169,
|
||||
"Parameter": 170,
|
||||
"Decorator": 171,
|
||||
"PropertySignature": 172,
|
||||
"PropertyDeclaration": 173,
|
||||
"MethodSignature": 174,
|
||||
"MethodDeclaration": 175,
|
||||
"ClassStaticBlockDeclaration": 176,
|
||||
"Constructor": 177,
|
||||
"GetAccessor": 178,
|
||||
"SetAccessor": 179,
|
||||
"CallSignature": 180,
|
||||
"ConstructSignature": 181,
|
||||
"IndexSignature": 182,
|
||||
"TypePredicate": 183,
|
||||
"TypeReference": 184,
|
||||
"FunctionType": 185,
|
||||
"ConstructorType": 186,
|
||||
"TypeQuery": 187,
|
||||
"TypeLiteral": 188,
|
||||
"ArrayType": 189,
|
||||
"TupleType": 190,
|
||||
"OptionalType": 191,
|
||||
"RestType": 192,
|
||||
"UnionType": 193,
|
||||
"IntersectionType": 194,
|
||||
"ConditionalType": 195,
|
||||
"InferType": 196,
|
||||
"ParenthesizedType": 197,
|
||||
"ThisType": 198,
|
||||
"TypeOperator": 199,
|
||||
"IndexedAccessType": 200,
|
||||
"MappedType": 201,
|
||||
"LiteralType": 202,
|
||||
"NamedTupleMember": 203,
|
||||
"TemplateLiteralType": 204,
|
||||
"TemplateLiteralTypeSpan": 205,
|
||||
"ImportType": 206,
|
||||
"ObjectBindingPattern": 207,
|
||||
"ArrayBindingPattern": 208,
|
||||
"BindingElement": 209,
|
||||
"ArrayLiteralExpression": 210,
|
||||
"ObjectLiteralExpression": 211,
|
||||
"PropertyAccessExpression": 212,
|
||||
"ElementAccessExpression": 213,
|
||||
"CallExpression": 214,
|
||||
"NewExpression": 215,
|
||||
"TaggedTemplateExpression": 216,
|
||||
"TypeAssertionExpression": 217,
|
||||
"ParenthesizedExpression": 218,
|
||||
"FunctionExpression": 219,
|
||||
"ArrowFunction": 220,
|
||||
"DeleteExpression": 221,
|
||||
"TypeOfExpression": 222,
|
||||
"VoidExpression": 223,
|
||||
"AwaitExpression": 224,
|
||||
"PrefixUnaryExpression": 225,
|
||||
"PostfixUnaryExpression": 226,
|
||||
"BinaryExpression": 227,
|
||||
"ConditionalExpression": 228,
|
||||
"TemplateExpression": 229,
|
||||
"YieldExpression": 230,
|
||||
"SpreadElement": 231,
|
||||
"ClassExpression": 232,
|
||||
"OmittedExpression": 233,
|
||||
"ExpressionWithTypeArguments": 234,
|
||||
"AsExpression": 235,
|
||||
"NonNullExpression": 236,
|
||||
"MetaProperty": 237,
|
||||
"SatisfiesExpression": 239,
|
||||
"TemplateSpan": 240,
|
||||
"SemicolonClassElement": 241,
|
||||
"Block": 242,
|
||||
"EmptyStatement": 243,
|
||||
"VariableStatement": 244,
|
||||
"ExpressionStatement": 245,
|
||||
"IfStatement": 246,
|
||||
"DoStatement": 247,
|
||||
"WhileStatement": 248,
|
||||
"ForStatement": 249,
|
||||
"ForInStatement": 250,
|
||||
"ForOfStatement": 251,
|
||||
"ContinueStatement": 252,
|
||||
"BreakStatement": 253,
|
||||
"ReturnStatement": 254,
|
||||
"WithStatement": 255,
|
||||
"SwitchStatement": 256,
|
||||
"LabeledStatement": 257,
|
||||
"ThrowStatement": 258,
|
||||
"TryStatement": 259,
|
||||
"DebuggerStatement": 260,
|
||||
"VariableDeclaration": 261,
|
||||
"VariableDeclarationList": 262,
|
||||
"FunctionDeclaration": 263,
|
||||
"ClassDeclaration": 264,
|
||||
"InterfaceDeclaration": 265,
|
||||
"TypeAliasDeclaration": 266,
|
||||
"EnumDeclaration": 267,
|
||||
"ModuleDeclaration": 268,
|
||||
"ModuleBlock": 269,
|
||||
"CaseBlock": 270,
|
||||
"NamespaceExportDeclaration": 271,
|
||||
"ImportEqualsDeclaration": 272,
|
||||
"ImportDeclaration": 273,
|
||||
"ImportClause": 274,
|
||||
"NamespaceImport": 275,
|
||||
"NamedImports": 276,
|
||||
"ImportSpecifier": 277,
|
||||
"ExportAssignment": 278,
|
||||
"ExportDeclaration": 279,
|
||||
"NamedExports": 280,
|
||||
"NamespaceExport": 281,
|
||||
"ExportSpecifier": 282,
|
||||
"MissingDeclaration": 283,
|
||||
"ExternalModuleReference": 284,
|
||||
"JsxElement": 285,
|
||||
"JsxSelfClosingElement": 286,
|
||||
"JsxOpeningElement": 287,
|
||||
"JsxClosingElement": 288,
|
||||
"JsxFragment": 289,
|
||||
"JsxOpeningFragment": 290,
|
||||
"JsxClosingFragment": 291,
|
||||
"JsxAttribute": 292,
|
||||
"JsxAttributes": 293,
|
||||
"JsxSpreadAttribute": 294,
|
||||
"JsxExpression": 295,
|
||||
"JsxNamespacedName": 296,
|
||||
"CaseClause": 297,
|
||||
"DefaultClause": 298,
|
||||
"HeritageClause": 299,
|
||||
"CatchClause": 300,
|
||||
"ImportAttributes": 301,
|
||||
"ImportAttribute": 302,
|
||||
"PropertyAssignment": 303,
|
||||
"ShorthandPropertyAssignment": 304,
|
||||
"SpreadAssignment": 305,
|
||||
"EnumMember": 306,
|
||||
"SourceFile": 307,
|
||||
"JSDocTypeExpression": 308,
|
||||
"JSDocNameReference": 309,
|
||||
"JSDocNullableType": 312,
|
||||
"JSDocNonNullableType": 313,
|
||||
"JSDocOptionalType": 314,
|
||||
"JSDocVariadicType": 315,
|
||||
"JSDoc": 316,
|
||||
"JSDocText": 317,
|
||||
"JSDocTypeLiteral": 318,
|
||||
"JSDocSignature": 319,
|
||||
"JSDocLink": 320,
|
||||
"JSDocLinkCode": 321,
|
||||
"JSDocLinkPlain": 322,
|
||||
"JSDocTag": 323,
|
||||
"JSDocAugmentsTag": 324,
|
||||
"JSDocImplementsTag": 325,
|
||||
"JSDocDeprecatedTag": 326,
|
||||
"JSDocPublicTag": 327,
|
||||
"JSDocPrivateTag": 328,
|
||||
"JSDocProtectedTag": 329,
|
||||
"JSDocReadonlyTag": 330,
|
||||
"JSDocOverrideTag": 331,
|
||||
"JSDocCallbackTag": 332,
|
||||
"JSDocOverloadTag": 333,
|
||||
"JSDocParameterTag": 334,
|
||||
"JSDocReturnTag": 335,
|
||||
"JSDocThisTag": 336,
|
||||
"JSDocTypeTag": 337,
|
||||
"JSDocTemplateTag": 338,
|
||||
"JSDocTypedefTag": 339,
|
||||
"JSDocSeeTag": 340,
|
||||
"JSDocPropertyTag": 341,
|
||||
"JSDocThrowsTag": 342,
|
||||
"JSDocSatisfiesTag": 343,
|
||||
"JSDocImportTag": 344,
|
||||
}
|
||||
|
||||
// nodeFlags maps NodeFlags names to their numeric values.
|
||||
|
||||
@@ -100,7 +100,7 @@ func (p *StandaloneParser) Parse(filename string) (*ParseResult, error) {
|
||||
|
||||
// GetMetadata returns static TS7 metadata.
|
||||
func (p *StandaloneParser) GetMetadata() (*Metadata, error) {
|
||||
return getStaticTS7Metadata(), nil
|
||||
return GetStaticTS7Metadata(), nil
|
||||
}
|
||||
|
||||
// Reset is a no-op for the standalone parser.
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/astconv"
|
||||
)
|
||||
|
||||
// TsgoParser implements the Parser interface by running the tsgo binary
|
||||
@@ -186,26 +188,36 @@ func (p *TsgoParser) sendRequest(method string, params interface{}) (json.RawMes
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "[tsgo] >>> %s id=%d\n", method, id)
|
||||
|
||||
if err := p.writeMessage(data); err != nil {
|
||||
return nil, fmt.Errorf("failed to write request: %w", err)
|
||||
}
|
||||
|
||||
// Read the response
|
||||
respData, err := p.readMessage()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
// Read responses, skipping notifications (messages without a matching id).
|
||||
// In --async mode, tsgo may send diagnostic notifications between responses.
|
||||
for {
|
||||
respData, err := p.readMessage()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
var resp jsonRPCResponse
|
||||
if err := json.Unmarshal(respData, &resp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
var resp jsonRPCResponse
|
||||
if err := json.Unmarshal(respData, &resp); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
if resp.Error != nil {
|
||||
return nil, fmt.Errorf("tsgo API error %d: %s", resp.Error.Code, resp.Error.Message)
|
||||
}
|
||||
// Skip notifications (id=0 means no id field was present in JSON)
|
||||
if resp.ID != id {
|
||||
continue
|
||||
}
|
||||
|
||||
return resp.Result, nil
|
||||
if resp.Error != nil {
|
||||
return nil, fmt.Errorf("tsgo API error %d: %s", resp.Error.Code, resp.Error.Message)
|
||||
}
|
||||
|
||||
return resp.Result, nil
|
||||
}
|
||||
}
|
||||
|
||||
// call sends a request with proper locking and initialization.
|
||||
@@ -229,30 +241,48 @@ type updateSnapshotResponse struct {
|
||||
} `json:"projects"`
|
||||
}
|
||||
|
||||
// ensureProjectOpen opens a project for the given file's directory using
|
||||
// a temporary tsconfig, or uses the existing snapshot if already open.
|
||||
// ensureProjectOpen opens a project for the given file.
|
||||
// The tsgo API requires a tsconfig for project opening, so if none exists
|
||||
// in the file's directory, we create a temporary one.
|
||||
func (p *TsgoParser) ensureProjectOpen(filename string) error {
|
||||
if p.snapshotHandle != "" && p.projectHandle != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create a snapshot by opening a project.
|
||||
// For single-file parsing without a tsconfig, we ask tsgo to open
|
||||
// the file's directory as a project. The tsgo API requires a
|
||||
// tsconfig path for OpenProject.
|
||||
dir := filepath.Dir(filename)
|
||||
base := filepath.Base(filename)
|
||||
tsconfigPath := filepath.Join(dir, "tsconfig.json")
|
||||
|
||||
// First try: updateSnapshot with the file's directory tsconfig
|
||||
// If no tsconfig exists, create a temporary one
|
||||
createdTsconfig := false
|
||||
if _, err := os.Stat(tsconfigPath); os.IsNotExist(err) {
|
||||
tsconfig := fmt.Sprintf(`{
|
||||
"compilerOptions": {
|
||||
"target": "esnext",
|
||||
"module": "esnext",
|
||||
"noEmit": true,
|
||||
"strict": false,
|
||||
"allowJs": true
|
||||
},
|
||||
"files": [%q]
|
||||
}`, base)
|
||||
if err := os.WriteFile(tsconfigPath, []byte(tsconfig), 0644); err != nil {
|
||||
return fmt.Errorf("failed to create temporary tsconfig: %w", err)
|
||||
}
|
||||
createdTsconfig = true
|
||||
}
|
||||
|
||||
result, err := p.sendRequest("updateSnapshot", map[string]interface{}{
|
||||
"openProject": tsconfigPath,
|
||||
})
|
||||
|
||||
// Clean up temporary tsconfig
|
||||
if createdTsconfig {
|
||||
os.Remove(tsconfigPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// If no tsconfig exists, try without a project
|
||||
result, err = p.sendRequest("updateSnapshot", map[string]interface{}{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create snapshot: %w", err)
|
||||
}
|
||||
return fmt.Errorf("failed to open project: %w", err)
|
||||
}
|
||||
|
||||
var resp updateSnapshotResponse
|
||||
@@ -303,18 +333,37 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) {
|
||||
return nil, fmt.Errorf("parse %s: %w", filename, err)
|
||||
}
|
||||
|
||||
// The result is the binary-encoded source file data (base64 when
|
||||
// using JSON protocol). For now, store the raw response.
|
||||
// TODO: Decode the binary format into a JSON AST.
|
||||
// The result is {"data":"<base64>"} containing a binary-encoded AST.
|
||||
var dataResp struct {
|
||||
Data string `json:"data"`
|
||||
}
|
||||
if err := json.Unmarshal(result, &dataResp); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to parse getSourceFile response: %w", filename, err)
|
||||
}
|
||||
|
||||
binaryAST, err := astconv.DecodeBinaryASTFromBase64(dataResp.Data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to decode binary AST: %w", filename, err)
|
||||
}
|
||||
|
||||
kindToName := BuildKindToNameMap()
|
||||
converter := astconv.NewConverter(binaryAST, kindToName)
|
||||
astObj, err := converter.Convert()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: failed to convert AST: %w", filename, err)
|
||||
}
|
||||
|
||||
filtered := astconv.FilterWhitelist(astObj)
|
||||
|
||||
return &ParseResult{
|
||||
AST: result,
|
||||
RawData: []byte(result),
|
||||
AST: filtered,
|
||||
RawData: []byte(dataResp.Data),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetMetadata returns the syntax kinds and node flags.
|
||||
func (p *TsgoParser) GetMetadata() (*Metadata, error) {
|
||||
return getStaticTS7Metadata(), nil
|
||||
return GetStaticTS7Metadata(), nil
|
||||
}
|
||||
|
||||
// Reset resets the parser state, killing and restarting the subprocess.
|
||||
|
||||
@@ -193,7 +193,7 @@ func TestTsgoGetMetadata(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticMetadata(t *testing.T) {
|
||||
meta := getStaticTS7Metadata()
|
||||
meta := GetStaticTS7Metadata()
|
||||
|
||||
required := []string{"SourceFile", "Identifier", "Block", "VariableStatement",
|
||||
"FunctionDeclaration", "ClassDeclaration", "InterfaceDeclaration"}
|
||||
@@ -235,3 +235,68 @@ func min(a, b int) int {
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// TestTsgoParse is an integration test: it runs the real tsgo binary
// (skipped when tsgo is not on PATH), parses a sample file through the
// full decode+convert pipeline, and sanity-checks the resulting AST:
// root kind is SourceFile, position properties exist, and the statements
// array is non-empty.
func TestTsgoParse(t *testing.T) {
	if _, err := exec.LookPath("tsgo"); err != nil {
		t.Skip("tsgo not found on PATH")
	}

	sampleFile := findTestFile(t)
	parser := NewTsgoParser(Config{Stderr: os.Stderr})
	defer parser.Close()

	result, err := parser.Parse(sampleFile)
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}

	ast, ok := result.AST.(map[string]interface{})
	if !ok {
		t.Fatalf("Expected AST to be map[string]interface{}, got %T", result.AST)
	}

	// Verify the root is a SourceFile
	kindVal, ok := ast["kind"]
	if !ok {
		t.Fatal("Missing 'kind' property on root node")
	}
	// NOTE(review): this assumes the converter stores kinds as Go ints
	// (not float64 as JSON round-tripping would produce) — confirm against
	// converter.go.
	kindNum, ok := kindVal.(int)
	if !ok {
		t.Fatalf("Expected 'kind' to be int, got %T", kindVal)
	}
	if kindNum != 307 { // SourceFile = 307 in TS7
		t.Errorf("Expected root kind=307 (SourceFile), got %d", kindNum)
	}

	// Verify $pos and $end
	if _, ok := ast["$pos"]; !ok {
		t.Error("Missing '$pos' property")
	}
	if _, ok := ast["$end"]; !ok {
		t.Error("Missing '$end' property")
	}

	// Verify statements array
	stmts, ok := ast["statements"]
	if !ok {
		t.Fatal("Missing 'statements' property")
	}
	stmtsArr, ok := stmts.([]interface{})
	if !ok {
		t.Fatalf("Expected statements to be array, got %T", stmts)
	}
	if len(stmtsArr) == 0 {
		t.Error("Expected non-empty statements array")
	}

	// Print a nicely indented snippet for debug
	jsonBytes, err := json.MarshalIndent(ast, "", " ")
	if err != nil {
		t.Fatalf("Failed to marshal AST: %v", err)
	}
	snippet := string(jsonBytes)
	if len(snippet) > 2000 {
		snippet = snippet[:2000] + "\n... (truncated)"
	}
	t.Logf("Parsed AST (first 2000 chars):\n%s", snippet)
}
|
||||
|
||||
@@ -297,10 +297,20 @@ func TestCompareOutputs(t *testing.T) {
|
||||
os.WriteFile(filepath.Join(outDir, basename+".nodejs.json"), nodejsNorm, 0644)
|
||||
os.WriteFile(filepath.Join(outDir, basename+".go.json"), goNorm, 0644)
|
||||
|
||||
t.Errorf("Output mismatch for %s\n"+
|
||||
" Node.js output saved to: validation-output/%s.nodejs.json\n"+
|
||||
" Go output saved to: validation-output/%s.go.json",
|
||||
basename, basename, basename)
|
||||
// Parse both outputs and check for structural diffs (ignoring expected kind/flags differences)
|
||||
var nodejsObj, goObj map[string]interface{}
|
||||
json.Unmarshal(nodejsNorm, &nodejsObj)
|
||||
json.Unmarshal(goNorm, &goObj)
|
||||
|
||||
structural := countStructuralDiffs(nodejsObj["ast"], goObj["ast"], "root")
|
||||
if structural > 0 {
|
||||
t.Errorf("Output has %d structural diff(s) for %s (beyond expected kind/flags diffs)\n"+
|
||||
" Node.js output saved to: validation-output/%s.nodejs.json\n"+
|
||||
" Go output saved to: validation-output/%s.go.json",
|
||||
structural, basename, basename, basename)
|
||||
} else {
|
||||
t.Logf("Output for %s differs only in expected kind/flags/token numeric values (TS5 vs TS7)", basename)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -324,3 +334,76 @@ func TestNormalizeJSON(t *testing.T) {
|
||||
t.Errorf("got:\n%s\nexpected:\n%s", string(result), expected)
|
||||
}
|
||||
}
|
||||
|
||||
// numericValueKeys are JSON object keys whose numeric values are expected to differ
|
||||
// between TS5 and TS7 (SyntaxKind/NodeFlags numeric values).
|
||||
var numericValueKeys = map[string]bool{
|
||||
"kind": true,
|
||||
"flags": true,
|
||||
"token": true,
|
||||
"operator": true,
|
||||
}
|
||||
|
||||
// countStructuralDiffs recursively compares two JSON values and returns the
|
||||
// number of differences that are NOT expected TS5↔TS7 numeric kind/flags diffs.
|
||||
func countStructuralDiffs(a, b interface{}, path string) int {
|
||||
count := 0
|
||||
switch av := a.(type) {
|
||||
case map[string]interface{}:
|
||||
bv, ok := b.(map[string]interface{})
|
||||
if !ok {
|
||||
return 1
|
||||
}
|
||||
allKeys := map[string]bool{}
|
||||
for k := range av {
|
||||
allKeys[k] = true
|
||||
}
|
||||
for k := range bv {
|
||||
allKeys[k] = true
|
||||
}
|
||||
for k := range allKeys {
|
||||
aVal, aOk := av[k]
|
||||
bVal, bOk := bv[k]
|
||||
if !aOk || !bOk {
|
||||
count++
|
||||
continue
|
||||
}
|
||||
count += countStructuralDiffs(aVal, bVal, path+"."+k)
|
||||
}
|
||||
case []interface{}:
|
||||
bv, ok := b.([]interface{})
|
||||
if !ok {
|
||||
return 1
|
||||
}
|
||||
if len(av) != len(bv) {
|
||||
return 1
|
||||
}
|
||||
for i := range av {
|
||||
count += countStructuralDiffs(av[i], bv[i], fmt.Sprintf("%s[%d]", path, i))
|
||||
}
|
||||
default:
|
||||
if a != b {
|
||||
// Check if this is an expected numeric diff for kind/flags/token/operator
|
||||
key := lastPathComponent(path)
|
||||
if numericValueKeys[key] {
|
||||
// Both must be numbers for this to be an expected diff
|
||||
_, aNum := a.(float64)
|
||||
_, bNum := b.(float64)
|
||||
if aNum && bNum {
|
||||
return 0 // Expected TS5↔TS7 numeric diff
|
||||
}
|
||||
}
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func lastPathComponent(path string) string {
|
||||
for i := len(path) - 1; i >= 0; i-- {
|
||||
if path[i] == '.' {
|
||||
return path[i+1:]
|
||||
}
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user