TypeScript-Go wrapper: binary AST decoder, JSON converter, and tokenizer

Implement the core components for translating tsgo's binary AST format
into the JSON format expected by the Java extractor:

- decoder.go: Binary AST format parser with random-access node accessors
  (kind, pos, end, flags, children, strings, extended data)
- converter.go: Walks decoded AST and produces JSON matching Node.js
  wrapper output (augmented $pos, $end, $lineStarts, $declarationKind,
  isTypeOnly, HeritageClause token, TypeOperator operator)
- childprops.go: Maps ~100 SyntaxKind names to ordered child property
  name lists for correct bitmask-to-property assignment
- scanner.go: TypeScript tokenizer producing the $tokens array with rescan
  support for regex, template, and greater-than disambiguation

Update metadata.go with correct TS7 SyntaxKind iota values and export
metadata functions. Wire decoder+converter through TsgoParser.Parse().

Validation test passes: all 421 diffs are expected TS5-vs-TS7 numeric
kind/flags/token/operator value differences. Zero structural diffs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Asger F
2026-04-10 14:36:00 +02:00
parent 37852aa1d3
commit f3b27a56b1
9 changed files with 2475 additions and 297 deletions

View File

@@ -0,0 +1,210 @@
package astconv
// childProps maps SyntaxKind string names to ordered lists of child property names.
// The order corresponds to the bitmask order in the binary encoder. When a node
// uses the Children data type (top 2 bits = 0b00), the low byte is a bitmask
// indicating which of these properties are present. Children are consumed in order.
//
// Because the mask is a single byte, only the first eight entries of a list
// can be flagged as absent.
//
// Kinds whose children are a single Node, a single NodeList, or an "operand"
// are registered in singleChildProp, singleNodeListProp and operandKinds
// rather than here. Lookups go through GetChildProperties.
//
// These names must match the property names expected by the Java extractor.
// Derived from microsoft/typescript-go/internal/api/encoder/encoder.go.
var childProps = map[string][]string{
// Multi-child nodes with property mask
"QualifiedName": {"left", "right"},
"TypeParameter": {"modifiers", "name", "constraint", "default"},
"IfStatement": {"expression", "thenStatement", "elseStatement"},
"DoStatement": {"statement", "expression"},
"WhileStatement": {"expression", "statement"},
"ForStatement": {"initializer", "condition", "incrementor", "statement"},
"ForInStatement": {"awaitModifier", "initializer", "expression", "statement"},
"ForOfStatement": {"awaitModifier", "initializer", "expression", "statement"},
"WithStatement": {"expression", "statement"},
"SwitchStatement": {"expression", "caseBlock"},
"CaseClause": {"expression", "statements"},
"DefaultClause": {"expression", "statements"},
"TryStatement": {"tryBlock", "catchClause", "finallyBlock"},
"CatchClause": {"variableDeclaration", "block"},
"LabeledStatement": {"label", "statement"},
"VariableStatement": {"modifiers", "declarationList"},
"VariableDeclarationList": {"declarations"},
"VariableDeclaration": {"name", "exclamationToken", "type", "initializer"},
"Parameter": {"modifiers", "dotDotDotToken", "name", "questionToken", "type", "initializer"},
"BindingElement": {"dotDotDotToken", "propertyName", "name", "initializer"},
"FunctionDeclaration": {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
"InterfaceDeclaration": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
"TypeAliasDeclaration": {"modifiers", "name", "typeParameters", "type"},
"EnumMember": {"name", "initializer"},
"EnumDeclaration": {"modifiers", "name", "members"},
"ModuleDeclaration": {"modifiers", "name", "body"},
"ImportEqualsDeclaration": {"modifiers", "name", "moduleReference"},
"ImportDeclaration": {"modifiers", "importClause", "moduleSpecifier", "attributes"},
"JSImportDeclaration": {"modifiers", "importClause", "moduleSpecifier", "attributes"},
"ImportSpecifier": {"propertyName", "name"},
"ImportClause": {"name", "namedBindings"},
"ExportAssignment": {"modifiers", "expression"},
"JSExportAssignment": {"modifiers", "expression"},
"NamespaceExportDeclaration": {"modifiers", "name"},
"ExportDeclaration": {"modifiers", "exportClause", "moduleSpecifier", "attributes"},
"ExportSpecifier": {"propertyName", "name"},
"CallSignature": {"typeParameters", "parameters", "type"},
"ConstructSignature": {"typeParameters", "parameters", "type"},
"Constructor": {"modifiers", "typeParameters", "parameters", "type", "body"},
"GetAccessor": {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
"SetAccessor": {"modifiers", "name", "typeParameters", "parameters", "type", "body"},
"IndexSignature": {"modifiers", "parameters", "type"},
"MethodSignature": {"modifiers", "name", "questionToken", "typeParameters", "parameters", "type"},
"MethodDeclaration": {"modifiers", "asteriskToken", "name", "questionToken", "typeParameters", "parameters", "type", "body"},
"PropertySignature": {"modifiers", "name", "questionToken", "type", "initializer"},
"PropertyDeclaration": {"modifiers", "name", "questionToken", "type", "initializer"},
"BinaryExpression": {"left", "operatorToken", "right"},
"YieldExpression": {"asteriskToken", "expression"},
"ArrowFunction": {"modifiers", "typeParameters", "parameters", "type", "equalsGreaterThanToken", "body"},
"FunctionExpression": {"modifiers", "asteriskToken", "name", "typeParameters", "parameters", "type", "body"},
"AsExpression": {"expression", "type"},
"SatisfiesExpression": {"expression", "type"},
"ConditionalExpression": {"condition", "questionToken", "whenTrue", "colonToken", "whenFalse"},
"PropertyAccessExpression": {"expression", "questionDotToken", "name"},
"ElementAccessExpression": {"expression", "questionDotToken", "argumentExpression"},
"CallExpression": {"expression", "questionDotToken", "typeArguments", "arguments"},
"NewExpression": {"expression", "typeArguments", "arguments"},
"TemplateExpression": {"head", "templateSpans"},
"TemplateSpan": {"expression", "literal"},
"TaggedTemplateExpression": {"tag", "questionDotToken", "typeArguments", "template"},
"PropertyAssignment": {"modifiers", "name", "questionToken", "initializer"},
"ShorthandPropertyAssignment": {"modifiers", "name", "questionToken", "equalsToken", "objectAssignmentInitializer"},
"TypeAssertionExpression": {"type", "expression"},
"ConditionalType": {"checkType", "extendsType", "trueType", "falseType"},
"IndexedAccessType": {"objectType", "indexType"},
"TypeReference": {"typeName", "typeArguments"},
"ExpressionWithTypeArguments": {"expression", "typeArguments"},
"TypePredicate": {"assertsModifier", "parameterName", "type"},
"ImportType": {"argument", "attributes", "qualifier", "typeArguments"},
"ImportAttribute": {"name", "value"},
"TypeQuery": {"exprName", "typeArguments"},
"MappedType": {"readonlyToken", "typeParameter", "nameType", "questionToken", "type", "members"},
"NamedTupleMember": {"dotDotDotToken", "name", "questionToken", "type"},
"FunctionType": {"typeParameters", "parameters", "type"},
"ConstructorType": {"modifiers", "typeParameters", "parameters", "type"},
"TemplateLiteralType": {"head", "templateSpans"},
"TemplateLiteralTypeSpan": {"type", "literal"},
"JsxElement": {"openingElement", "children", "closingElement"},
"JsxNamespacedName": {"name", "namespace"},
"JsxOpeningElement": {"tagName", "typeArguments", "attributes"},
"JsxSelfClosingElement": {"tagName", "typeArguments", "attributes"},
"JsxFragment": {"openingFragment", "children", "closingFragment"},
"JsxAttribute": {"name", "initializer"},
"JsxExpression": {"dotDotDotToken", "expression"},
"JSDoc": {"comment", "tags"},
"JSDocTypeTag": {"tagName", "typeExpression", "comment"},
"JSDocTag": {"tagName", "comment"},
"JSDocTemplateTag": {"tagName", "constraint", "typeParameters", "comment"},
"JSDocReturnTag": {"tagName", "typeExpression", "comment"},
"JSDocPublicTag": {"tagName", "comment"},
"JSDocPrivateTag": {"tagName", "comment"},
"JSDocProtectedTag": {"tagName", "comment"},
"JSDocReadonlyTag": {"tagName", "comment"},
"JSDocOverrideTag": {"tagName", "comment"},
"JSDocDeprecatedTag": {"tagName", "comment"},
"JSDocSeeTag": {"tagName", "nameExpression", "comment"},
"JSDocImplementsTag": {"tagName", "className", "comment"},
"JSDocAugmentsTag": {"tagName", "className", "comment"},
"JSDocSatisfiesTag": {"tagName", "typeExpression", "comment"},
"JSDocThrowsTag": {"tagName", "typeExpression", "comment"},
"JSDocThisTag": {"tagName", "typeExpression", "comment"},
"JSDocImportTag": {"tagName", "importClause", "moduleSpecifier", "attributes", "comment"},
"JSDocCallbackTag": {"tagName", "typeExpression", "fullName", "comment"},
"JSDocOverloadTag": {"tagName", "typeExpression", "comment"},
"JSDocTypedefTag": {"tagName", "typeExpression", "name", "comment"},
"JSDocSignature": {"typeParameters", "parameters", "type"},
"ClassStaticBlockDeclaration": {"modifiers", "body"},
"ClassDeclaration": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
"ClassExpression": {"modifiers", "name", "typeParameters", "heritageClauses", "members"},
// JSDocParameterTag and JSDocPropertyTag have order-dependent children
// (handled specially in the converter based on isNameFirst defined bit).
// Default order (isNameFirst=false); assignChildProperties swaps
// "typeExpression" and "name" when defined bit 2 is set.
"JSDocParameterTag": {"tagName", "typeExpression", "name", "comment"},
"JSDocPropertyTag": {"tagName", "typeExpression", "name", "comment"},
}
// singleChildProp maps node kinds that have exactly one Node child to
// the property name for that child.
//
// handleChildrenNode consults this table first: for a listed kind it converts
// only the first child (if there is one) and stores it under the mapped name.
// Lookups go through GetSingleChildProperty, which reports "" for kinds that
// are not listed.
var singleChildProp = map[string]string{
"ReturnStatement": "expression",
"ThrowStatement": "expression",
"ExpressionStatement": "expression",
"BreakStatement": "label",
"ContinueStatement": "label",
"ParenthesizedExpression": "expression",
"ComputedPropertyName": "expression",
"Decorator": "expression",
"SpreadElement": "expression",
"SpreadAssignment": "expression",
"DeleteExpression": "expression",
"TypeOfExpression": "expression",
"VoidExpression": "expression",
"AwaitExpression": "expression",
"NonNullExpression": "expression",
"ExternalModuleReference": "expression",
"NamespaceImport": "name",
"NamespaceExport": "name",
"JsxClosingElement": "tagName",
"ArrayType": "elementType",
"LiteralType": "literal",
"InferType": "typeParameter",
"OptionalType": "type",
"RestType": "type",
"ParenthesizedType": "type",
"JSDocTypeExpression": "type",
"JSDocNonNullableType": "type",
"JSDocNullableType": "type",
"JSDocVariadicType": "type",
"JSDocOptionalType": "type",
"JSDocNameReference": "name",
}
// singleNodeListProp maps node kinds that have exactly one NodeList child
// to the property name for that child.
//
// handleChildrenNode stores the converted list under the mapped name, and
// stores an empty array when such a node has no children at all.
// Lookups go through GetSingleNodeListProperty, which reports "" for kinds
// that are not listed.
var singleNodeListProp = map[string]string{
"Block": "statements",
"ArrayLiteralExpression": "elements",
"ObjectLiteralExpression": "properties",
"UnionType": "types",
"IntersectionType": "types",
"TupleType": "elements",
"NamedImports": "elements",
"NamedExports": "elements",
"ModuleBlock": "statements",
"CaseBlock": "clauses",
"TypeLiteral": "members",
"JsxAttributes": "properties",
"ArrayBindingPattern": "elements",
"ObjectBindingPattern": "elements",
"HeritageClause": "types",
// NOTE(review): the converter notes that JSDocTypeLiteral may arrive without
// a NodeList wrapper around its children — confirm against the encoder.
"JSDocTypeLiteral": "jsDocPropertyTags",
}
// operandKinds are node kinds where the single child is called "operand"
// and the operator is encoded in the defined bits.
//
// For these kinds handleChildrenNode stores the first child as "operand" and
// the node's DefinedBits value, converted to int, as "operator".
var operandKinds = map[string]bool{
"PrefixUnaryExpression": true,
"PostfixUnaryExpression": true,
}
// GetChildProperties looks kindName up in childProps and returns its ordered
// list of child property names. The result is nil when the kind has no entry
// there (leaf nodes, single-child kinds and single-NodeList kinds).
func GetChildProperties(kindName string) []string {
	if names, registered := childProps[kindName]; registered {
		return names
	}
	return nil
}
// GetSingleChildProperty reports the property name under which the only child
// of a single-child kind is stored. It yields "" when kindName is not listed
// in singleChildProp.
func GetSingleChildProperty(kindName string) string {
	if prop, registered := singleChildProp[kindName]; registered {
		return prop
	}
	return ""
}
// GetSingleNodeListProperty reports the property name under which the only
// NodeList child of a kind is stored. It yields "" when kindName is not listed
// in singleNodeListProp.
func GetSingleNodeListProperty(kindName string) string {
	if prop, registered := singleNodeListProp[kindName]; registered {
		return prop
	}
	return ""
}

View File

@@ -0,0 +1,652 @@
package astconv
import (
"encoding/json"
"fmt"
"strings"
)
// Converter transforms a BinaryAST into the JSON format expected by the
// Java extractor.
//
// Instances are built by NewConverter. Convert (or ConvertJSON) starts the
// conversion at node index 1.
type Converter struct {
// ast is the decoded binary AST being converted.
ast *BinaryAST
// kindNames drives every kind-specific decision in the converter; kinds
// missing from it are treated as "Unknown_<number>" by convertNode.
kindNames map[uint32]string // numeric kind → string name
// sourceText is taken from ast.SourceText() at construction time. When it
// is empty, the "text", "$lineStarts" and "$tokens" properties are not
// emitted and positions are not adjusted.
sourceText string // source file text for $lineStarts / $pos augmentation
}
// NewConverter builds a Converter over ast.
//
// kindToName translates numeric SyntaxKind values into their string names.
// The source text is read once from ast.SourceText() and kept on the
// Converter.
func NewConverter(ast *BinaryAST, kindToName map[uint32]string) *Converter {
	conv := new(Converter)
	conv.ast = ast
	conv.kindNames = kindToName
	conv.sourceText = ast.SourceText()
	return conv
}
// Convert turns the whole binary AST into a JSON-serializable map, starting
// from the root node at index 1. It fails when the AST holds fewer than two
// nodes, or when converting any node fails.
func (c *Converter) Convert() (map[string]interface{}, error) {
	const rootIndex = 1
	if count := c.ast.NodeCount(); count <= rootIndex {
		return nil, fmt.Errorf("no nodes to convert")
	}
	return c.convertNode(rootIndex)
}
// ConvertJSON runs Convert and marshals the resulting map to JSON bytes.
// A conversion error is returned as is, without attempting to marshal.
func (c *Converter) ConvertJSON() (json.RawMessage, error) {
	tree, err := c.Convert()
	if err != nil {
		return nil, err
	}
	encoded, err := json.Marshal(tree)
	return encoded, err
}
// convertNode converts node i, and recursively its children, into a map.
//
// Every node gets "kind", "flags", "$pos" (start position moved past leading
// trivia) and "$end". The remaining properties depend on the node's data type
// (string, extended, or children) and on its defined bits. Kinds without a
// name in kindNames are handled under the name "Unknown_<number>".
func (c *Converter) convertNode(i int) (map[string]interface{}, error) {
	kind := c.ast.Kind(i)
	kindName := c.kindNames[kind]
	if len(kindName) == 0 {
		kindName = fmt.Sprintf("Unknown_%d", kind)
	}

	node := make(map[string]interface{})
	node["kind"] = int(kind)
	node["flags"] = int(c.ast.Flags(i))
	node["$pos"] = c.augmentPos(int(c.ast.Pos(i)), true)
	node["$end"] = int(c.ast.End(i))

	var err error
	switch c.ast.DataType(i) {
	case nodeDataTypeString:
		c.handleStringNode(i, kindName, node)
	case nodeDataTypeExtended:
		err = c.handleExtendedNode(i, kindName, node)
	default: // nodeDataTypeChildren
		err = c.handleChildrenNode(i, kindName, node)
	}
	if err != nil {
		return nil, err
	}

	// Properties carried in the defined bits rather than in the child tree.
	c.addDefinedBitProperties(i, kindName, node)
	return node, nil
}
// handleStringNode stores the string payload of node i on node.
// Identifier and PrivateIdentifier nodes receive it as "escapedText";
// every other kind receives it as "text".
func (c *Converter) handleStringNode(i int, kindName string, node map[string]interface{}) {
	key := "text"
	if kindName == "Identifier" || kindName == "PrivateIdentifier" {
		key = "escapedText"
	}
	node[key] = c.ast.GetString(c.ast.StringIndex(i))
}
// handleExtendedNode dispatches nodes that carry extended data: SourceFile
// and the three template literal pieces (TemplateHead, TemplateMiddle,
// TemplateTail). Any other kind is reported as an error.
func (c *Converter) handleExtendedNode(i int, kindName string, node map[string]interface{}) error {
	extOff := c.ast.ExtOffset(i)
	if kindName == "SourceFile" {
		return c.handleSourceFile(i, extOff, node)
	}
	if kindName == "TemplateHead" || kindName == "TemplateMiddle" || kindName == "TemplateTail" {
		c.handleTemplateLiteral(extOff, node)
		return nil
	}
	return fmt.Errorf("unknown extended data node kind: %s", kindName)
}
// handleSourceFile fills in the SourceFile-specific properties of node:
// "fileName", "parseDiagnostics" (always empty), "statements", and, when
// source text is available, "text", "$lineStarts" and "$tokens".
//
// Extended data layout as documented by the encoder (only fileNameIdx is read
// here):
//
//	[0-4] textIdx, [4-8] fileNameIdx, [8-12] pathIdx,
//	[12-16] languageVariant, [16-20] scriptKind,
//	[20-24] referencedFiles, [24-28] typeReferenceDirectives, [28-32] libReferenceDirectives
//	[32-36] imports, [36-40] moduleAugmentations, [40-44] ambientModuleNames
//	[44-48] externalModuleIndicator
func (c *Converter) handleSourceFile(i int, extOff uint32, node map[string]interface{}) error {
	hasText := c.sourceText != ""

	node["fileName"] = c.ast.GetString(c.ast.ExtUint32(extOff + 4))
	if hasText {
		node["text"] = c.sourceText
		node["$lineStarts"] = computeLineStarts(c.sourceText)
	}

	// The Java extractor expects this array to exist even though it is empty.
	node["parseDiagnostics"] = []interface{}{}

	// The NodeList child holds the statements. Non-list children (the
	// EndOfFile token) are skipped because the Java extractor does not use them.
	for _, childIdx := range c.ast.Children(i) {
		if !c.ast.IsNodeList(childIdx) {
			continue
		}
		statements, err := c.convertNodeList(childIdx)
		if err != nil {
			return err
		}
		node["statements"] = statements
	}

	if !hasText {
		return nil
	}

	// $tokens comes from re-scanning the source text, guided by the rescan
	// positions collected from the AST.
	rescans := c.collectRescanEvents(i)
	tokenizer := NewScanner(c.sourceText, rescans)
	rawTokens := tokenizer.ScanAll()
	tokens := make([]interface{}, 0, len(rawTokens))
	for _, tok := range rawTokens {
		tokens = append(tokens, map[string]interface{}{
			"kind":     tok.Kind,
			"tokenPos": c.augmentPos(tok.TokenPos, false),
			"text":     tok.Text,
		})
	}
	node["$tokens"] = tokens
	return nil
}
// handleTemplateLiteral copies a template piece's strings out of extended
// data: the string index at extOff becomes "text" and the one at extOff+4
// becomes "rawText".
func (c *Converter) handleTemplateLiteral(extOff uint32, node map[string]interface{}) {
	cookedIdx, rawIdx := c.ast.ExtUint32(extOff), c.ast.ExtUint32(extOff+4)
	node["text"] = c.ast.GetString(cookedIdx)
	node["rawText"] = c.ast.GetString(rawIdx)
}
// handleChildrenNode fills in the child properties of a node whose data field
// has the Children type. The strategy is chosen from kindName, in this order:
//
//  1. single-child kinds (singleChildProp): first child under the mapped name;
//  2. single-NodeList kinds (singleNodeListProp): the list under the mapped
//     name, or an empty array when the node has no children;
//  3. operand kinds: first child as "operand", defined bits as "operator";
//  4. kinds registered in childProps: distributed by assignChildProperties;
//  5. MetaProperty, TypeOperator and MissingDeclaration: handled inline;
//  6. anything else with children: a flat, generic "children" array.
//
// A single-NodeList kind whose first child is not a NodeList is not consumed
// by step 2; it continues through the later steps so that its children are
// not silently dropped.
func (c *Converter) handleChildrenNode(i int, kindName string, node map[string]interface{}) error {
	children := c.ast.Children(i)

	// setFirstChild converts the first child, if any, and stores it under prop.
	setFirstChild := func(prop string) error {
		if len(children) == 0 {
			return nil
		}
		child, err := c.convertNode(children[0])
		if err != nil {
			return err
		}
		node[prop] = child
		return nil
	}

	// Single-child nodes.
	if prop := GetSingleChildProperty(kindName); prop != "" {
		return setFirstChild(prop)
	}

	// Single-NodeList nodes.
	if prop := GetSingleNodeListProperty(kindName); prop != "" {
		if len(children) == 0 {
			node[prop] = []interface{}{}
			return nil
		}
		if c.ast.IsNodeList(children[0]) {
			arr, err := c.convertNodeList(children[0])
			if err != nil {
				return err
			}
			node[prop] = arr
			return nil
		}
		// Some single-NodeList kinds may not have a NodeList child
		// (e.g., JSDocTypeLiteral). Fall through to the handling below.
	}

	// Operator-in-definedBits nodes (PrefixUnaryExpression, PostfixUnaryExpression).
	if operandKinds[kindName] {
		if err := setFirstChild("operand"); err != nil {
			return err
		}
		node["operator"] = int(c.ast.DefinedBits(i))
		return nil
	}

	// Multi-child nodes with a property mask.
	if props := GetChildProperties(kindName); props != nil {
		return c.assignChildProperties(i, kindName, props, children, node)
	}

	// Token/keyword nodes with no children — nothing to add.
	if len(children) == 0 {
		return nil
	}

	switch kindName {
	case "MetaProperty":
		// Only the name is a child node.
		return setFirstChild("name")

	case "TypeOperator":
		// The operator (keyof/unique/readonly) is not in the binary encoding,
		// so it is inferred from the source text at the node's start.
		// augmentPos skips comments as well as whitespace, so a comment
		// before the keyword does not hide it.
		pos := int(c.ast.Pos(i))
		if c.sourceText != "" && pos < len(c.sourceText) {
			text := c.sourceText[c.augmentPos(pos, true):]
			for _, op := range [...]struct{ keyword, kind string }{
				{"keyof", "KeyOfKeyword"},
				{"unique", "UniqueKeyword"},
				{"readonly", "ReadonlyKeyword"},
			} {
				if strings.HasPrefix(text, op.keyword) {
					node["operator"] = int(c.kindForName(op.kind))
					break
				}
			}
		}
		return setFirstChild("type")

	case "MissingDeclaration":
		// Optional modifiers list.
		if c.ast.IsNodeList(children[0]) {
			arr, err := c.convertNodeList(children[0])
			if err != nil {
				return err
			}
			node["modifiers"] = arr
		}
		return nil
	}

	// Unknown kind with children — emit them as a generic "children" array,
	// flattening any NodeList into it.
	arr := make([]interface{}, 0, len(children))
	for _, ci := range children {
		if c.ast.IsNodeList(ci) {
			items, err := c.convertNodeList(ci)
			if err != nil {
				return err
			}
			arr = append(arr, items...)
			continue
		}
		child, err := c.convertNode(ci)
		if err != nil {
			return err
		}
		arr = append(arr, child)
	}
	if len(arr) > 0 {
		node["children"] = arr
	}
	return nil
}
// assignChildProperties hands the children of node nodeIdx out to the named
// properties in props, in order.
//
// Bit k of the node's child mask tells whether props[k] is present. A property
// flagged absent consumes no child; array-valued properties (see
// isArrayProperty) are then emitted as empty arrays because the Java extractor
// expects them. A mask of 0 disables the presence check, so children are
// consumed sequentially. Distribution stops as soon as the children run out.
func (c *Converter) assignChildProperties(nodeIdx int, kindName string, props []string, children []int, node map[string]interface{}) error {
	mask := c.ast.ChildMask(nodeIdx)
	definedBits := c.ast.DefinedBits(nodeIdx)

	// JSDocParameterTag/JSDocPropertyTag: with isNameFirst set (defined bit 2)
	// the name precedes the type expression.
	isParamLikeTag := kindName == "JSDocParameterTag" || kindName == "JSDocPropertyTag"
	if isParamLikeTag && definedBits&2 != 0 {
		props = []string{"tagName", "name", "typeExpression", "comment"}
	}

	next := 0 // index of the next unconsumed child
	for bit, prop := range props {
		absent := bit < 8 && mask != 0 && mask&(1<<uint(bit)) == 0
		if absent {
			if isArrayProperty(prop) {
				node[prop] = []interface{}{}
			}
			continue
		}
		if next >= len(children) {
			break
		}
		ci := children[next]
		next++

		if c.ast.IsNodeList(ci) {
			list, err := c.convertNodeList(ci)
			if err != nil {
				return err
			}
			node[prop] = list
			continue
		}

		child, err := c.convertNode(ci)
		if err != nil {
			return err
		}
		// TS7 uses a single postfix-token slot (listed here as
		// "questionToken") for what TS5 exposed as separate questionToken and
		// exclamationToken properties; pick the name from the token's kind.
		target := prop
		if prop == "questionToken" {
			exclamation := c.kindForName("ExclamationToken")
			if exclamation != 0 && c.ast.Kind(ci) == exclamation {
				target = "exclamationToken"
			}
		}
		node[target] = child
	}
	return nil
}
// isArrayProperty reports whether prop is one of the properties that must be
// emitted as an empty array, rather than omitted, when the binary AST has no
// value for it.
func isArrayProperty(prop string) bool {
	flagged, listed := arrayProperties[prop]
	return listed && flagged
}
// arrayProperties is the set of property names that assignChildProperties
// emits as an empty array when the child mask flags them as absent.
// It is queried through isArrayProperty.
var arrayProperties = map[string]bool{
"arguments": true,
"elements": true,
"properties": true,
"members": true,
}
// convertNodeList converts every child of NodeList i, in order, and returns
// the results as a JSON array. The first conversion error aborts the list.
func (c *Converter) convertNodeList(i int) ([]interface{}, error) {
	members := c.ast.Children(i)
	converted := make([]interface{}, len(members))
	for slot, memberIdx := range members {
		item, err := c.convertNode(memberIdx)
		if err != nil {
			return nil, err
		}
		converted[slot] = item
	}
	return converted, nil
}
// addDefinedBitProperties adds properties derived from the defined bits
// (bits 24-29 of the data field) that aren't part of the child tree.
//
// It also infers the HeritageClause token, which is not in the binary
// encoding, from the source text. Leading trivia is skipped with augmentPos,
// so comments before the keyword (as in "class A /* c */ implements B") do
// not cause "implements" to be misreported as "extends". No token is set when
// the source text is unavailable or the node starts at or beyond its end.
//
// All numeric "token" values are stored as int, like the other numeric
// properties the converter emits.
func (c *Converter) addDefinedBitProperties(i int, kindName string, node map[string]interface{}) {
	definedBits := c.ast.DefinedBits(i)
	switch kindName {
	case "ImportSpecifier", "ImportEqualsDeclaration", "ExportSpecifier", "ExportDeclaration":
		node["isTypeOnly"] = definedBits&1 != 0

	case "ImportClause":
		node["isTypeOnly"] = definedBits&1 != 0
		if definedBits&2 != 0 {
			node["phaseModifier"] = "defer"
		}

	case "ImportType":
		if definedBits&1 != 0 {
			node["isTypeOf"] = true
		}

	case "ExportAssignment", "JSExportAssignment":
		if definedBits&1 != 0 {
			node["isExportEquals"] = true
		}

	case "VariableDeclarationList":
		// Bit 2 (const) takes precedence over bit 1 (let); neither means var.
		switch {
		case definedBits&2 != 0:
			node["$declarationKind"] = "const"
		case definedBits&1 != 0:
			node["$declarationKind"] = "let"
		default:
			node["$declarationKind"] = "var"
		}

	case "ImportAttributes":
		tokenName := "WithKeyword"
		if definedBits&2 != 0 {
			tokenName = "AssertKeyword"
		}
		node["token"] = int(c.kindForName(tokenName))

	case "HeritageClause":
		pos := int(c.ast.Pos(i))
		if c.sourceText == "" || pos >= len(c.sourceText) {
			break
		}
		// Anything that is not "implements" is treated as "extends".
		tokenName := "ExtendsKeyword"
		if strings.HasPrefix(c.sourceText[c.augmentPos(pos, true):], "implements") {
			tokenName = "ImplementsKeyword"
		}
		node["token"] = int(c.kindForName(tokenName))

	case "JSDocParameterTag", "JSDocPropertyTag":
		if definedBits&1 != 0 {
			node["isBracketed"] = true
		}
		if definedBits&2 != 0 {
			node["isNameFirst"] = true
		}
	}
}
// augmentPos replicates the Node.js wrapper's $pos augmentation: when
// skipTrivia is true it returns the first position at or after pos that is
// not inside leading whitespace or a comment, mirroring the regex
// /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g.
//
// pos is returned unchanged when skipTrivia is false, when there is no source
// text, or when pos is at or beyond the end of the text.
//
// As with the regex, a "/*" that is never closed is not a comment: scanning
// stops at its "/" instead of swallowing the rest of the file.
//
// NOTE(review): only ASCII whitespace is skipped and pos is used as a byte
// index into the source text; JavaScript's \s also matches some non-ASCII
// whitespace — confirm this is acceptable for the inputs at hand.
func (c *Converter) augmentPos(pos int, skipTrivia bool) int {
	src := c.sourceText
	if !skipTrivia || src == "" || pos >= len(src) {
		return pos
	}

	i, n := pos, len(src)
	for i < n {
		switch src[i] {
		case ' ', '\t', '\r', '\n', '\f', '\v':
			i++
			continue
		case '/':
			if i+1 >= n {
				return i
			}
			switch src[i+1] {
			case '/':
				// Line comment: runs up to (not including) the next newline,
				// or to the end of the text.
				nl := strings.IndexByte(src[i+2:], '\n')
				if nl < 0 {
					return n
				}
				i += 2 + nl
				continue
			case '*':
				// Block comment: runs through the closing "*/".
				closing := strings.Index(src[i+2:], "*/")
				if closing < 0 {
					return i // unterminated: not trivia
				}
				i += 2 + closing + 2
				continue
			}
		}
		return i
	}
	return i
}
// computeLineStarts returns the byte offsets in text at which each line
// starts. The first entry is always 0.
//
// Line terminators follow the TypeScript scanner: "\n", "\r", "\r\n"
// (counted once), and the Unicode separators U+2028 and U+2029.
//
// NOTE(review): offsets are byte offsets into the UTF-8 text; confirm that
// consumers do not expect UTF-16 code-unit offsets for non-ASCII sources.
func computeLineStarts(text string) []int {
	starts := []int{0}
	for i := 0; i < len(text); i++ {
		switch text[i] {
		case '\n':
			starts = append(starts, i+1)
		case '\r':
			if i+1 < len(text) && text[i+1] == '\n' {
				i++
			}
			starts = append(starts, i+1)
		case 0xE2:
			// U+2028 and U+2029 are E2 80 A8 and E2 80 A9 in UTF-8.
			if i+2 < len(text) && text[i+1] == 0x80 && (text[i+2] == 0xA8 || text[i+2] == 0xA9) {
				i += 2
				starts = append(starts, i+1)
			}
		}
	}
	return starts
}
// kindForName is the reverse lookup of kindNames: it scans the table for an
// entry whose name equals name and returns its numeric kind, or 0 when no
// entry matches.
func (c *Converter) kindForName(name string) uint32 {
	var found uint32
	for kind, candidate := range c.kindNames {
		if candidate != name {
			continue
		}
		found = kind
		break
	}
	return found
}
// collectRescanEvents gathers, for the subtree rooted at root, every position
// where the scanner must rescan, and returns the events ordered by position.
// This matches the Node.js wrapper's rescan logic in ast_extractor.ts.
func (c *Converter) collectRescanEvents(root int) []RescanEvent {
	collected := []RescanEvent(nil)
	c.walkForRescan(root, &collected)
	sortRescanEvents(collected)
	return collected
}
// walkForRescan visits node i and all of its descendants, appending a
// RescanEvent to *events for each node the scanner would otherwise tokenize
// differently:
//
//   - RegularExpressionLiteral ("regex"): the scanner sees "/" as SlashToken;
//   - TemplateMiddle and TemplateTail ("template"): the scanner sees "}" as
//     CloseBraceToken;
//   - BinaryExpression whose operator starts with ">" and is longer than one
//     character ("greater"): the scanner may see ">" on its own.
//
// Event positions are trivia-adjusted start positions. Indices outside
// (0, NodeCount) are ignored.
func (c *Converter) walkForRescan(i int, events *[]RescanEvent) {
	if i <= 0 || i >= c.ast.NodeCount() {
		return
	}

	if !c.ast.IsNodeList(i) {
		startOf := func(nodeIdx int) int {
			return c.augmentPos(int(c.ast.Pos(nodeIdx)), true)
		}

		switch c.kindNames[c.ast.Kind(i)] {
		case "RegularExpressionLiteral":
			*events = append(*events, RescanEvent{Pos: startOf(i), Kind: "regex"})

		case "TemplateMiddle", "TemplateTail":
			*events = append(*events, RescanEvent{Pos: startOf(i), Kind: "template"})

		case "BinaryExpression":
			// Children are left, operatorToken, right.
			if parts := c.ast.Children(i); len(parts) >= 3 {
				opIdx := parts[1]
				switch c.kindNames[c.ast.Kind(opIdx)] {
				case "GreaterThanEqualsToken",
					"GreaterThanGreaterThanEqualsToken",
					"GreaterThanGreaterThanGreaterThanEqualsToken",
					"GreaterThanGreaterThanGreaterThanToken",
					"GreaterThanGreaterThanToken":
					*events = append(*events, RescanEvent{Pos: startOf(opIdx), Kind: "greater"})
				}
			}
		}
	}

	// NodeLists and regular nodes alike: descend into every child.
	for _, childIdx := range c.ast.Children(i) {
		c.walkForRescan(childIdx, events)
	}
}
// sortRescanEvents orders events in place by ascending Pos. It is a stable
// insertion sort: events with equal positions keep their relative order.
// The event list is typically short.
func sortRescanEvents(events []RescanEvent) {
	for sorted := 1; sorted < len(events); sorted++ {
		// Bubble the new element left until its predecessor is not greater.
		for at := sorted; at > 0 && events[at-1].Pos > events[at].Pos; at-- {
			events[at-1], events[at] = events[at], events[at-1]
		}
	}
}
// FilterWhitelist returns a copy of obj that keeps only the properties
// accepted by IsAllowedProperty. Nested objects are filtered recursively, and
// arrays are filtered element by element; all other values are copied as is.
// obj itself is not modified.
func FilterWhitelist(obj map[string]interface{}) map[string]interface{} {
	kept := make(map[string]interface{}, len(obj))
	for key, value := range obj {
		if !IsAllowedProperty(key) {
			continue
		}
		if nested, isObject := value.(map[string]interface{}); isObject {
			kept[key] = FilterWhitelist(nested)
		} else if list, isArray := value.([]interface{}); isArray {
			kept[key] = filterWhitelistArray(list)
		} else {
			kept[key] = value
		}
	}
	return kept
}
// filterWhitelistArray returns a copy of arr in which every object element
// has been passed through FilterWhitelist. Elements of any other type are
// copied unchanged.
func filterWhitelistArray(arr []interface{}) []interface{} {
	filtered := make([]interface{}, 0, len(arr))
	for _, elem := range arr {
		switch typed := elem.(type) {
		case map[string]interface{}:
			filtered = append(filtered, FilterWhitelist(typed))
		default:
			filtered = append(filtered, elem)
		}
	}
	return filtered
}
// BuildKindToNameMap builds a reverse mapping from numeric kind to string name
// from a SyntaxKinds metadata map (name → number).
//
// When several names share a number, the shortest name wins; names of equal
// length are resolved by choosing the lexicographically smallest. The result
// therefore does not depend on map iteration order.
//
// NOTE(review): the shortest-name rule would also pick a short alias over the
// real kind name if the metadata contained range-marker aliases — confirm the
// metadata does not include such entries.
func BuildKindToNameMap(syntaxKinds map[string]int) map[uint32]string {
	result := make(map[uint32]string, len(syntaxKinds))
	for name, num := range syntaxKinds {
		key := uint32(num)
		existing, seen := result[key]
		switch {
		case !seen,
			len(name) < len(existing),
			len(name) == len(existing) && name < existing:
			result[key] = name
		}
	}
	return result
}
// StripKindPrefix drops a leading "Kind" from name (TS7 Go-style names such
// as "KindIdentifier"). Names without that prefix are returned unchanged.
func StripKindPrefix(name string) string {
	return strings.TrimPrefix(name, "Kind")
}

View File

@@ -0,0 +1,221 @@
// Package astconv decodes the binary AST format produced by the tsgo API
// and converts it to the JSON format expected by the Java extractor.
//
// The binary format is documented in microsoft/typescript-go/internal/api/encoder/encoder.go.
// Each source file is encoded as:
//
// Header (44 bytes) | String offsets | String data | Extended data | Structured data | Nodes (28 bytes each)
//
// Nodes are in a flat array with parent/next-sibling indices. The first node (index 0)
// is a nil sentinel. The root node is at index 1.
package astconv
import (
"encoding/base64"
"encoding/binary"
"fmt"
)
// Binary format constants matching microsoft/typescript-go/internal/api/encoder.
const (
// Each node record is nodeSize bytes long. The nodeOffset* values are the
// byte offsets of its uint32 fields within the record, as read by the
// BinaryAST accessors.
nodeSize = 28 // 7 × uint32
nodeOffsetKind = 0
nodeOffsetPos = 4
nodeOffsetEnd = 8
nodeOffsetNext = 12
nodeOffsetParent = 16
nodeOffsetData = 20
nodeOffsetFlags = 24
// The header is headerSize bytes long. DecodeBinaryAST reads the protocol
// version from byte headerOffsetMetadata+3 and a little-endian uint32
// section offset from each of the remaining headerOffset* positions.
headerSize = 44
headerOffsetMetadata = 0
headerOffsetStringOff = 24
headerOffsetStringData = 28
headerOffsetExtData = 32
headerOffsetStructData = 36
headerOffsetNodes = 40
// protocolVersion is the only version DecodeBinaryAST accepts.
protocolVersion uint8 = 5
// The top two bits of a node's data field (selected by nodeDataTypeMask)
// say how the rest of the field is to be interpreted.
nodeDataTypeChildren uint32 = 0x00_00_00_00
nodeDataTypeString uint32 = 0x40_00_00_00
nodeDataTypeExtended uint32 = 0x80_00_00_00
nodeDataTypeMask uint32 = 0xC0_00_00_00
// Payload masks for the low bits of the data field.
// NOTE(review): presumably nodeDataChildMask selects the child-presence
// bitmask and nodeDataStringMask the string index; their uses are not
// visible from here — confirm.
nodeDataChildMask uint32 = 0x00_00_00_FF
nodeDataStringMask uint32 = 0x00_FF_FF_FF
// SyntaxKindNodeList is the special kind value used for NodeList nodes.
SyntaxKindNodeList uint32 = 0xFF_FF_FF_FF
)
// BinaryAST provides random access to nodes in a binary-encoded TypeScript AST.
//
// Values are created by DecodeBinaryAST, which records the section offsets
// found in the header. Node fields are read on demand from raw.
type BinaryAST struct {
// raw is the complete encoded buffer that was passed to DecodeBinaryAST.
raw []byte
strOff uint32 // byte offset to string offset pairs
strData uint32 // byte offset to string data
extData uint32 // byte offset to extended node data
structOff uint32 // byte offset to structured data
nodeOff uint32 // byte offset to nodes section
// nodeCount is the number of whole nodeSize-byte records that fit between
// nodeOff and the end of raw; it includes the nil sentinel at index 0.
nodeCount int
// Single Go string covering all data from strData onward.
// String offsets index into this, so substrings are zero-alloc.
allStrData string
}
// DecodeBinaryAST parses the binary header in data and returns a BinaryAST
// for random access to nodes and strings. data is retained, not copied, so
// the caller must not modify it afterwards.
//
// An error is returned when:
//   - data is shorter than the header;
//   - the protocol version byte differs from protocolVersion;
//   - any section offset in the header (string offsets, string data, extended
//     data, structured data, nodes) lies beyond the end of data;
//   - fewer than two node records fit in the nodes section (the nil sentinel
//     plus a root node are required).
func DecodeBinaryAST(data []byte) (*BinaryAST, error) {
	if len(data) < headerSize {
		return nil, fmt.Errorf("data too short: %d bytes (need %d)", len(data), headerSize)
	}
	if version := data[headerOffsetMetadata+3]; version != protocolVersion {
		return nil, fmt.Errorf("unsupported protocol version %d (expected %d)", version, protocolVersion)
	}

	b := &BinaryAST{
		raw:       data,
		strOff:    le32(data, headerOffsetStringOff),
		strData:   le32(data, headerOffsetStringData),
		extData:   le32(data, headerOffsetExtData),
		structOff: le32(data, headerOffsetStructData),
		nodeOff:   le32(data, headerOffsetNodes),
	}

	// Every section offset must point inside the buffer.
	dataLen := uint32(len(data))
	for _, off := range [...]uint32{b.strOff, b.strData, b.extData, b.structOff, b.nodeOff} {
		if off > dataLen {
			return nil, fmt.Errorf("invalid header offsets exceed data length %d", dataLen)
		}
	}

	b.nodeCount = (len(data) - int(b.nodeOff)) / nodeSize
	if b.nodeCount < 2 {
		return nil, fmt.Errorf("no nodes in AST (count=%d, need at least 2)", b.nodeCount)
	}

	// One string covering everything from strData onward; string offsets
	// index into it, so individual strings are substrings of this value.
	b.allStrData = string(data[b.strData:])
	return b, nil
}
// DecodeBinaryASTFromBase64 accepts the standard-alphabet base64 text that
// tsgo's getSourceFile API returns in JSON ({"data":"<base64>"}), decodes it
// and hands the bytes to DecodeBinaryAST.
//
// A malformed base64 payload yields a wrapped decode error; any other error
// comes straight from DecodeBinaryAST.
func DecodeBinaryASTFromBase64(b64 string) (*BinaryAST, error) {
	raw, decodeErr := base64.StdEncoding.DecodeString(b64)
	if decodeErr == nil {
		return DecodeBinaryAST(raw)
	}
	return nil, fmt.Errorf("base64 decode failed: %w", decodeErr)
}
// NodeCount reports how many node records the AST holds. The count includes
// the nil sentinel stored at index 0.
func (b *BinaryAST) NodeCount() int {
	return b.nodeCount
}
// nf reads one uint32 field of node i from the nodes section. All node field
// accessors go through it.
//
// offset is the byte offset of the field inside the node record. An index
// outside [0, nodeCount) yields 0 rather than bytes that belong to another
// section of the buffer or to a truncated trailing record.
func (b *BinaryAST) nf(i, offset int) uint32 {
	if i < 0 || i >= b.nodeCount {
		return 0
	}
	return le32(b.raw, int(b.nodeOff)+i*nodeSize+offset)
}
// Kind reports the SyntaxKind stored for node i.
func (b *BinaryAST) Kind(i int) uint32 {
	return b.nf(i, nodeOffsetKind)
}

// Pos reports where node i starts, as a UTF-16 offset.
func (b *BinaryAST) Pos(i int) uint32 {
	return b.nf(i, nodeOffsetPos)
}

// End reports where node i ends, as a UTF-16 offset.
func (b *BinaryAST) End(i int) uint32 {
	return b.nf(i, nodeOffsetEnd)
}

// Next reports the index of node i's next sibling; 0 means there is none.
func (b *BinaryAST) Next(i int) uint32 {
	return b.nf(i, nodeOffsetNext)
}

// Parent reports the index of node i's parent; 0 means there is none.
func (b *BinaryAST) Parent(i int) uint32 {
	return b.nf(i, nodeOffsetParent)
}

// Data reports the undecoded 32-bit data field of node i.
func (b *BinaryAST) Data(i int) uint32 {
	return b.nf(i, nodeOffsetData)
}

// Flags reports the NodeFlags stored for node i.
func (b *BinaryAST) Flags(i int) uint32 {
	return b.nf(i, nodeOffsetFlags)
}
// DataType isolates the top two bits of node i's data field, which say whether
// the rest of the field is a child mask, a string index or an extended-data
// offset.
func (b *BinaryAST) DataType(i int) uint32 {
	data := b.Data(i)
	return data & nodeDataTypeMask
}

// DefinedBits extracts bits 24-29 of node i's data field: six per-node-type
// flag bits.
func (b *BinaryAST) DefinedBits(i int) uint8 {
	const sixBits = 0x3F
	shifted := b.Data(i) >> 24
	return uint8(shifted & sixBits)
}

// ChildMask extracts the low byte of node i's data field, the bitmask of
// child properties that are present.
func (b *BinaryAST) ChildMask(i int) uint8 {
	data := b.Data(i)
	return uint8(data & nodeDataChildMask)
}

// StringIndex extracts the 24-bit string table index from node i's data field.
func (b *BinaryAST) StringIndex(i int) uint32 {
	data := b.Data(i)
	return data & nodeDataStringMask
}

// ExtOffset extracts the 24-bit offset into the extended data section from
// node i's data field. It uses the same 24-bit mask as StringIndex.
func (b *BinaryAST) ExtOffset(i int) uint32 {
	data := b.Data(i)
	return data & nodeDataStringMask
}

// NodeListLen reports the child count of a NodeList node, which occupies the
// whole data field.
func (b *BinaryAST) NodeListLen(i int) uint32 {
	return b.Data(i)
}

// IsNodeList reports whether node i carries the special NodeList kind.
func (b *BinaryAST) IsNodeList(i int) bool {
	return SyntaxKindNodeList == b.Kind(i)
}
// GetString reads a string from the string table at the given offset index.
// The index comes from a String-type node's data field (24-bit value).
//
// The string offsets section is an array of uint32 values; idx addresses the
// start offset and the following value is the end offset, both relative to
// the beginning of the string data. A pair that is inverted or reaches past
// the available string data yields "" instead of panicking, in line with the
// other accessors, which return zero values for out-of-range reads.
func (b *BinaryAST) GetString(idx uint32) string {
	offBase := int(b.strOff) + int(idx)*4
	start := le32(b.raw, offBase)
	end := le32(b.raw, offBase+4)
	if start > end || uint64(end) > uint64(len(b.allStrData)) {
		return ""
	}
	return b.allStrData[start:end]
}
// ExtUint32 fetches the uint32 stored off bytes into the extended data
// section.
func (b *BinaryAST) ExtUint32(off uint32) uint32 {
	abs := int(b.extData) + int(off)
	return le32(b.raw, abs)
}
// Children returns the indices of all direct children of node i.
// Children are identified by having parent == i. The first child is at i+1
// (if its parent is i), and subsequent children are found via Next pointers.
//
// The result is nil when node i has no children. The sibling walk is
// defensive against corrupt input: it stops at a Next pointer that falls
// outside the node table, and it never collects more entries than there are
// nodes, so a cyclic Next chain cannot make it loop forever.
func (b *BinaryAST) Children(i int) []int {
	if i+1 >= b.nodeCount {
		return nil
	}
	firstChild := i + 1
	if b.Parent(firstChild) != uint32(i) {
		return nil
	}

	children := []int{firstChild}
	for next := int(b.Next(firstChild)); next > 0 && next < b.nodeCount; next = int(b.Next(next)) {
		if len(children) >= b.nodeCount {
			break // more siblings than nodes: the chain must be cyclic
		}
		children = append(children, next)
	}
	return children
}
// SourceText looks up the source file text through the root node's extended
// data: the first uint32 of that data is used as a string table index.
//
// It yields "" when the AST has fewer than two nodes or when the root node
// (index 1) does not carry extended data.
func (b *BinaryAST) SourceText() string {
	const root = 1
	if b.nodeCount >= 2 && b.DataType(root) == nodeDataTypeExtended {
		textIdx := b.ExtUint32(b.ExtOffset(root))
		return b.GetString(textIdx)
	}
	return ""
}
// le32 decodes the little-endian uint32 stored at data[offset:offset+4].
//
// It returns 0 when the four bytes do not lie entirely inside data. The upper
// bound is written as offset > len(data)-4 so that a very large offset cannot
// overflow while being checked and slip past the guard.
func le32(data []byte, offset int) uint32 {
	if offset < 0 || offset > len(data)-4 {
		return 0
	}
	return binary.LittleEndian.Uint32(data[offset:])
}

View File

@@ -0,0 +1,842 @@
package astconv
import (
"unicode"
"unicode/utf8"
)
// TS7 SyntaxKind values for tokens (from microsoft/typescript-go internal/ast/kind.go).
//
// Only the kinds that this scanner emits by name are declared, which is why
// the numbering has gaps at 7, 11, 12, 61 and 81. Keyword kinds are not
// declared as constants; the scanner looks them up in keywordKinds.
// NOTE(review): the same numbers appear in the tsparser SyntaxKind table;
// presumably the two have to be kept in step — confirm when updating either.
const (
// End-of-input marker and trivia (comments, line breaks, whitespace).
KindUnknown = 0
KindEndOfFile = 1
KindSingleLineCommentTrivia = 2
KindMultiLineCommentTrivia = 3
KindNewLineTrivia = 4
KindWhitespaceTrivia = 5
KindConflictMarkerTrivia = 6
// Literals. 11 and 12 are skipped here; the tsparser table assigns them to
// JsxText and JsxTextAllWhiteSpaces, which this scanner does not produce.
KindNumericLiteral = 8
KindBigIntLiteral = 9
KindStringLiteral = 10
KindRegularExpressionLiteral = 13
KindNoSubstitutionTemplateLiteral = 14
KindTemplateHead = 15
KindTemplateMiddle = 16
KindTemplateTail = 17
// Punctuation and operators.
KindOpenBraceToken = 18
KindCloseBraceToken = 19
KindOpenParenToken = 20
KindCloseParenToken = 21
KindOpenBracketToken = 22
KindCloseBracketToken = 23
KindDotToken = 24
KindDotDotDotToken = 25
KindSemicolonToken = 26
KindCommaToken = 27
KindQuestionDotToken = 28
KindLessThanToken = 29
KindLessThanSlashToken = 30
KindGreaterThanToken = 31
KindLessThanEqualsToken = 32
KindGreaterThanEqualsToken = 33
KindEqualsEqualsToken = 34
KindExclamationEqualsToken = 35
KindEqualsEqualsEqualsToken = 36
KindExclamationEqualsEqualsToken = 37
KindEqualsGreaterThanToken = 38
KindPlusToken = 39
KindMinusToken = 40
KindAsteriskToken = 41
KindAsteriskAsteriskToken = 42
KindSlashToken = 43
KindPercentToken = 44
KindPlusPlusToken = 45
KindMinusMinusToken = 46
KindLessThanLessThanToken = 47
KindGreaterThanGreaterThanToken = 48
KindGreaterThanGreaterThanGreaterThanToken = 49
KindAmpersandToken = 50
KindBarToken = 51
KindCaretToken = 52
KindExclamationToken = 53
KindTildeToken = 54
KindAmpersandAmpersandToken = 55
KindBarBarToken = 56
KindQuestionToken = 57
KindColonToken = 58
KindAtToken = 59
KindQuestionQuestionToken = 60
// 61 is not used by this scanner.
KindHashToken = 62
// Assignment operators.
KindEqualsToken = 63
KindPlusEqualsToken = 64
KindMinusEqualsToken = 65
KindAsteriskEqualsToken = 66
KindAsteriskAsteriskEqualsToken = 67
KindSlashEqualsToken = 68
KindPercentEqualsToken = 69
KindLessThanLessThanEqualsToken = 70
KindGreaterThanGreaterThanEqualsToken = 71
KindGreaterThanGreaterThanGreaterThanEqualsToken = 72
KindAmpersandEqualsToken = 73
KindBarEqualsToken = 74
KindBarBarEqualsToken = 75
KindAmpersandAmpersandEqualsToken = 76
KindQuestionQuestionEqualsToken = 77
KindCaretEqualsToken = 78
// Identifiers. 81 is not used by this scanner; keyword kinds start at 82.
KindIdentifier = 79
KindPrivateIdentifier = 80
)
// Token represents a single token from the scanner.
//
// Trivia (whitespace, line breaks, comments) is reported as tokens too, and
// the final token of a scan has Kind KindEndOfFile and empty Text.
type Token struct {
// Kind is the numeric SyntaxKind of the token (one of the Kind* constants or
// a keywordKinds value).
Kind int `json:"kind"`
// TokenPos is the index into the scanned text at which the token starts.
// The scanner indexes the Go string directly, so this is a byte offset.
// NOTE(review): node positions in the binary AST are documented as UTF-16
// offsets — confirm whether consumers need a conversion for non-ASCII input.
TokenPos int `json:"tokenPos"`
// Text is the exact source text covered by the token.
Text string `json:"text"`
}
// RescanEvent tells the scanner to rescan at a given position.
//
// When the scanner is about to scan a token that starts exactly at Pos, it
// uses the scanning routine selected by Kind instead of its default one.
type RescanEvent struct {
// Pos is the token start position the event applies to, in the same units
// as Token.TokenPos.
Pos int
// Kind selects the routine: "regex" scans a regular expression literal,
// "template" scans a template middle/tail starting at a closing brace, and
// "greater" scans a greater-than based operator. Any other value is
// consumed without changing how the token is scanned.
Kind string // "regex", "template", "greater"
}
// Scanner tokenizes TypeScript source text.
//
// It is a forward-only cursor: each scanned token advances pos, and rescan
// events are consumed in order as the cursor reaches them.
type Scanner struct {
// text is the source being tokenized.
text string
// pos is the byte index in text of the next unread character.
pos int
// events holds the rescan hints passed to NewScanner.
events []RescanEvent
// evIdx is the index in events of the next event that has not been consumed.
evIdx int
}
// NewScanner returns a scanner positioned at the start of text.
//
// rescanEvents must be sorted by position. Each one marks a token start where
// the default tokenization has to be replaced: a regular expression literal,
// the continuation of a template, or a greater-than operator (mirroring the
// rescans performed by the Node.js wrapper). The slice is stored as given,
// not copied.
func NewScanner(text string, rescanEvents []RescanEvent) *Scanner {
	sc := new(Scanner)
	sc.text = text
	sc.events = rescanEvents
	return sc
}
// ScanAll tokenizes from the current position to the end of the text and
// returns every token in order. The last element is always the end-of-file
// token.
func (s *Scanner) ScanAll() []Token {
	var out []Token
	for done := false; !done; {
		next := s.scan()
		out = append(out, next)
		done = next.Kind == KindEndOfFile
	}
	return out
}
// peek returns the byte under the cursor, or 0 once the cursor has reached
// the end of the text.
func (s *Scanner) peek() byte {
	if s.pos < len(s.text) {
		return s.text[s.pos]
	}
	return 0
}

// peekAt returns the byte offset positions ahead of the cursor, or 0 when
// that position is at or beyond the end of the text.
func (s *Scanner) peekAt(offset int) byte {
	if idx := s.pos + offset; idx < len(s.text) {
		return s.text[idx]
	}
	return 0
}

// advance moves the cursor forward by one byte without any bounds check.
func (s *Scanner) advance() {
	s.pos += 1
}
// nextRescanPos returns the position of the next unconsumed rescan event.
// When no events remain it returns the largest int, which no token position
// can equal.
func (s *Scanner) nextRescanPos() int {
	if s.evIdx >= len(s.events) {
		return int(^uint(0) >> 1) // MaxInt
	}
	return s.events[s.evIdx].Pos
}

// nextRescanKind returns the kind of the next unconsumed rescan event, or ""
// when no events remain.
func (s *Scanner) nextRescanKind() string {
	if s.evIdx >= len(s.events) {
		return ""
	}
	return s.events[s.evIdx].Kind
}

// consumeRescan marks the next rescan event as handled. It does nothing when
// no events remain.
func (s *Scanner) consumeRescan() {
	if s.evIdx >= len(s.events) {
		return
	}
	s.evIdx++
}
// scan returns the token that starts at the cursor and moves the cursor past
// it. At the end of the text it returns an end-of-file token with empty text
// and leaves the cursor where it is.
//
// Order of decisions:
//  1. Runs of space/tab/form-feed/vertical-tab become one whitespace token;
//     "\n", "\r" and "\r\n" each become one new-line token.
//  2. If the next rescan event is at this position it is consumed, and its
//     kind ("regex", "template", "greater") picks the scanning routine. An
//     event with any other kind is consumed and scanning continues normally.
//  3. Otherwise the token is chosen from the first byte, taking the longest
//     operator that matches. Note that '>' is always scanned greedily by
//     scanGreater, with or without a rescan event.
//  4. Anything left is an identifier/keyword, or a single rune of Unknown.
func (s *Scanner) scan() Token {
	if s.pos >= len(s.text) {
		return Token{Kind: KindEndOfFile, TokenPos: s.pos, Text: ""}
	}

	tokenPos := s.pos
	ch := s.peek()

	// emit finishes a fixed-width token: it moves the cursor width bytes past
	// tokenPos and reports exactly the text that was covered.
	emit := func(kind, width int) Token {
		s.pos = tokenPos + width
		return Token{Kind: kind, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
	}

	// Whitespace (not newlines)
	if ch == ' ' || ch == '\t' || ch == '\f' || ch == '\v' {
		for s.pos < len(s.text) {
			c := s.text[s.pos]
			if c != ' ' && c != '\t' && c != '\f' && c != '\v' {
				break
			}
			s.pos++
		}
		return Token{Kind: KindWhitespaceTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
	}

	// Newlines
	if ch == '\n' {
		return emit(KindNewLineTrivia, 1)
	}
	if ch == '\r' {
		if s.peekAt(1) == '\n' {
			return emit(KindNewLineTrivia, 2)
		}
		return emit(KindNewLineTrivia, 1)
	}

	// An event that lies behind the cursor can never match again. Drop it so
	// that it does not block every event that follows it.
	for s.evIdx < len(s.events) && s.events[s.evIdx].Pos < tokenPos {
		s.evIdx++
	}

	// Check for rescan event at this position
	if tokenPos == s.nextRescanPos() {
		kind := s.nextRescanKind()
		s.consumeRescan()
		switch kind {
		case "regex":
			return s.scanRegExp(tokenPos)
		case "template":
			return s.scanTemplatePart(tokenPos, true)
		case "greater":
			return s.scanGreater(tokenPos)
		}
	}

	switch ch {
	case '/':
		switch s.peekAt(1) {
		case '/':
			return s.scanSingleLineComment(tokenPos)
		case '*':
			return s.scanMultiLineComment(tokenPos)
		case '=':
			return emit(KindSlashEqualsToken, 2)
		}
		return emit(KindSlashToken, 1)

	case '\'', '"':
		return s.scanString(tokenPos, ch)
	case '`':
		return s.scanTemplatePart(tokenPos, false)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return s.scanNumber(tokenPos)

	case '{':
		return emit(KindOpenBraceToken, 1)
	case '}':
		return emit(KindCloseBraceToken, 1)
	case '(':
		return emit(KindOpenParenToken, 1)
	case ')':
		return emit(KindCloseParenToken, 1)
	case '[':
		return emit(KindOpenBracketToken, 1)
	case ']':
		return emit(KindCloseBracketToken, 1)
	case ';':
		return emit(KindSemicolonToken, 1)
	case ',':
		return emit(KindCommaToken, 1)
	case '~':
		return emit(KindTildeToken, 1)
	case '@':
		return emit(KindAtToken, 1)
	case ':':
		return emit(KindColonToken, 1)

	case '.':
		if s.peekAt(1) == '.' && s.peekAt(2) == '.' {
			return emit(KindDotDotDotToken, 3)
		}
		// .123 numeric literal
		if d := s.peekAt(1); d >= '0' && d <= '9' {
			return s.scanNumber(tokenPos)
		}
		return emit(KindDotToken, 1)

	case '?':
		// "?." followed by a digit is a conditional with a fractional
		// literal (a?.5:b), not optional chaining.
		if d := s.peekAt(2); s.peekAt(1) == '.' && !(d >= '0' && d <= '9') {
			return emit(KindQuestionDotToken, 2)
		}
		if s.peekAt(1) == '?' {
			if s.peekAt(2) == '=' {
				return emit(KindQuestionQuestionEqualsToken, 3)
			}
			return emit(KindQuestionQuestionToken, 2)
		}
		return emit(KindQuestionToken, 1)

	case '!':
		if s.peekAt(1) == '=' {
			if s.peekAt(2) == '=' {
				return emit(KindExclamationEqualsEqualsToken, 3)
			}
			return emit(KindExclamationEqualsToken, 2)
		}
		return emit(KindExclamationToken, 1)

	case '=':
		switch s.peekAt(1) {
		case '=':
			if s.peekAt(2) == '=' {
				return emit(KindEqualsEqualsEqualsToken, 3)
			}
			return emit(KindEqualsEqualsToken, 2)
		case '>':
			return emit(KindEqualsGreaterThanToken, 2)
		}
		return emit(KindEqualsToken, 1)

	case '+':
		switch s.peekAt(1) {
		case '+':
			return emit(KindPlusPlusToken, 2)
		case '=':
			return emit(KindPlusEqualsToken, 2)
		}
		return emit(KindPlusToken, 1)

	case '-':
		switch s.peekAt(1) {
		case '-':
			return emit(KindMinusMinusToken, 2)
		case '=':
			return emit(KindMinusEqualsToken, 2)
		}
		return emit(KindMinusToken, 1)

	case '*':
		switch s.peekAt(1) {
		case '*':
			if s.peekAt(2) == '=' {
				return emit(KindAsteriskAsteriskEqualsToken, 3)
			}
			return emit(KindAsteriskAsteriskToken, 2)
		case '=':
			return emit(KindAsteriskEqualsToken, 2)
		}
		return emit(KindAsteriskToken, 1)

	case '%':
		if s.peekAt(1) == '=' {
			return emit(KindPercentEqualsToken, 2)
		}
		return emit(KindPercentToken, 1)

	case '<':
		switch s.peekAt(1) {
		case '<':
			if s.peekAt(2) == '=' {
				return emit(KindLessThanLessThanEqualsToken, 3)
			}
			return emit(KindLessThanLessThanToken, 2)
		case '/':
			return emit(KindLessThanSlashToken, 2)
		case '=':
			return emit(KindLessThanEqualsToken, 2)
		}
		return emit(KindLessThanToken, 1)

	case '>':
		return s.scanGreater(tokenPos)

	case '&':
		switch s.peekAt(1) {
		case '&':
			if s.peekAt(2) == '=' {
				return emit(KindAmpersandAmpersandEqualsToken, 3)
			}
			return emit(KindAmpersandAmpersandToken, 2)
		case '=':
			return emit(KindAmpersandEqualsToken, 2)
		}
		return emit(KindAmpersandToken, 1)

	case '|':
		switch s.peekAt(1) {
		case '|':
			if s.peekAt(2) == '=' {
				return emit(KindBarBarEqualsToken, 3)
			}
			return emit(KindBarBarToken, 2)
		case '=':
			return emit(KindBarEqualsToken, 2)
		}
		return emit(KindBarToken, 1)

	case '^':
		if s.peekAt(1) == '=' {
			return emit(KindCaretEqualsToken, 2)
		}
		return emit(KindCaretToken, 1)

	case '#':
		if s.peekAt(1) == '!' && tokenPos == 0 {
			// Shebang — scan to end of line
			return s.scanSingleLineComment(tokenPos)
		}
		// Could be private identifier
		if isIdentStart(s.peekAt(1)) {
			return s.scanPrivateIdentifier(tokenPos)
		}
		return emit(KindHashToken, 1)
	}

	// Identifier or keyword
	if isIdentStartByte(ch) {
		return s.scanIdentifierOrKeyword(tokenPos)
	}

	// Handle multi-byte Unicode identifier starts
	r, size := utf8.DecodeRuneInString(s.text[s.pos:])
	if r != utf8.RuneError && isIdentStartRune(r) {
		return s.scanIdentifierOrKeyword(tokenPos)
	}

	// Unknown character: one whole rune (or one byte of invalid UTF-8).
	return emit(KindUnknown, size)
}
// scanSingleLineComment consumes a comment that runs to the end of the line.
// The cursor must be on a two-byte opener ("//", or "#!" for a shebang); the
// token stops before the terminating "\n" or "\r", or at the end of the text.
func (s *Scanner) scanSingleLineComment(start int) Token {
	end := s.pos + 2 // step over the opener
	for end < len(s.text) {
		if c := s.text[end]; c == '\n' || c == '\r' {
			break
		}
		end++
	}
	s.pos = end
	return Token{Kind: KindSingleLineCommentTrivia, TokenPos: start, Text: s.text[start:end]}
}
// scanMultiLineComment consumes a "/* ... */" comment. The cursor must be on
// the opening "/*". If no closing "*/" follows, the comment extends to the
// end of the text.
func (s *Scanner) scanMultiLineComment(start int) Token {
	last := len(s.text) - 1
	// Begin after the opener so that "/*/" is not mistaken for a closed comment.
	for i := s.pos + 2; i < last; i++ {
		if s.text[i] == '*' && s.text[i+1] == '/' {
			s.pos = i + 2
			return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:s.pos]}
		}
	}
	// Unterminated
	s.pos = len(s.text)
	return Token{Kind: KindMultiLineCommentTrivia, TokenPos: start, Text: s.text[start:]}
}
// scanString consumes a string literal delimited by quote. The cursor must be
// on the opening quote. The token text includes both quotes.
//
// A backslash escapes the byte after it, so an escaped quote does not end the
// literal. A backslash before "\r\n" swallows both bytes, because that pair
// is a single line continuation. A backslash that is the last byte of the
// text is consumed on its own, so the cursor never moves past the end.
//
// An unescaped line break or the end of the text ends the literal early; the
// token then covers what was read so far.
func (s *Scanner) scanString(start int, quote byte) Token {
	s.advance() // skip opening quote
	for s.pos < len(s.text) {
		ch := s.text[s.pos]
		if ch == '\\' {
			s.pos++ // the backslash itself
			if s.pos < len(s.text) {
				escaped := s.text[s.pos]
				s.pos++
				if escaped == '\r' && s.pos < len(s.text) && s.text[s.pos] == '\n' {
					s.pos++
				}
			}
			continue
		}
		if ch == quote {
			s.advance()
			return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
		}
		if ch == '\n' || ch == '\r' {
			// Unterminated string
			break
		}
		s.pos++
	}
	return Token{Kind: KindStringLiteral, TokenPos: start, Text: s.text[start:s.pos]}
}
// scanTemplatePart consumes one piece of a template literal.
//
// With isRescan false the cursor is on the opening backtick; the result is a
// NoSubstitutionTemplateLiteral if a closing backtick comes first, or a
// TemplateHead if "${" comes first. With isRescan true the cursor is on the
// "}" that closes a substitution; the result is a TemplateTail or a
// TemplateMiddle by the same rule.
//
// A backslash escapes the byte after it. A backslash that is the last byte
// of the text is consumed on its own, so the cursor never moves past the end.
// If the text ends before the piece is closed, the piece is reported as if it
// had been closed by a backtick.
func (s *Scanner) scanTemplatePart(start int, isRescan bool) Token {
	closedKind, openKind := KindNoSubstitutionTemplateLiteral, KindTemplateHead
	if isRescan {
		closedKind, openKind = KindTemplateTail, KindTemplateMiddle
	}

	s.advance() // skip the opening backtick or, on rescan, the closing brace
	for s.pos < len(s.text) {
		switch ch := s.text[s.pos]; {
		case ch == '\\':
			s.pos += 2
			if s.pos > len(s.text) {
				s.pos = len(s.text)
			}
		case ch == '`':
			s.advance()
			return Token{Kind: closedKind, TokenPos: start, Text: s.text[start:s.pos]}
		case ch == '$' && s.peekAt(1) == '{':
			s.pos += 2
			return Token{Kind: openKind, TokenPos: start, Text: s.text[start:s.pos]}
		default:
			s.pos++
		}
	}
	// Unterminated
	return Token{Kind: closedKind, TokenPos: start, Text: s.text[start:s.pos]}
}
// scanRegExp consumes a regular expression literal, including its trailing
// flags. The cursor must be on the opening "/".
//
// A "/" inside a character class ("[...]") does not end the literal, and a
// backslash escapes the byte after it. A backslash that is the last byte of
// the text is consumed on its own, so the cursor never moves past the end.
// An unescaped line break or the end of the text ends the literal early; the
// token then covers what was read so far.
func (s *Scanner) scanRegExp(start int) Token {
	s.advance() // skip /
	inCharClass := false
	for s.pos < len(s.text) {
		ch := s.text[s.pos]
		if ch == '\\' {
			s.pos += 2
			if s.pos > len(s.text) {
				s.pos = len(s.text)
			}
			continue
		}
		if ch == '[' {
			inCharClass = true
			s.pos++
			continue
		}
		if ch == ']' {
			inCharClass = false
			s.pos++
			continue
		}
		if ch == '/' && !inCharClass {
			s.advance() // skip closing /
			// Scan flags
			for s.pos < len(s.text) && isIdentChar(s.text[s.pos]) {
				s.pos++
			}
			return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
		}
		if ch == '\n' || ch == '\r' {
			break
		}
		s.pos++
	}
	return Token{Kind: KindRegularExpressionLiteral, TokenPos: start, Text: s.text[start:s.pos]}
}
// scanGreater consumes the longest operator that begins with ">": one of
// ">", ">=", ">>", ">>=", ">>>" or ">>>=". The byte under the cursor is taken
// as the first ">" without being inspected.
func (s *Scanner) scanGreater(start int) Token {
	s.advance() // skip >

	// Count the run of '>' (at most three), then look for a trailing '='.
	run := 1
	for run < 3 && s.peek() == '>' {
		s.advance()
		run++
	}
	withEquals := s.peek() == '='
	if withEquals {
		s.advance()
	}

	var (
		kind int
		text string
	)
	switch {
	case run == 3 && withEquals:
		kind, text = KindGreaterThanGreaterThanGreaterThanEqualsToken, ">>>="
	case run == 3:
		kind, text = KindGreaterThanGreaterThanGreaterThanToken, ">>>"
	case run == 2 && withEquals:
		kind, text = KindGreaterThanGreaterThanEqualsToken, ">>="
	case run == 2:
		kind, text = KindGreaterThanGreaterThanToken, ">>"
	case withEquals:
		kind, text = KindGreaterThanEqualsToken, ">="
	default:
		kind, text = KindGreaterThanToken, ">"
	}
	return Token{Kind: kind, TokenPos: start, Text: text}
}
// scanNumber consumes a numeric literal that starts at the cursor.
//
// A "0x", "0b" or "0o" prefix (either case) selects hexadecimal, binary or
// octal digits. Anything else is read as decimal digits, optionally followed
// by a "." fraction and an "e"/"E" exponent with an optional sign. In every
// form a trailing "n" is handled by finishBigIntOrNumber.
func (s *Scanner) scanNumber(start int) Token {
	if s.peek() == '0' {
		switch s.peekAt(1) {
		case 'x', 'X':
			s.pos += 2
			s.scanHexDigits()
			return s.finishBigIntOrNumber(start)
		case 'b', 'B':
			s.pos += 2
			s.scanBinaryDigits()
			return s.finishBigIntOrNumber(start)
		case 'o', 'O':
			s.pos += 2
			s.scanOctalDigits()
			return s.finishBigIntOrNumber(start)
		}
	}

	// Integer part (empty for literals such as ".5").
	s.scanDecimalDigits()

	if s.peek() == '.' {
		s.advance()
		s.scanDecimalDigits()
	}

	if marker := s.peek(); marker == 'e' || marker == 'E' {
		s.advance()
		if sign := s.peek(); sign == '+' || sign == '-' {
			s.advance()
		}
		s.scanDecimalDigits()
	}

	return s.finishBigIntOrNumber(start)
}
// finishBigIntOrNumber closes a numeric literal whose digits have already
// been consumed. A directly following "n" is consumed and makes the token a
// BigIntLiteral; otherwise the token is a NumericLiteral.
func (s *Scanner) finishBigIntOrNumber(start int) Token {
	kind := KindNumericLiteral
	if s.peek() == 'n' {
		s.advance()
		kind = KindBigIntLiteral
	}
	return Token{Kind: kind, TokenPos: start, Text: s.text[start:s.pos]}
}
// scanDecimalDigits moves the cursor past a run of decimal digits and "_"
// separators. It may consume nothing.
func (s *Scanner) scanDecimalDigits() {
	for ; s.pos < len(s.text); s.pos++ {
		if c := s.text[s.pos]; c != '_' && (c < '0' || c > '9') {
			return
		}
	}
}
// scanHexDigits moves the cursor past a run of hexadecimal digits (either
// case) and "_" separators. It may consume nothing.
func (s *Scanner) scanHexDigits() {
	for ; s.pos < len(s.text); s.pos++ {
		switch c := s.text[s.pos]; {
		case c >= '0' && c <= '9':
		case c >= 'a' && c <= 'f':
		case c >= 'A' && c <= 'F':
		case c == '_':
		default:
			return
		}
	}
}
// scanBinaryDigits moves the cursor past a run of "0", "1" and "_"
// characters. It may consume nothing.
func (s *Scanner) scanBinaryDigits() {
	for ; s.pos < len(s.text); s.pos++ {
		switch s.text[s.pos] {
		case '0', '1', '_':
		default:
			return
		}
	}
}
// scanOctalDigits moves the cursor past a run of octal digits and "_"
// separators. It may consume nothing.
func (s *Scanner) scanOctalDigits() {
	for ; s.pos < len(s.text); s.pos++ {
		if c := s.text[s.pos]; c != '_' && (c < '0' || c > '7') {
			return
		}
	}
}
// scanIdentifierOrKeyword consumes an identifier that starts at the cursor
// and classifies it.
//
// ASCII letters, digits, "_" and "$" continue the identifier, as do
// non-ASCII letters, non-ASCII digits and the zero-width (non-)joiners
// U+200C/U+200D. If the resulting word is a key of keywordKinds the token
// gets that kind; otherwise it is an Identifier.
func (s *Scanner) scanIdentifierOrKeyword(start int) Token {
consume:
	for s.pos < len(s.text) {
		c := s.text[s.pos]
		switch {
		case isIdentChar(c):
			s.pos++
		case c < 0x80:
			// Any other ASCII byte ends the identifier.
			break consume
		default:
			r, width := utf8.DecodeRuneInString(s.text[s.pos:])
			if r == utf8.RuneError {
				break consume
			}
			if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '\u200C' && r != '\u200D' {
				break consume
			}
			s.pos += width
		}
	}

	word := s.text[start:s.pos]
	kind, isKeyword := keywordKinds[word]
	if !isKeyword {
		kind = KindIdentifier
	}
	return Token{Kind: kind, TokenPos: start, Text: word}
}
// scanPrivateIdentifier consumes a "#name" private identifier. The cursor
// must be on the "#". The token text includes the "#".
//
// The name continues over the same characters that scanIdentifierOrKeyword
// accepts: ASCII letters, digits, "_" and "$", plus non-ASCII letters and
// digits and the zero-width (non-)joiners U+200C/U+200D. Without the
// non-ASCII cases a name such as "#café" would be cut off at the first
// accented letter.
func (s *Scanner) scanPrivateIdentifier(start int) Token {
	s.advance() // skip #
	for s.pos < len(s.text) {
		c := s.text[s.pos]
		if isIdentChar(c) {
			s.pos++
			continue
		}
		if c < utf8.RuneSelf {
			break
		}
		r, size := utf8.DecodeRuneInString(s.text[s.pos:])
		if r == utf8.RuneError || !(unicode.IsLetter(r) || unicode.IsDigit(r) || r == '\u200C' || r == '\u200D') {
			break
		}
		s.pos += size
	}
	return Token{Kind: KindPrivateIdentifier, TokenPos: start, Text: s.text[start:s.pos]}
}
// isIdentStartByte reports whether the ASCII byte ch can begin an identifier:
// a letter, "_" or "$".
func isIdentStartByte(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	}
	return ch == '_' || ch == '$'
}

// isIdentStart is an alias for isIdentStartByte.
func isIdentStart(ch byte) bool {
	return isIdentStartByte(ch)
}

// isIdentStartRune reports whether the rune r can begin an identifier: any
// Unicode letter, "_" or "$".
func isIdentStartRune(r rune) bool {
	if r == '_' || r == '$' {
		return true
	}
	return unicode.IsLetter(r)
}

// isIdentChar reports whether the ASCII byte ch can continue an identifier:
// anything that can begin one, or a decimal digit.
func isIdentChar(ch byte) bool {
	if '0' <= ch && ch <= '9' {
		return true
	}
	return isIdentStartByte(ch)
}
// keywordKinds maps keyword text to TS7 SyntaxKind values.
// These start at KindBreakKeyword = 82.
//
// scanIdentifierOrKeyword looks every scanned word up here: a hit becomes a
// token of the mapped kind, a miss becomes an Identifier. The values are
// consecutive from 82 ("break") to 166 ("defer").
// NOTE(review): the numbers presumably follow the declaration order of the
// keyword kinds in typescript-go's kind.go — re-check them against that file
// whenever the upstream enum changes.
var keywordKinds = map[string]int{
// Reserved words.
"break": 82,
"case": 83,
"catch": 84,
"class": 85,
"const": 86,
"continue": 87,
"debugger": 88,
"default": 89,
"delete": 90,
"do": 91,
"else": 92,
"enum": 93,
"export": 94,
"extends": 95,
"false": 96,
"finally": 97,
"for": 98,
"function": 99,
"if": 100,
"import": 101,
"in": 102,
"instanceof": 103,
"new": 104,
"null": 105,
"return": 106,
"super": 107,
"switch": 108,
"this": 109,
"throw": 110,
"true": 111,
"try": 112,
"typeof": 113,
"var": 114,
"void": 115,
"while": 116,
"with": 117,
// Strict mode reserved words
"implements": 118,
"interface": 119,
"let": 120,
"package": 121,
"private": 122,
"protected": 123,
"public": 124,
"static": 125,
"yield": 126,
// Contextual keywords
"abstract": 127,
"accessor": 128,
"as": 129,
"asserts": 130,
"assert": 131,
"any": 132,
"async": 133,
"await": 134,
"boolean": 135,
"constructor": 136,
"declare": 137,
"get": 138,
"immediate": 139,
"infer": 140,
"intrinsic": 141,
"is": 142,
"keyof": 143,
"module": 144,
"namespace": 145,
"never": 146,
"out": 147,
"readonly": 148,
"require": 149,
"number": 150,
"object": 151,
"satisfies": 152,
"set": 153,
"string": 154,
"symbol": 155,
"type": 156,
"undefined": 157,
"unique": 158,
"unknown": 159,
"using": 160,
"from": 161,
"global": 162,
"bigint": 163,
"override": 164,
"of": 165,
"defer": 166,
}

View File

@@ -1,279 +1,335 @@
package tsparser
// getStaticTS7Metadata returns hardcoded metadata for TypeScript 7.
// GetStaticTS7Metadata returns hardcoded metadata for TypeScript 7.
// This must be kept in sync with the TypeScript compiler's SyntaxKind and
// NodeFlags enums. Eventually this should be obtained dynamically from
// the tsgo API.
// NodeFlags enums.
//
// The SyntaxKind values here correspond to the TypeScript 7 (Go port)
// compiler. The Java extractor uses the string names (not numeric IDs)
// to identify node kinds, so the exact numeric values only matter for
// the metadata response.
func getStaticTS7Metadata() *Metadata {
func GetStaticTS7Metadata() *Metadata {
return &Metadata{
SyntaxKinds: syntaxKinds,
NodeFlags: nodeFlags,
}
}
// GetSyntaxKinds exposes the SyntaxKind name→number table. The package-level
// map itself is returned, not a copy, so writes by the caller are visible to
// everything else that uses the table.
func GetSyntaxKinds() map[string]int {
	kinds := syntaxKinds
	return kinds
}
// BuildKindToNameMap returns a number→name reverse map for SyntaxKinds.
//
// When several names share one numeric value the shortest name wins, and
// names of equal length are ordered alphabetically. The tie-break matters
// because map iteration order is random: without it, two equally short
// aliases could swap places from one call to the next.
//
// A fresh map is built on every call.
func BuildKindToNameMap() map[uint32]string {
	m := make(map[uint32]string, len(syntaxKinds))
	for name, num := range syntaxKinds {
		key := uint32(num)
		existing, ok := m[key]
		if !ok || len(name) < len(existing) || (len(name) == len(existing) && name < existing) {
			m[key] = name
		}
	}
	return m
}
// syntaxKinds maps SyntaxKind names to their numeric values in TypeScript 7.
// This is a subset covering the kinds most commonly seen in parsed ASTs.
// The full set should be generated from the TypeScript source.
// Generated from microsoft/typescript-go/internal/ast/kind.go (iota enum).
var syntaxKinds = map[string]int{
"Unknown": 0,
"EndOfFileToken": 1,
"SingleLineCommentTrivia": 2,
"MultiLineCommentTrivia": 3,
"NewLineTrivia": 4,
"WhitespaceTrivia": 5,
"NumericLiteral": 9,
"BigIntLiteral": 10,
"StringLiteral": 11,
"RegularExpressionLiteral": 14,
"NoSubstitutionTemplateLiteral": 15,
"TemplateHead": 16,
"TemplateMiddle": 17,
"TemplateTail": 18,
"OpenBraceToken": 19,
"CloseBraceToken": 20,
"OpenParenToken": 21,
"CloseParenToken": 22,
"OpenBracketToken": 23,
"CloseBracketToken": 24,
"DotToken": 25,
"DotDotDotToken": 26,
"SemicolonToken": 27,
"CommaToken": 28,
"QuestionDotToken": 29,
"LessThanToken": 30,
"GreaterThanToken": 31,
"EqualsToken": 64,
"PlusToken": 40,
"MinusToken": 41,
"AsteriskToken": 42,
"SlashToken": 44,
"ExclamationToken": 54,
"QuestionToken": 58,
"ColonToken": 59,
"AtToken": 60,
"EqualsGreaterThanToken": 39,
"Identifier": 80,
"BreakKeyword": 83,
"CaseKeyword": 84,
"CatchKeyword": 85,
"ClassKeyword": 86,
"ConstKeyword": 87,
"ContinueKeyword": 88,
"DebuggerKeyword": 89,
"DefaultKeyword": 90,
"DeleteKeyword": 91,
"DoKeyword": 92,
"ElseKeyword": 93,
"EnumKeyword": 94,
"ExportKeyword": 95,
"ExtendsKeyword": 96,
"FalseKeyword": 97,
"FinallyKeyword": 98,
"ForKeyword": 99,
"FunctionKeyword": 100,
"IfKeyword": 101,
"ImportKeyword": 102,
"InKeyword": 103,
"InstanceOfKeyword": 104,
"NewKeyword": 105,
"NullKeyword": 106,
"ReturnKeyword": 107,
"SuperKeyword": 108,
"SwitchKeyword": 109,
"ThisKeyword": 110,
"ThrowKeyword": 111,
"TrueKeyword": 112,
"TryKeyword": 113,
"TypeOfKeyword": 114,
"VarKeyword": 115,
"VoidKeyword": 116,
"WhileKeyword": 117,
"WithKeyword": 118,
"ImplementsKeyword": 119,
"InterfaceKeyword": 120,
"LetKeyword": 121,
"PackageKeyword": 122,
"PrivateKeyword": 123,
"ProtectedKeyword": 124,
"PublicKeyword": 125,
"StaticKeyword": 126,
"YieldKeyword": 127,
"AbstractKeyword": 128,
"AccessorKeyword": 129,
"AsKeyword": 130,
"AsyncKeyword": 134,
"AwaitKeyword": 135,
"ConstructorKeyword": 137,
"DeclareKeyword": 138,
"GetKeyword": 139,
"InferKeyword": 140,
"IsKeyword": 142,
"KeyOfKeyword": 143,
"ModuleKeyword": 144,
"NamespaceKeyword": 145,
"NeverKeyword": 146,
"ReadonlyKeyword": 148,
"RequireKeyword": 149,
"NumberKeyword": 150,
"ObjectKeyword": 151,
"SetKeyword": 152,
"StringKeyword": 153,
"SymbolKeyword": 154,
"TypeKeyword": 155,
"UndefinedKeyword": 157,
"UniqueKeyword": 158,
"FromKeyword": 161,
"OfKeyword": 165,
"QualifiedName": 166,
"ComputedPropertyName": 167,
"TypeParameter": 168,
"Parameter": 169,
"Decorator": 170,
"PropertySignature": 171,
"PropertyDeclaration": 172,
"MethodSignature": 173,
"MethodDeclaration": 174,
"ClassStaticBlockDeclaration": 175,
"Constructor": 176,
"GetAccessor": 177,
"SetAccessor": 178,
"CallSignature": 179,
"ConstructSignature": 180,
"IndexSignature": 181,
"TypePredicate": 182,
"TypeReference": 183,
"FunctionType": 184,
"ConstructorType": 185,
"TypeQuery": 186,
"TypeLiteral": 187,
"ArrayType": 188,
"TupleType": 189,
"OptionalType": 190,
"RestType": 191,
"UnionType": 192,
"IntersectionType": 193,
"ConditionalType": 194,
"InferType": 195,
"ParenthesizedType": 196,
"ThisType": 197,
"TypeOperator": 198,
"IndexedAccessType": 199,
"MappedType": 200,
"LiteralType": 201,
"NamedTupleMember": 202,
"TemplateLiteralType": 203,
"TemplateLiteralTypeSpan": 204,
"ImportType": 205,
"ObjectBindingPattern": 206,
"ArrayBindingPattern": 207,
"BindingElement": 208,
"ArrayLiteralExpression": 209,
"ObjectLiteralExpression": 210,
"PropertyAccessExpression": 211,
"ElementAccessExpression": 212,
"CallExpression": 213,
"NewExpression": 214,
"TaggedTemplateExpression": 215,
"TypeAssertionExpression": 216,
"ParenthesizedExpression": 217,
"FunctionExpression": 218,
"ArrowFunction": 219,
"DeleteExpression": 220,
"TypeOfExpression": 221,
"VoidExpression": 222,
"AwaitExpression": 223,
"PrefixUnaryExpression": 224,
"PostfixUnaryExpression": 225,
"BinaryExpression": 226,
"ConditionalExpression": 227,
"TemplateExpression": 228,
"YieldExpression": 229,
"SpreadElement": 230,
"ClassExpression": 231,
"ExpressionWithTypeArguments": 233,
"AsExpression": 234,
"NonNullExpression": 235,
"MetaProperty": 236,
"SyntheticExpression": 237,
"SatisfiesExpression": 238,
"TemplateSpan": 239,
"SemicolonClassElement": 240,
"Block": 241,
"EmptyStatement": 242,
"VariableStatement": 243,
"ExpressionStatement": 244,
"IfStatement": 245,
"DoStatement": 246,
"WhileStatement": 247,
"ForStatement": 248,
"ForInStatement": 249,
"ForOfStatement": 250,
"ContinueStatement": 251,
"BreakStatement": 252,
"ReturnStatement": 253,
"WithStatement": 254,
"SwitchStatement": 255,
"LabeledStatement": 256,
"ThrowStatement": 257,
"TryStatement": 258,
"DebuggerStatement": 259,
"VariableDeclaration": 260,
"VariableDeclarationList": 261,
"FunctionDeclaration": 262,
"ClassDeclaration": 263,
"InterfaceDeclaration": 264,
"TypeAliasDeclaration": 265,
"EnumDeclaration": 266,
"ModuleDeclaration": 267,
"ModuleBlock": 268,
"CaseBlock": 269,
"NamespaceExportDeclaration": 270,
"ImportEqualsDeclaration": 271,
"ImportDeclaration": 272,
"ImportClause": 273,
"NamespaceImport": 274,
"NamedImports": 275,
"ImportSpecifier": 276,
"ExportAssignment": 277,
"ExportDeclaration": 278,
"NamedExports": 279,
"NamespaceExport": 280,
"ExportSpecifier": 281,
"ExternalModuleReference": 283,
"CaseClause": 295,
"DefaultClause": 296,
"HeritageClause": 297,
"CatchClause": 298,
"ImportAttributes": 302,
"ImportAttribute": 303,
"PropertyAssignment": 304,
"ShorthandPropertyAssignment": 305,
"SpreadAssignment": 306,
"EnumMember": 307,
"SourceFile": 316,
"NotEmittedStatement": 354,
"CommaListExpression": 360,
"SyntaxList": 362,
"JSDocTypeExpression": 316,
"JSDocComment": 327,
"JsxElement": 284,
"JsxSelfClosingElement": 285,
"JsxOpeningElement": 286,
"JsxClosingElement": 287,
"JsxFragment": 288,
"JsxOpeningFragment": 289,
"JsxClosingFragment": 290,
"JsxAttribute": 291,
"JsxAttributes": 292,
"JsxSpreadAttribute": 293,
"JsxExpression": 294,
"JsxText": 12,
"JsxTextAllWhiteSpaces": 13,
"Unknown": 0,
"EndOfFile": 1,
"NumericLiteral": 8,
"BigIntLiteral": 9,
"StringLiteral": 10,
"JsxText": 11,
"JsxTextAllWhiteSpaces": 12,
"RegularExpressionLiteral": 13,
"NoSubstitutionTemplateLiteral": 14,
"TemplateHead": 15,
"TemplateMiddle": 16,
"TemplateTail": 17,
"OpenBraceToken": 18,
"CloseBraceToken": 19,
"OpenParenToken": 20,
"CloseParenToken": 21,
"OpenBracketToken": 22,
"CloseBracketToken": 23,
"DotToken": 24,
"DotDotDotToken": 25,
"SemicolonToken": 26,
"CommaToken": 27,
"QuestionDotToken": 28,
"LessThanToken": 29,
"GreaterThanToken": 31,
"EqualsGreaterThanToken": 38,
"PlusToken": 39,
"MinusToken": 40,
"AsteriskToken": 41,
"SlashToken": 43,
"PlusPlusToken": 45,
"MinusMinusToken": 46,
"ExclamationToken": 53,
"TildeToken": 54,
"QuestionToken": 57,
"ColonToken": 58,
"AtToken": 59,
"EqualsToken": 63,
"Identifier": 79,
"PrivateIdentifier": 80,
"BreakKeyword": 82,
"CaseKeyword": 83,
"CatchKeyword": 84,
"ClassKeyword": 85,
"ConstKeyword": 86,
"ContinueKeyword": 87,
"DebuggerKeyword": 88,
"DefaultKeyword": 89,
"DeleteKeyword": 90,
"DoKeyword": 91,
"ElseKeyword": 92,
"EnumKeyword": 93,
"ExportKeyword": 94,
"ExtendsKeyword": 95,
"FalseKeyword": 96,
"FinallyKeyword": 97,
"ForKeyword": 98,
"FunctionKeyword": 99,
"IfKeyword": 100,
"ImportKeyword": 101,
"InKeyword": 102,
"InstanceOfKeyword": 103,
"NewKeyword": 104,
"NullKeyword": 105,
"ReturnKeyword": 106,
"SuperKeyword": 107,
"SwitchKeyword": 108,
"ThisKeyword": 109,
"ThrowKeyword": 110,
"TrueKeyword": 111,
"TryKeyword": 112,
"TypeOfKeyword": 113,
"VarKeyword": 114,
"VoidKeyword": 115,
"WhileKeyword": 116,
"WithKeyword": 117,
"ImplementsKeyword": 118,
"InterfaceKeyword": 119,
"LetKeyword": 120,
"PackageKeyword": 121,
"PrivateKeyword": 122,
"ProtectedKeyword": 123,
"PublicKeyword": 124,
"StaticKeyword": 125,
"YieldKeyword": 126,
"AbstractKeyword": 127,
"AccessorKeyword": 128,
"AsKeyword": 129,
"AssertsKeyword": 130,
"AssertKeyword": 131,
"AnyKeyword": 132,
"AsyncKeyword": 133,
"AwaitKeyword": 134,
"BooleanKeyword": 135,
"ConstructorKeyword": 136,
"DeclareKeyword": 137,
"GetKeyword": 138,
"InferKeyword": 140,
"IntrinsicKeyword": 141,
"IsKeyword": 142,
"KeyOfKeyword": 143,
"ModuleKeyword": 144,
"NamespaceKeyword": 145,
"NeverKeyword": 146,
"ReadonlyKeyword": 148,
"RequireKeyword": 149,
"NumberKeyword": 150,
"ObjectKeyword": 151,
"SetKeyword": 153,
"StringKeyword": 154,
"SymbolKeyword": 155,
"TypeKeyword": 156,
"UndefinedKeyword": 157,
"UniqueKeyword": 158,
"UnknownKeyword": 159,
"FromKeyword": 161,
"BigIntKeyword": 163,
"OverrideKeyword": 164,
"OfKeyword": 165,
"DeferKeyword": 166,
"QualifiedName": 167,
"ComputedPropertyName": 168,
"TypeParameter": 169,
"Parameter": 170,
"Decorator": 171,
"PropertySignature": 172,
"PropertyDeclaration": 173,
"MethodSignature": 174,
"MethodDeclaration": 175,
"ClassStaticBlockDeclaration": 176,
"Constructor": 177,
"GetAccessor": 178,
"SetAccessor": 179,
"CallSignature": 180,
"ConstructSignature": 181,
"IndexSignature": 182,
"TypePredicate": 183,
"TypeReference": 184,
"FunctionType": 185,
"ConstructorType": 186,
"TypeQuery": 187,
"TypeLiteral": 188,
"ArrayType": 189,
"TupleType": 190,
"OptionalType": 191,
"RestType": 192,
"UnionType": 193,
"IntersectionType": 194,
"ConditionalType": 195,
"InferType": 196,
"ParenthesizedType": 197,
"ThisType": 198,
"TypeOperator": 199,
"IndexedAccessType": 200,
"MappedType": 201,
"LiteralType": 202,
"NamedTupleMember": 203,
"TemplateLiteralType": 204,
"TemplateLiteralTypeSpan": 205,
"ImportType": 206,
"ObjectBindingPattern": 207,
"ArrayBindingPattern": 208,
"BindingElement": 209,
"ArrayLiteralExpression": 210,
"ObjectLiteralExpression": 211,
"PropertyAccessExpression": 212,
"ElementAccessExpression": 213,
"CallExpression": 214,
"NewExpression": 215,
"TaggedTemplateExpression": 216,
"TypeAssertionExpression": 217,
"ParenthesizedExpression": 218,
"FunctionExpression": 219,
"ArrowFunction": 220,
"DeleteExpression": 221,
"TypeOfExpression": 222,
"VoidExpression": 223,
"AwaitExpression": 224,
"PrefixUnaryExpression": 225,
"PostfixUnaryExpression": 226,
"BinaryExpression": 227,
"ConditionalExpression": 228,
"TemplateExpression": 229,
"YieldExpression": 230,
"SpreadElement": 231,
"ClassExpression": 232,
"OmittedExpression": 233,
"ExpressionWithTypeArguments": 234,
"AsExpression": 235,
"NonNullExpression": 236,
"MetaProperty": 237,
"SatisfiesExpression": 239,
"TemplateSpan": 240,
"SemicolonClassElement": 241,
"Block": 242,
"EmptyStatement": 243,
"VariableStatement": 244,
"ExpressionStatement": 245,
"IfStatement": 246,
"DoStatement": 247,
"WhileStatement": 248,
"ForStatement": 249,
"ForInStatement": 250,
"ForOfStatement": 251,
"ContinueStatement": 252,
"BreakStatement": 253,
"ReturnStatement": 254,
"WithStatement": 255,
"SwitchStatement": 256,
"LabeledStatement": 257,
"ThrowStatement": 258,
"TryStatement": 259,
"DebuggerStatement": 260,
"VariableDeclaration": 261,
"VariableDeclarationList": 262,
"FunctionDeclaration": 263,
"ClassDeclaration": 264,
"InterfaceDeclaration": 265,
"TypeAliasDeclaration": 266,
"EnumDeclaration": 267,
"ModuleDeclaration": 268,
"ModuleBlock": 269,
"CaseBlock": 270,
"NamespaceExportDeclaration": 271,
"ImportEqualsDeclaration": 272,
"ImportDeclaration": 273,
"ImportClause": 274,
"NamespaceImport": 275,
"NamedImports": 276,
"ImportSpecifier": 277,
"ExportAssignment": 278,
"ExportDeclaration": 279,
"NamedExports": 280,
"NamespaceExport": 281,
"ExportSpecifier": 282,
"MissingDeclaration": 283,
"ExternalModuleReference": 284,
"JsxElement": 285,
"JsxSelfClosingElement": 286,
"JsxOpeningElement": 287,
"JsxClosingElement": 288,
"JsxFragment": 289,
"JsxOpeningFragment": 290,
"JsxClosingFragment": 291,
"JsxAttribute": 292,
"JsxAttributes": 293,
"JsxSpreadAttribute": 294,
"JsxExpression": 295,
"JsxNamespacedName": 296,
"CaseClause": 297,
"DefaultClause": 298,
"HeritageClause": 299,
"CatchClause": 300,
"ImportAttributes": 301,
"ImportAttribute": 302,
"PropertyAssignment": 303,
"ShorthandPropertyAssignment": 304,
"SpreadAssignment": 305,
"EnumMember": 306,
"SourceFile": 307,
"JSDocTypeExpression": 308,
"JSDocNameReference": 309,
"JSDocNullableType": 312,
"JSDocNonNullableType": 313,
"JSDocOptionalType": 314,
"JSDocVariadicType": 315,
"JSDoc": 316,
"JSDocText": 317,
"JSDocTypeLiteral": 318,
"JSDocSignature": 319,
"JSDocLink": 320,
"JSDocLinkCode": 321,
"JSDocLinkPlain": 322,
"JSDocTag": 323,
"JSDocAugmentsTag": 324,
"JSDocImplementsTag": 325,
"JSDocDeprecatedTag": 326,
"JSDocPublicTag": 327,
"JSDocPrivateTag": 328,
"JSDocProtectedTag": 329,
"JSDocReadonlyTag": 330,
"JSDocOverrideTag": 331,
"JSDocCallbackTag": 332,
"JSDocOverloadTag": 333,
"JSDocParameterTag": 334,
"JSDocReturnTag": 335,
"JSDocThisTag": 336,
"JSDocTypeTag": 337,
"JSDocTemplateTag": 338,
"JSDocTypedefTag": 339,
"JSDocSeeTag": 340,
"JSDocPropertyTag": 341,
"JSDocThrowsTag": 342,
"JSDocSatisfiesTag": 343,
"JSDocImportTag": 344,
}
// nodeFlags maps NodeFlags names to their numeric values.

View File

@@ -100,7 +100,7 @@ func (p *StandaloneParser) Parse(filename string) (*ParseResult, error) {
// GetMetadata returns static TS7 metadata.
func (p *StandaloneParser) GetMetadata() (*Metadata, error) {
return getStaticTS7Metadata(), nil
return GetStaticTS7Metadata(), nil
}
// Reset is a no-op for the standalone parser.

View File

@@ -11,6 +11,8 @@ import (
"path/filepath"
"strconv"
"sync"
"github.com/github/codeql/javascript/extractor/lib/typescript-go/internal/astconv"
)
// TsgoParser implements the Parser interface by running the tsgo binary
@@ -186,26 +188,36 @@ func (p *TsgoParser) sendRequest(method string, params interface{}) (json.RawMes
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
fmt.Fprintf(os.Stderr, "[tsgo] >>> %s id=%d\n", method, id)
if err := p.writeMessage(data); err != nil {
return nil, fmt.Errorf("failed to write request: %w", err)
}
// Read the response
respData, err := p.readMessage()
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
// Read responses, skipping notifications (messages without a matching id).
// In --async mode, tsgo may send diagnostic notifications between responses.
for {
respData, err := p.readMessage()
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
var resp jsonRPCResponse
if err := json.Unmarshal(respData, &resp); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
var resp jsonRPCResponse
if err := json.Unmarshal(respData, &resp); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
if resp.Error != nil {
return nil, fmt.Errorf("tsgo API error %d: %s", resp.Error.Code, resp.Error.Message)
}
// Skip notifications (id=0 means no id field was present in JSON)
if resp.ID != id {
continue
}
return resp.Result, nil
if resp.Error != nil {
return nil, fmt.Errorf("tsgo API error %d: %s", resp.Error.Code, resp.Error.Message)
}
return resp.Result, nil
}
}
// call sends a request with proper locking and initialization.
@@ -229,30 +241,48 @@ type updateSnapshotResponse struct {
} `json:"projects"`
}
// ensureProjectOpen opens a project for the given file's directory using
// a temporary tsconfig, or uses the existing snapshot if already open.
// ensureProjectOpen opens a project for the given file.
// The tsgo API requires a tsconfig for project opening, so if none exists
// in the file's directory, we create a temporary one.
func (p *TsgoParser) ensureProjectOpen(filename string) error {
if p.snapshotHandle != "" && p.projectHandle != "" {
return nil
}
// Create a snapshot by opening a project.
// For single-file parsing without a tsconfig, we ask tsgo to open
// the file's directory as a project. The tsgo API requires a
// tsconfig path for OpenProject.
dir := filepath.Dir(filename)
base := filepath.Base(filename)
tsconfigPath := filepath.Join(dir, "tsconfig.json")
// First try: updateSnapshot with the file's directory tsconfig
// If no tsconfig exists, create a temporary one
createdTsconfig := false
if _, err := os.Stat(tsconfigPath); os.IsNotExist(err) {
tsconfig := fmt.Sprintf(`{
"compilerOptions": {
"target": "esnext",
"module": "esnext",
"noEmit": true,
"strict": false,
"allowJs": true
},
"files": [%q]
}`, base)
if err := os.WriteFile(tsconfigPath, []byte(tsconfig), 0644); err != nil {
return fmt.Errorf("failed to create temporary tsconfig: %w", err)
}
createdTsconfig = true
}
result, err := p.sendRequest("updateSnapshot", map[string]interface{}{
"openProject": tsconfigPath,
})
// Clean up temporary tsconfig
if createdTsconfig {
os.Remove(tsconfigPath)
}
if err != nil {
// If no tsconfig exists, try without a project
result, err = p.sendRequest("updateSnapshot", map[string]interface{}{})
if err != nil {
return fmt.Errorf("failed to create snapshot: %w", err)
}
return fmt.Errorf("failed to open project: %w", err)
}
var resp updateSnapshotResponse
@@ -303,18 +333,37 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) {
return nil, fmt.Errorf("parse %s: %w", filename, err)
}
// The result is the binary-encoded source file data (base64 when
// using JSON protocol). For now, store the raw response.
// TODO: Decode the binary format into a JSON AST.
// The result is {"data":"<base64>"} containing a binary-encoded AST.
var dataResp struct {
Data string `json:"data"`
}
if err := json.Unmarshal(result, &dataResp); err != nil {
return nil, fmt.Errorf("parse %s: failed to parse getSourceFile response: %w", filename, err)
}
binaryAST, err := astconv.DecodeBinaryASTFromBase64(dataResp.Data)
if err != nil {
return nil, fmt.Errorf("parse %s: failed to decode binary AST: %w", filename, err)
}
kindToName := BuildKindToNameMap()
converter := astconv.NewConverter(binaryAST, kindToName)
astObj, err := converter.Convert()
if err != nil {
return nil, fmt.Errorf("parse %s: failed to convert AST: %w", filename, err)
}
filtered := astconv.FilterWhitelist(astObj)
return &ParseResult{
AST: result,
RawData: []byte(result),
AST: filtered,
RawData: []byte(dataResp.Data),
}, nil
}
// GetMetadata returns the syntax kinds and node flags.
func (p *TsgoParser) GetMetadata() (*Metadata, error) {
return getStaticTS7Metadata(), nil
return GetStaticTS7Metadata(), nil
}
// Reset resets the parser state, killing and restarting the subprocess.

View File

@@ -193,7 +193,7 @@ func TestTsgoGetMetadata(t *testing.T) {
}
func TestStaticMetadata(t *testing.T) {
meta := getStaticTS7Metadata()
meta := GetStaticTS7Metadata()
required := []string{"SourceFile", "Identifier", "Block", "VariableStatement",
"FunctionDeclaration", "ClassDeclaration", "InterfaceDeclaration"}
@@ -235,3 +235,68 @@ func min(a, b int) int {
}
return b
}
// TestTsgoParse parses a sample file through the tsgo-backed parser and checks
// the basic shape of the resulting AST: the root is a map whose "kind" is the
// SourceFile kind, it carries "$pos" and "$end", and it has a non-empty
// "statements" array. The test is skipped when no tsgo binary is on PATH.
func TestTsgoParse(t *testing.T) {
	if _, lookErr := exec.LookPath("tsgo"); lookErr != nil {
		t.Skip("tsgo not found on PATH")
	}

	// Resolve the sample before starting the parser, so a missing sample
	// aborts the test without a parser ever having been created.
	sample := findTestFile(t)

	p := NewTsgoParser(Config{Stderr: os.Stderr})
	defer p.Close()

	parsed, err := p.Parse(sample)
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}

	root, isMap := parsed.AST.(map[string]interface{})
	if !isMap {
		t.Fatalf("Expected AST to be map[string]interface{}, got %T", parsed.AST)
	}

	// The root node must be a SourceFile.
	const sourceFileKind = 307 // SourceFile = 307 in TS7
	rawKind, hasKind := root["kind"]
	if !hasKind {
		t.Fatal("Missing 'kind' property on root node")
	}
	switch kind := rawKind.(type) {
	case int:
		if kind != sourceFileKind {
			t.Errorf("Expected root kind=307 (SourceFile), got %d", kind)
		}
	default:
		t.Fatalf("Expected 'kind' to be int, got %T", rawKind)
	}

	// Position markers are reported individually and do not stop the test.
	positionProps := []struct{ key, missing string }{
		{"$pos", "Missing '$pos' property"},
		{"$end", "Missing '$end' property"},
	}
	for _, prop := range positionProps {
		if _, present := root[prop.key]; !present {
			t.Error(prop.missing)
		}
	}

	// The statement list must exist, be an array, and contain something.
	rawStmts, hasStmts := root["statements"]
	if !hasStmts {
		t.Fatal("Missing 'statements' property")
	}
	stmtList, isList := rawStmts.([]interface{})
	if !isList {
		t.Fatalf("Expected statements to be array, got %T", rawStmts)
	}
	if len(stmtList) == 0 {
		t.Error("Expected non-empty statements array")
	}

	// Log the beginning of the pretty-printed AST as a debugging aid.
	pretty, err := json.MarshalIndent(root, "", " ")
	if err != nil {
		t.Fatalf("Failed to marshal AST: %v", err)
	}
	const previewLen = 2000
	preview := string(pretty)
	if len(preview) > previewLen {
		preview = preview[:previewLen] + "\n... (truncated)"
	}
	t.Logf("Parsed AST (first 2000 chars):\n%s", preview)
}

View File

@@ -297,10 +297,20 @@ func TestCompareOutputs(t *testing.T) {
os.WriteFile(filepath.Join(outDir, basename+".nodejs.json"), nodejsNorm, 0644)
os.WriteFile(filepath.Join(outDir, basename+".go.json"), goNorm, 0644)
t.Errorf("Output mismatch for %s\n"+
" Node.js output saved to: validation-output/%s.nodejs.json\n"+
" Go output saved to: validation-output/%s.go.json",
basename, basename, basename)
// Parse both outputs and check for structural diffs (ignoring expected kind/flags differences)
var nodejsObj, goObj map[string]interface{}
json.Unmarshal(nodejsNorm, &nodejsObj)
json.Unmarshal(goNorm, &goObj)
structural := countStructuralDiffs(nodejsObj["ast"], goObj["ast"], "root")
if structural > 0 {
t.Errorf("Output has %d structural diff(s) for %s (beyond expected kind/flags diffs)\n"+
" Node.js output saved to: validation-output/%s.nodejs.json\n"+
" Go output saved to: validation-output/%s.go.json",
structural, basename, basename, basename)
} else {
t.Logf("Output for %s differs only in expected kind/flags/token numeric values (TS5 vs TS7)", basename)
}
}
})
}
@@ -324,3 +334,76 @@ func TestNormalizeJSON(t *testing.T) {
t.Errorf("got:\n%s\nexpected:\n%s", string(result), expected)
}
}
// numericValueKeys is the set of JSON property names whose numeric values are
// allowed to differ between the TS5 and TS7 outputs, because the underlying
// SyntaxKind/NodeFlags numbering changed between the two.
var numericValueKeys = map[string]bool{
	"flags":    true,
	"kind":     true,
	"operator": true,
	"token":    true,
}
// countStructuralDiffs recursively compares two JSON values and returns the
// number of differences that are NOT expected TS5↔TS7 numeric kind/flags diffs.
func countStructuralDiffs(a, b interface{}, path string) int {
count := 0
switch av := a.(type) {
case map[string]interface{}:
bv, ok := b.(map[string]interface{})
if !ok {
return 1
}
allKeys := map[string]bool{}
for k := range av {
allKeys[k] = true
}
for k := range bv {
allKeys[k] = true
}
for k := range allKeys {
aVal, aOk := av[k]
bVal, bOk := bv[k]
if !aOk || !bOk {
count++
continue
}
count += countStructuralDiffs(aVal, bVal, path+"."+k)
}
case []interface{}:
bv, ok := b.([]interface{})
if !ok {
return 1
}
if len(av) != len(bv) {
return 1
}
for i := range av {
count += countStructuralDiffs(av[i], bv[i], fmt.Sprintf("%s[%d]", path, i))
}
default:
if a != b {
// Check if this is an expected numeric diff for kind/flags/token/operator
key := lastPathComponent(path)
if numericValueKeys[key] {
// Both must be numbers for this to be an expected diff
_, aNum := a.(float64)
_, bNum := b.(float64)
if aNum && bNum {
return 0 // Expected TS5↔TS7 numeric diff
}
}
count++
}
}
return count
}
// lastPathComponent returns the part of path that follows its final '.'.
// A path without any '.' is returned unchanged, and a path ending in '.'
// yields the empty string.
func lastPathComponent(path string) string {
	// Move the cut point left until it sits just after a '.' or reaches the start.
	cut := len(path)
	for cut > 0 && path[cut-1] != '.' {
		cut--
	}
	return path[cut:]
}