TypeScript Go extractor: metadata fixes, NestedNamespace inference, and scanner improvements

- Fix TS7 nodeFlags: remove Synthesized (shifted in TS7), add GlobalAugmentation=64,
  correct OptionalChain=32, Namespace=16, shift subsequent flags
- Add 33 missing operator/punctuation token kinds to syntaxKinds metadata
- Infer NestedNamespace flag for dotted namespace declarations (TS7 binary
  doesn't set it, but Java extractor needs it)
- Fix shebang handling: emit ShebangTrivia (kind 6) instead of SingleLineCommentTrivia
- Fix token kinds for regex/template rescans to match TS5 pre-rescan behavior
  (SlashToken for regexes, CloseBraceToken for template continuations)
- Fix augmentPos to correctly skip comments (matching TS5's trivia-skipping regex)
- Resolve native tsgo binary from npm wrapper to avoid Node.js dependency
- Update project-layout glob for worktree support

TRAP test results: 493/495 passing (99.6%)
Remaining: badimport.ts (missing diagnostics), externalmodule.ts (structural diff)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Asger F
2026-04-13 15:11:48 +02:00
parent bd9d6b1962
commit 637ce99e44
5 changed files with 140 additions and 34 deletions

View File

@@ -82,6 +82,20 @@ func (c *Converter) convertNode(i int) (map[string]interface{}, error) {
// Add defined-bits-based properties
c.addDefinedBitProperties(i, kindName, node)
// TS7 doesn't set the NestedNamespace flag in the binary AST, but the Java
// extractor needs it to wrap inner namespace declarations in ExportNamedDeclaration.
// Detect nested namespaces (ModuleDeclaration whose body is another ModuleDeclaration)
// and add the flag to the inner declaration.
if kindName == "ModuleDeclaration" {
if body, ok := node["body"].(map[string]interface{}); ok {
if bodyKind, ok := body["kind"].(int); ok && bodyKind == 268 { // 268 = ModuleDeclaration
if flags, ok := body["flags"].(int); ok {
body["flags"] = flags | 8 // NestedNamespace = 8
}
}
}
}
return node, nil
}
@@ -491,10 +505,13 @@ func (c *Converter) addDefinedBitProperties(i int, kindName string, node map[str
}
// augmentPos replicates the Node.js wrapper's $pos augmentation:
// if skipTrivia is true, advances past leading whitespace and comments.
// if skip is true, advances past leading whitespace, single-line comments (//),
// and multi-line comments (/* */). This matches the TS5 Node.js wrapper regex:
// /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g
// Note: shebangs (#!) are NOT skipped — the TS5 regex does not match them.
// Input pos is a UTF-16 code unit offset; returns a UTF-16 code unit offset.
func (c *Converter) augmentPos(pos int, skipTrivia bool) int {
if !skipTrivia || c.sourceText == "" {
func (c *Converter) augmentPos(pos int, skip bool) int {
if !skip || c.sourceText == "" {
return pos
}
return byteToUTF16(c.skipTrivia(utf16ToByte(pos, c.byteOffsets)), c.utf16Offsets)
@@ -506,7 +523,8 @@ func (c *Converter) augmentBytePos(utf16Pos int) int {
return c.skipTrivia(utf16ToByte(utf16Pos, c.byteOffsets))
}
// skipTrivia advances past whitespace and comments starting at byte offset i.
// skipTrivia advances past whitespace, single-line comments (//), and
// multi-line comments (/* */), starting at byte offset i.
func (c *Converter) skipTrivia(i int) int {
n := len(c.sourceText)
for i < n {

View File

@@ -13,7 +13,8 @@ const (
KindMultiLineCommentTrivia = 3
KindNewLineTrivia = 4
KindWhitespaceTrivia = 5
KindConflictMarkerTrivia = 6
KindShebangTrivia = 6
KindConflictMarkerTrivia = 7
KindNumericLiteral = 8
KindBigIntLiteral = 9
KindStringLiteral = 10
@@ -120,7 +121,8 @@ func NewScanner(text string, rescanEvents []RescanEvent) *Scanner {
}
}
// ScanAll produces all tokens from the source text.
// ScanAll produces all tokens from the source text, including trivia
// (whitespace, newlines, comments), matching the Node.js wrapper behavior.
func (s *Scanner) ScanAll() []Token {
var tokens []Token
for {
@@ -206,15 +208,24 @@ func (s *Scanner) scan() Token {
return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]}
}
// Check for rescan event at this position
// Check for rescan event at this position.
// TS5's scanner loop captures the token kind BEFORE the rescan event fires,
// then uses the rescanned text. So regex tokens get kind=SlashToken with
// text="/pattern/flags", and template continuation tokens get kind=CloseBraceToken
// with the template text. We replicate this by scanning the full content but
// using the pre-rescan kind.
if tokenPos == s.nextRescanPos() {
kind := s.nextRescanKind()
s.consumeRescan()
switch kind {
case "regex":
return s.scanRegExp(tokenPos)
tok := s.scanRegExp(tokenPos)
tok.Kind = KindSlashToken
return tok
case "template":
return s.scanTemplatePart(tokenPos, true)
tok := s.scanTemplatePart(tokenPos, true)
tok.Kind = KindCloseBraceToken
return tok
case "greater":
return s.scanGreater(tokenPos)
}
@@ -454,8 +465,13 @@ func (s *Scanner) scan() Token {
case '#':
// Could be private identifier
if s.peekAt(1) == '!' && tokenPos == 0 {
// Shebang — scan to end of line
return s.scanSingleLineComment(tokenPos)
// Shebang — scan to end of line, emit as ShebangTrivia
start := s.pos
for s.pos < len(s.text) && s.text[s.pos] != '\n' && s.text[s.pos] != '\r' {
s.pos++
}
text := s.text[start:s.pos]
return Token{Kind: KindShebangTrivia, TokenPos: tokenPos, Text: text}
}
if isIdentStart(s.peekAt(1)) {
return s.scanPrivateIdentifier(tokenPos)

View File

@@ -36,7 +36,13 @@ func BuildKindToNameMap() map[uint32]string {
// Generated from microsoft/typescript-go/internal/ast/kind.go (iota enum).
var syntaxKinds = map[string]int{
"Unknown": 0,
"EndOfFile": 1,
"EndOfFileToken": 1,
"SingleLineCommentTrivia": 2,
"MultiLineCommentTrivia": 3,
"NewLineTrivia": 4,
"WhitespaceTrivia": 5,
"ShebangTrivia": 6,
"ConflictMarkerTrivia": 7,
"NumericLiteral": 8,
"BigIntLiteral": 9,
"StringLiteral": 10,
@@ -59,20 +65,54 @@ var syntaxKinds = map[string]int{
"CommaToken": 27,
"QuestionDotToken": 28,
"LessThanToken": 29,
"LessThanSlashToken": 30,
"GreaterThanToken": 31,
"LessThanEqualsToken": 32,
"GreaterThanEqualsToken": 33,
"EqualsEqualsToken": 34,
"ExclamationEqualsToken": 35,
"EqualsEqualsEqualsToken": 36,
"ExclamationEqualsEqualsToken": 37,
"EqualsGreaterThanToken": 38,
"PlusToken": 39,
"MinusToken": 40,
"AsteriskToken": 41,
"AsteriskAsteriskToken": 42,
"SlashToken": 43,
"PercentToken": 44,
"PlusPlusToken": 45,
"MinusMinusToken": 46,
"LessThanLessThanToken": 47,
"GreaterThanGreaterThanToken": 48,
"GreaterThanGreaterThanGreaterThanToken": 49,
"AmpersandToken": 50,
"BarToken": 51,
"CaretToken": 52,
"ExclamationToken": 53,
"TildeToken": 54,
"AmpersandAmpersandToken": 55,
"BarBarToken": 56,
"QuestionToken": 57,
"ColonToken": 58,
"AtToken": 59,
"QuestionQuestionToken": 60,
"HashToken": 62,
"EqualsToken": 63,
"PlusEqualsToken": 64,
"MinusEqualsToken": 65,
"AsteriskEqualsToken": 66,
"AsteriskAsteriskEqualsToken": 67,
"SlashEqualsToken": 68,
"PercentEqualsToken": 69,
"LessThanLessThanEqualsToken": 70,
"GreaterThanGreaterThanEqualsToken": 71,
"GreaterThanGreaterThanGreaterThanEqualsToken": 72,
"AmpersandEqualsToken": 73,
"BarEqualsToken": 74,
"BarBarEqualsToken": 75,
"AmpersandAmpersandEqualsToken": 76,
"QuestionQuestionEqualsToken": 77,
"CaretEqualsToken": 78,
"Identifier": 79,
"PrivateIdentifier": 80,
"BreakKeyword": 82,
@@ -332,29 +372,34 @@ var syntaxKinds = map[string]int{
"JSDocImportTag": 344,
}
// nodeFlags maps NodeFlags names to their numeric values.
// nodeFlags maps NodeFlags names to their numeric values in TypeScript 7.
// TS7 removed the Synthesized flag, shifting all subsequent flags down by one bit
// compared to TS5. The Java extractor only checks Using, NestedNamespace, and
// GlobalAugmentation, but we include all flags for completeness.
var nodeFlags = map[string]int{
"None": 0,
"Let": 1,
"Const": 2,
"NestedNamespace": 4,
"Synthesized": 8,
"Namespace": 16,
"OptionalChain": 32,
"ExportContext": 64,
"ContainsThis": 128,
"HasImplicitReturn": 256,
"HasExplicitReturn": 512,
"HasAsyncFunctions": 1024,
"DisallowInContext": 2048,
"YieldContext": 4096,
"DecoratorContext": 8192,
"AwaitContext": 16384,
"DisallowConditionalTypesContext": 32768,
"ThisNodeHasError": 65536,
"JavaScriptFile": 131072,
"ThisNodeOrAnySubNodesHasError": 262144,
"HasAggregatedChildData": 524288,
"JSDoc": 4194304,
"JsonFile": 33554432,
"Using": 4, // bit 2 (its own bit; not Let | Const, which would be 3)
"AwaitUsing": 6, // Using | Const
"NestedNamespace": 8, // bit 3 (TS7 binary AST doesn't set this)
"Namespace": 16, // bit 4 (was 32 in TS5)
"OptionalChain": 32, // bit 5 (was 64 in TS5)
"GlobalAugmentation": 64, // bit 6 — on `declare global { }` (was 2048 in TS5)
"ExportContext": 128, // bit 7
"ContainsThis": 256, // bit 8
"HasImplicitReturn": 512, // bit 9
"HasExplicitReturn": 1024, // bit 10
"HasAsyncFunctions": 2048, // bit 11
"DisallowInContext": 4096, // bit 12
"YieldContext": 8192, // bit 13
"DecoratorContext": 16384, // bit 14
"AwaitContext": 32768, // bit 15
"DisallowConditionalTypesContext": 65536, // bit 16
"ThisNodeHasError": 131072, // bit 17
"JavaScriptFile": 262144, // bit 18
"ThisNodeOrAnySubNodesHasError": 524288, // bit 19
"HasAggregatedChildData": 1048576, // bit 20
"JSDoc": 8388608, // bit 23
"JsonFile": 67108864, // bit 26
}

View File

@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"sync"
@@ -55,11 +56,37 @@ func (p *TsgoParser) findBinary() (string, error) {
// Look for tsgo on PATH (installed via: npm install -g @typescript/native-preview)
path, err := exec.LookPath("tsgo")
if err == nil {
// The npm-installed tsgo is a Node.js wrapper script that invokes the native binary.
// Try to resolve the native binary directly so we don't need Node.js at runtime.
if native := resolveNativeTsgo(path); native != "" {
return native, nil
}
return path, nil
}
return "", fmt.Errorf("tsgo binary not found on PATH; install with: npm install -g @typescript/native-preview")
}
// resolveNativeTsgo attempts to find the native tsgo binary inside an npm installation.
// The npm package @typescript/native-preview installs a Node.js wrapper at bin/tsgo
// which delegates to a platform-specific native binary at:
//
//	node_modules/@typescript/native-preview-<platform>-<arch>/lib/tsgo
//
// <platform> and <arch> follow Node.js naming (process.platform / process.arch),
// not Go naming, so runtime.GOOS and runtime.GOARCH are translated first
// (for example windows -> win32 and amd64 -> x64). On Windows the binary is
// expected to carry the .exe suffix.
//
// wrapperPath is the path of the wrapper as found on PATH; symlinks are followed.
// It returns the path of the native binary, or "" when the wrapper cannot be
// resolved or no native binary exists at any of the expected locations, in
// which case the caller is expected to fall back to the wrapper itself.
func resolveNativeTsgo(wrapperPath string) string {
	// Follow symlinks to find the real wrapper location.
	resolved, err := filepath.EvalSymlinks(wrapperPath)
	if err != nil {
		return ""
	}

	// Translate Go's platform identifiers into the ones npm package names use.
	platform := runtime.GOOS
	if platform == "windows" {
		platform = "win32"
	}
	arch := runtime.GOARCH
	switch arch {
	case "amd64":
		arch = "x64"
	case "386":
		arch = "ia32"
	}
	exe := "tsgo"
	if runtime.GOOS == "windows" {
		exe = "tsgo.exe"
	}

	// The wrapper is at <pkg>/bin/tsgo.js or <pkg>/bin/tsgo, so <pkg> is two levels up.
	pkgDir := filepath.Dir(filepath.Dir(resolved))
	platformPkg := fmt.Sprintf("@typescript/native-preview-%s-%s", platform, arch)

	candidates := []string{
		// Platform package nested under the wrapper package (global installs).
		filepath.Join(pkgDir, "node_modules", platformPkg, "lib", exe),
		// Platform package hoisted next to the wrapper package inside the same
		// node_modules directory (presumably the usual layout for local installs).
		filepath.Join(filepath.Dir(filepath.Dir(pkgDir)), platformPkg, "lib", exe),
	}
	for _, native := range candidates {
		if info, err := os.Stat(native); err == nil && !info.IsDir() {
			return native
		}
	}
	return ""
}
// startProcess starts the tsgo subprocess without sending any API requests.
func (p *TsgoParser) startProcess() error {
if p.started {

View File

@@ -1 +1 @@
**/*ql*/javascript/extractor/tests/*/input//
**/javascript/extractor/tests/*/input//