From 637ce99e447a40c258e82adf157ffc7996989873 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 13 Apr 2026 15:11:48 +0200 Subject: [PATCH] TypeScript Go extractor: metadata fixes, NestedNamespace inference, and scanner improvements - Fix TS7 nodeFlags: remove Synthesized (shifted in TS7), add GlobalAugmentation=64, correct OptionalChain=32, Namespace=16, shift subsequent flags - Add 33 missing operator/punctuation token kinds to syntaxKinds metadata - Infer NestedNamespace flag for dotted namespace declarations (TS7 binary doesn't set it, but Java extractor needs it) - Fix shebang handling: emit ShebangTrivia (kind 6) instead of SingleLineCommentTrivia - Fix token kinds for regex/template rescans to match TS5 pre-rescan behavior (SlashToken for regexes, CloseBraceToken for template continuations) - Fix augmentPos to correctly skip comments (matching TS5's trivia-skipping regex) - Resolve native tsgo binary from npm wrapper to avoid Node.js dependency - Update project-layout glob for worktree support TRAP test results: 493/495 passing (99.6%) Remaining: badimport.ts (missing diagnostics), externalmodule.ts (structural diff) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../internal/astconv/converter.go | 26 +++++- .../typescript-go/internal/astconv/scanner.go | 30 +++++-- .../internal/tsparser/metadata.go | 89 ++++++++++++++----- .../typescript-go/internal/tsparser/tsgo.go | 27 ++++++ javascript/extractor/tests/project-layout | 2 +- 5 files changed, 140 insertions(+), 34 deletions(-) diff --git a/javascript/extractor/lib/typescript-go/internal/astconv/converter.go b/javascript/extractor/lib/typescript-go/internal/astconv/converter.go index d83e7265f4c..d93d65f3adf 100644 --- a/javascript/extractor/lib/typescript-go/internal/astconv/converter.go +++ b/javascript/extractor/lib/typescript-go/internal/astconv/converter.go @@ -82,6 +82,20 @@ func (c *Converter) convertNode(i int) (map[string]interface{}, error) { // Add defined-bits-based 
properties c.addDefinedBitProperties(i, kindName, node) + // TS7 doesn't set the NestedNamespace flag in the binary AST, but the Java + // extractor needs it to wrap inner namespace declarations in ExportNamedDeclaration. + // Detect nested namespaces (ModuleDeclaration whose body is another ModuleDeclaration) + // and add the flag to the inner declaration. + if kindName == "ModuleDeclaration" { + if body, ok := node["body"].(map[string]interface{}); ok { + if bodyKind, ok := body["kind"].(int); ok && bodyKind == 268 { // 268 = ModuleDeclaration + if flags, ok := body["flags"].(int); ok { + body["flags"] = flags | 8 // NestedNamespace = 8 + } + } + } + } + return node, nil } @@ -491,10 +505,13 @@ func (c *Converter) addDefinedBitProperties(i int, kindName string, node map[str } // augmentPos replicates the Node.js wrapper's $pos augmentation: -// if skipTrivia is true, advances past leading whitespace and comments. +// if skip is true, advances past leading whitespace, single-line comments (//), +// and multi-line comments (/* */). This matches the TS5 Node.js wrapper regex: +// /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g +// Note: shebangs (#!) are NOT skipped — the TS5 regex does not match them. // Input pos is a UTF-16 code unit offset; returns a UTF-16 code unit offset. -func (c *Converter) augmentPos(pos int, skipTrivia bool) int { - if !skipTrivia || c.sourceText == "" { +func (c *Converter) augmentPos(pos int, skip bool) int { + if !skip || c.sourceText == "" { return pos } return byteToUTF16(c.skipTrivia(utf16ToByte(pos, c.byteOffsets)), c.utf16Offsets) @@ -506,7 +523,8 @@ func (c *Converter) augmentBytePos(utf16Pos int) int { return c.skipTrivia(utf16ToByte(utf16Pos, c.byteOffsets)) } -// skipTrivia advances past whitespace and comments starting at byte offset i. +// skipTrivia advances past whitespace, single-line comments (//), and +// multi-line comments (/* */), starting at byte offset i. 
func (c *Converter) skipTrivia(i int) int { n := len(c.sourceText) for i < n { diff --git a/javascript/extractor/lib/typescript-go/internal/astconv/scanner.go b/javascript/extractor/lib/typescript-go/internal/astconv/scanner.go index cb1d1684153..b1b1e539e2b 100644 --- a/javascript/extractor/lib/typescript-go/internal/astconv/scanner.go +++ b/javascript/extractor/lib/typescript-go/internal/astconv/scanner.go @@ -13,7 +13,8 @@ const ( KindMultiLineCommentTrivia = 3 KindNewLineTrivia = 4 KindWhitespaceTrivia = 5 - KindConflictMarkerTrivia = 6 + KindShebangTrivia = 6 + KindConflictMarkerTrivia = 7 KindNumericLiteral = 8 KindBigIntLiteral = 9 KindStringLiteral = 10 @@ -120,7 +121,8 @@ func NewScanner(text string, rescanEvents []RescanEvent) *Scanner { } } -// ScanAll produces all tokens from the source text. +// ScanAll produces all tokens from the source text, including trivia +// (whitespace, newlines, comments), matching the Node.js wrapper behavior. func (s *Scanner) ScanAll() []Token { var tokens []Token for { @@ -206,15 +208,24 @@ func (s *Scanner) scan() Token { return Token{Kind: KindNewLineTrivia, TokenPos: tokenPos, Text: s.text[tokenPos:s.pos]} } - // Check for rescan event at this position + // Check for rescan event at this position. + // TS5's scanner loop captures the token kind BEFORE the rescan event fires, + // then uses the rescanned text. So regex tokens get kind=SlashToken with + // text="/pattern/flags", and template continuation tokens get kind=CloseBraceToken + // with the template text. We replicate this by scanning the full content but + // using the pre-rescan kind. 
if tokenPos == s.nextRescanPos() { kind := s.nextRescanKind() s.consumeRescan() switch kind { case "regex": - return s.scanRegExp(tokenPos) + tok := s.scanRegExp(tokenPos) + tok.Kind = KindSlashToken + return tok case "template": - return s.scanTemplatePart(tokenPos, true) + tok := s.scanTemplatePart(tokenPos, true) + tok.Kind = KindCloseBraceToken + return tok case "greater": return s.scanGreater(tokenPos) } @@ -454,8 +465,13 @@ func (s *Scanner) scan() Token { case '#': // Could be private identifier if s.peekAt(1) == '!' && tokenPos == 0 { - // Shebang — scan to end of line - return s.scanSingleLineComment(tokenPos) + // Shebang — scan to end of line, emit as ShebangTrivia + start := s.pos + for s.pos < len(s.text) && s.text[s.pos] != '\n' && s.text[s.pos] != '\r' { + s.pos++ + } + text := s.text[start:s.pos] + return Token{Kind: KindShebangTrivia, TokenPos: tokenPos, Text: text} } if isIdentStart(s.peekAt(1)) { return s.scanPrivateIdentifier(tokenPos) diff --git a/javascript/extractor/lib/typescript-go/internal/tsparser/metadata.go b/javascript/extractor/lib/typescript-go/internal/tsparser/metadata.go index c92afa234d0..6fba227adf7 100644 --- a/javascript/extractor/lib/typescript-go/internal/tsparser/metadata.go +++ b/javascript/extractor/lib/typescript-go/internal/tsparser/metadata.go @@ -36,7 +36,13 @@ func BuildKindToNameMap() map[uint32]string { // Generated from microsoft/typescript-go/internal/ast/kind.go (iota enum). 
var syntaxKinds = map[string]int{ "Unknown": 0, - "EndOfFile": 1, + "EndOfFileToken": 1, + "SingleLineCommentTrivia": 2, + "MultiLineCommentTrivia": 3, + "NewLineTrivia": 4, + "WhitespaceTrivia": 5, + "ShebangTrivia": 6, + "ConflictMarkerTrivia": 7, "NumericLiteral": 8, "BigIntLiteral": 9, "StringLiteral": 10, @@ -59,20 +65,54 @@ var syntaxKinds = map[string]int{ "CommaToken": 27, "QuestionDotToken": 28, "LessThanToken": 29, + "LessThanSlashToken": 30, "GreaterThanToken": 31, + "LessThanEqualsToken": 32, + "GreaterThanEqualsToken": 33, + "EqualsEqualsToken": 34, + "ExclamationEqualsToken": 35, + "EqualsEqualsEqualsToken": 36, + "ExclamationEqualsEqualsToken": 37, "EqualsGreaterThanToken": 38, "PlusToken": 39, "MinusToken": 40, "AsteriskToken": 41, + "AsteriskAsteriskToken": 42, "SlashToken": 43, + "PercentToken": 44, "PlusPlusToken": 45, "MinusMinusToken": 46, + "LessThanLessThanToken": 47, + "GreaterThanGreaterThanToken": 48, + "GreaterThanGreaterThanGreaterThanToken": 49, + "AmpersandToken": 50, + "BarToken": 51, + "CaretToken": 52, "ExclamationToken": 53, "TildeToken": 54, + "AmpersandAmpersandToken": 55, + "BarBarToken": 56, "QuestionToken": 57, "ColonToken": 58, "AtToken": 59, + "QuestionQuestionToken": 60, + "HashToken": 62, "EqualsToken": 63, + "PlusEqualsToken": 64, + "MinusEqualsToken": 65, + "AsteriskEqualsToken": 66, + "AsteriskAsteriskEqualsToken": 67, + "SlashEqualsToken": 68, + "PercentEqualsToken": 69, + "LessThanLessThanEqualsToken": 70, + "GreaterThanGreaterThanEqualsToken": 71, + "GreaterThanGreaterThanGreaterThanEqualsToken": 72, + "AmpersandEqualsToken": 73, + "BarEqualsToken": 74, + "BarBarEqualsToken": 75, + "AmpersandAmpersandEqualsToken": 76, + "QuestionQuestionEqualsToken": 77, + "CaretEqualsToken": 78, "Identifier": 79, "PrivateIdentifier": 80, "BreakKeyword": 82, @@ -332,29 +372,34 @@ var syntaxKinds = map[string]int{ "JSDocImportTag": 344, } -// nodeFlags maps NodeFlags names to their numeric values. 
+// nodeFlags maps NodeFlags names to their numeric values in TypeScript 7. +// TS7 removed the Synthesized flag, shifting all subsequent flags down by one bit +// compared to TS5. The Java extractor only checks Using, NestedNamespace, and +// GlobalAugmentation, but we include all flags for completeness. var nodeFlags = map[string]int{ "None": 0, "Let": 1, "Const": 2, - "NestedNamespace": 4, - "Synthesized": 8, - "Namespace": 16, - "OptionalChain": 32, - "ExportContext": 64, - "ContainsThis": 128, - "HasImplicitReturn": 256, - "HasExplicitReturn": 512, - "HasAsyncFunctions": 1024, - "DisallowInContext": 2048, - "YieldContext": 4096, - "DecoratorContext": 8192, - "AwaitContext": 16384, - "DisallowConditionalTypesContext": 32768, - "ThisNodeHasError": 65536, - "JavaScriptFile": 131072, - "ThisNodeOrAnySubNodesHasError": 262144, - "HasAggregatedChildData": 524288, - "JSDoc": 4194304, - "JsonFile": 33554432, + "Using": 4, // bit 2 (its own bit, not Let | Const, which would be 3) + "AwaitUsing": 6, // Using | Const + "NestedNamespace": 8, // bit 3 (TS7 binary AST doesn't set this) + "Namespace": 16, // bit 4 (was 32 in TS5) + "OptionalChain": 32, // bit 5 (was 64 in TS5) + "GlobalAugmentation": 64, // bit 6 — on `declare global { }` (was 2048 in TS5) + "ExportContext": 128, // bit 7 + "ContainsThis": 256, // bit 8 + "HasImplicitReturn": 512, // bit 9 + "HasExplicitReturn": 1024, // bit 10 + "HasAsyncFunctions": 2048, // bit 11 + "DisallowInContext": 4096, // bit 12 + "YieldContext": 8192, // bit 13 + "DecoratorContext": 16384, // bit 14 + "AwaitContext": 32768, // bit 15 + "DisallowConditionalTypesContext": 65536, // bit 16 + "ThisNodeHasError": 131072, // bit 17 + "JavaScriptFile": 262144, // bit 18 + "ThisNodeOrAnySubNodesHasError": 524288, // bit 19 + "HasAggregatedChildData": 1048576, // bit 20 + "JSDoc": 8388608, // bit 23 + "JsonFile": 67108864, // bit 26 } diff --git a/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go b/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go index 
6c2b70631e3..cf83565fe75 100644 --- a/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go +++ b/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strconv" "sync" @@ -55,11 +56,37 @@ func (p *TsgoParser) findBinary() (string, error) { // Look for tsgo on PATH (installed via: npm install -g @typescript/native-preview) path, err := exec.LookPath("tsgo") if err == nil { + // The npm-installed tsgo is a Node.js wrapper script that invokes the native binary. + // Try to resolve the native binary directly so we don't need Node.js at runtime. + if native := resolveNativeTsgo(path); native != "" { + return native, nil + } return path, nil } return "", fmt.Errorf("tsgo binary not found on PATH; install with: npm install -g @typescript/native-preview") } +// resolveNativeTsgo attempts to find the native tsgo binary inside an npm installation. +// The npm package @typescript/native-preview installs a Node.js wrapper at bin/tsgo +// which delegates to a platform-specific native binary at: +// node_modules/@typescript/native-preview-{os}-{arch}/lib/tsgo +func resolveNativeTsgo(wrapperPath string) string { + // Follow symlinks to find the real wrapper location + resolved, err := filepath.EvalSymlinks(wrapperPath) + if err != nil { + return "" + } + // The wrapper is at {pkg}/bin/tsgo.js or {pkg}/bin/tsgo + // The native binary is at {pkg}/node_modules/@typescript/native-preview-{os}-{arch}/lib/tsgo + pkgDir := filepath.Dir(filepath.Dir(resolved)) + platformPkg := fmt.Sprintf("@typescript/native-preview-%s-%s", runtime.GOOS, runtime.GOARCH) + native := filepath.Join(pkgDir, "node_modules", platformPkg, "lib", "tsgo") + if info, err := os.Stat(native); err == nil && !info.IsDir() { + return native + } + return "" +} + // startProcess starts the tsgo subprocess without sending any API requests. 
func (p *TsgoParser) startProcess() error { if p.started { diff --git a/javascript/extractor/tests/project-layout b/javascript/extractor/tests/project-layout index ecee7dfcd40..9ef589d8960 100644 --- a/javascript/extractor/tests/project-layout +++ b/javascript/extractor/tests/project-layout @@ -1 +1 @@ -**/*ql*/javascript/extractor/tests/*/input// +**/javascript/extractor/tests/*/input//