diff --git a/javascript/extractor/lib/typescript-go/internal/astconv/converter.go b/javascript/extractor/lib/typescript-go/internal/astconv/converter.go index 2fc611a4c8c..251673d80cb 100644 --- a/javascript/extractor/lib/typescript-go/internal/astconv/converter.go +++ b/javascript/extractor/lib/typescript-go/internal/astconv/converter.go @@ -9,11 +9,19 @@ import ( // Converter transforms a BinaryAST into the JSON format expected by the // Java extractor. type Converter struct { - ast *BinaryAST - kindNames map[uint32]string // numeric kind → string name - sourceText string // source file text for $lineStarts / $pos augmentation - utf16Offsets []int // maps byte offset → UTF-16 code unit offset - byteOffsets []int // maps UTF-16 code unit offset → byte offset + ast *BinaryAST + kindNames map[uint32]string // numeric kind → string name + sourceText string // source file text for $lineStarts / $pos augmentation + utf16Offsets []int // maps byte offset → UTF-16 code unit offset + byteOffsets []int // maps UTF-16 code unit offset → byte offset + parseDiagnostics []ParseDiagnostic // syntactic diagnostics supplied via SetParseDiagnostics +} + +// ParseDiagnostic represents a syntactic error reported by the TypeScript compiler. +type ParseDiagnostic struct { + Pos int // UTF-16 offset of error start + End int // UTF-16 offset of error end; carried here but not written to the JSON output by handleSourceFile + MessageText string // human-readable error message } // NewConverter creates a Converter for the given binary AST. @@ -30,6 +38,11 @@ func NewConverter(ast *BinaryAST, kindToName map[uint32]string) *Converter { } } +// SetParseDiagnostics sets the syntactic diagnostics to include in the output; the slice is retained as-is, not copied. +func (c *Converter) SetParseDiagnostics(diags []ParseDiagnostic) { + c.parseDiagnostics = diags +} + // Convert transforms the binary AST into a JSON-serializable map. // The root node is at index 1. 
func (c *Converter) Convert() (map[string]interface{}, error) { @@ -158,8 +171,16 @@ func (c *Converter) handleSourceFile(i int, extOff uint32, node map[string]inter node["$lineStarts"] = computeLineStarts(c.sourceText, c.utf16Offsets) } - // Add empty parseDiagnostics array (expected by Java extractor) - node["parseDiagnostics"] = []interface{}{} + // Add parseDiagnostics (expected by Java extractor); each entry carries only the start offset ($pos) and the message, and the value is an empty, non-nil slice when there are no diagnostics. + // The Java extractor uses these to report syntax errors and skip full extraction. + diagArray := make([]interface{}, 0, len(c.parseDiagnostics)) + for _, d := range c.parseDiagnostics { + diagArray = append(diagArray, map[string]interface{}{ + "$pos": d.Pos, + "messageText": d.MessageText, + }) + } + node["parseDiagnostics"] = diagArray // Add children (statements + EndOfFile) children := c.ast.Children(i) diff --git a/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go b/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go index cf83565fe75..8cb4a64640b 100644 --- a/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go +++ b/javascript/extractor/lib/typescript-go/internal/tsparser/tsgo.go @@ -373,8 +373,12 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) { return nil, fmt.Errorf("parse %s: failed to decode binary AST: %w", filename, err) } + // Fetch syntactic diagnostics (parse errors) from the compiler; best-effort, so a failed fetch yields no diagnostics rather than failing Parse. + diags := p.getSyntacticDiagnostics(filename) + kindToName := BuildKindToNameMap() converter := astconv.NewConverter(binaryAST, kindToName) + converter.SetParseDiagnostics(diags) astObj, err := converter.Convert() if err != nil { return nil, fmt.Errorf("parse %s: failed to convert AST: %w", filename, err) @@ -388,6 +392,54 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) { }, nil } +// getSyntacticDiagnostics fetches parse errors from the tsgo API. 
+// Only includes true parse errors (diagnostic codes 1000-1999), not semantic-level +// diagnostics like deprecation warnings that TS7 added (e.g., code 2880 for +// import assertions). Returns nil if the request or JSON decoding fails (best-effort). +func (p *TsgoParser) getSyntacticDiagnostics(filename string) []astconv.ParseDiagnostic { + params := map[string]interface{}{ + "file": filename, + } + if p.snapshotHandle != "" { + params["snapshot"] = p.snapshotHandle + } + if p.projectHandle != "" { + params["project"] = p.projectHandle + } + + result, err := p.sendRequest("getSyntacticDiagnostics", params) + if err != nil { + return nil + } + + var rawDiags []struct { + Pos int `json:"pos"` + End int `json:"end"` + Code int `json:"code"` + Category int `json:"category"` // decoded but not used below + Text string `json:"text"` + } + if err := json.Unmarshal(result, &rawDiags); err != nil { + return nil + } + + diags := make([]astconv.ParseDiagnostic, 0, len(rawDiags)) + for _, d := range rawDiags { + // Only include genuine parse errors (codes 1000-1999); codes below 1000 are dropped as well. + // Higher codes are semantic diagnostics that TS7 reports as "syntactic" + // but which don't indicate actual parse failures. + if d.Code < 1000 || d.Code >= 2000 { + continue + } + diags = append(diags, astconv.ParseDiagnostic{ + Pos: d.Pos, + End: d.End, + MessageText: d.Text, + }) + } + return diags +} + // GetMetadata returns the syntax kinds and node flags. func (p *TsgoParser) GetMetadata() (*Metadata, error) { return GetStaticTS7Metadata(), nil