Add parse diagnostics support via getSyntacticDiagnostics API

Fetch syntactic diagnostics from the tsgo API after parsing each file.
Only genuine parse errors (diagnostic codes 1000-1999) are included;
higher codes like 2880 (import assertion deprecation) are filtered out
since they don't indicate actual parse failures.

The Java extractor uses parseDiagnostics to report syntax errors and
skip full AST extraction for broken files, matching TS5 behavior.

TRAP test results: 495/495 passing (100%)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Asger F
2026-04-13 15:27:47 +02:00
parent fbaf648e4f
commit 47d299e93b
2 changed files with 80 additions and 7 deletions

View File

@@ -9,11 +9,19 @@ import (
// Converter transforms a BinaryAST into the JSON format expected by the
// Java extractor.
type Converter struct {
ast *BinaryAST
kindNames map[uint32]string // numeric kind → string name
sourceText string // source file text for $lineStarts / $pos augmentation
utf16Offsets []int // maps byte offset → UTF-16 code unit offset
byteOffsets []int // maps UTF-16 code unit offset → byte offset
ast *BinaryAST
kindNames map[uint32]string // numeric kind → string name
sourceText string // source file text for $lineStarts / $pos augmentation
utf16Offsets []int // maps byte offset → UTF-16 code unit offset
byteOffsets []int // maps UTF-16 code unit offset → byte offset
parseDiagnostics []ParseDiagnostic // syntactic diagnostics from the compiler
}
// ParseDiagnostic describes a single syntactic error reported by the
// TypeScript compiler for a source file.
type ParseDiagnostic struct {
	// Pos is the UTF-16 offset at which the error starts.
	Pos int
	// End is the UTF-16 offset at which the error ends.
	End int
	// MessageText is the human-readable error message.
	MessageText string
}
// NewConverter creates a Converter for the given binary AST.
@@ -30,6 +38,11 @@ func NewConverter(ast *BinaryAST, kindToName map[uint32]string) *Converter {
}
}
// SetParseDiagnostics records the syntactic diagnostics that the converter
// should attach to the source file node's "parseDiagnostics" array.
//
// The slice is stored as-is (not copied), so later changes the caller makes to
// its elements are visible to the converter. Passing nil or an empty slice
// results in an empty "parseDiagnostics" array in the output.
func (c *Converter) SetParseDiagnostics(diags []ParseDiagnostic) {
c.parseDiagnostics = diags
}
// Convert transforms the binary AST into a JSON-serializable map.
// The root node is at index 1.
func (c *Converter) Convert() (map[string]interface{}, error) {
@@ -158,8 +171,16 @@ func (c *Converter) handleSourceFile(i int, extOff uint32, node map[string]inter
node["$lineStarts"] = computeLineStarts(c.sourceText, c.utf16Offsets)
}
// Add empty parseDiagnostics array (expected by Java extractor)
node["parseDiagnostics"] = []interface{}{}
// Add parseDiagnostics (expected by Java extractor).
// The Java extractor uses these to report syntax errors and skip full extraction.
diagArray := make([]interface{}, 0, len(c.parseDiagnostics))
for _, d := range c.parseDiagnostics {
diagArray = append(diagArray, map[string]interface{}{
"$pos": d.Pos,
"messageText": d.MessageText,
})
}
node["parseDiagnostics"] = diagArray
// Add children (statements + EndOfFile)
children := c.ast.Children(i)

View File

@@ -373,8 +373,12 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) {
return nil, fmt.Errorf("parse %s: failed to decode binary AST: %w", filename, err)
}
// Fetch syntactic diagnostics (parse errors) from the compiler.
diags := p.getSyntacticDiagnostics(filename)
kindToName := BuildKindToNameMap()
converter := astconv.NewConverter(binaryAST, kindToName)
converter.SetParseDiagnostics(diags)
astObj, err := converter.Convert()
if err != nil {
return nil, fmt.Errorf("parse %s: failed to convert AST: %w", filename, err)
@@ -388,6 +392,54 @@ func (p *TsgoParser) Parse(filename string) (*ParseResult, error) {
}, nil
}
// getSyntacticDiagnostics asks the tsgo API for the syntactic diagnostics of
// filename and returns the subset that are genuine parse errors.
//
// Only diagnostics whose code lies in the range 1000-1999 are kept. Other
// codes (e.g. 2880, the import assertion deprecation) are returned by the
// same request but do not indicate an actual parse failure, so they are
// filtered out.
//
// The lookup is best-effort: if the request fails or the response cannot be
// decoded, the error is dropped and an empty, non-nil slice is returned, so
// callers always receive a usable slice.
func (p *TsgoParser) getSyntacticDiagnostics(filename string) []astconv.ParseDiagnostic {
	// Half-open range [minParseErrorCode, maxParseErrorCode) of diagnostic
	// codes that are treated as parse errors.
	const (
		minParseErrorCode = 1000
		maxParseErrorCode = 2000
	)

	params := map[string]interface{}{
		"file": filename,
	}
	// The snapshot and project handles are only sent when they are set.
	if p.snapshotHandle != "" {
		params["snapshot"] = p.snapshotHandle
	}
	if p.projectHandle != "" {
		params["project"] = p.projectHandle
	}

	result, err := p.sendRequest("getSyntacticDiagnostics", params)
	if err != nil {
		// Best-effort: a failed request is treated as "no diagnostics".
		return []astconv.ParseDiagnostic{}
	}

	var rawDiags []struct {
		Pos      int    `json:"pos"`
		End      int    `json:"end"`
		Code     int    `json:"code"`
		Category int    `json:"category"`
		Text     string `json:"text"`
	}
	if err := json.Unmarshal(result, &rawDiags); err != nil {
		// Best-effort: an undecodable response is treated as "no diagnostics".
		return []astconv.ParseDiagnostic{}
	}

	diags := make([]astconv.ParseDiagnostic, 0, len(rawDiags))
	for _, d := range rawDiags {
		// Skip anything outside the parse-error code range.
		if d.Code < minParseErrorCode || d.Code >= maxParseErrorCode {
			continue
		}
		diags = append(diags, astconv.ParseDiagnostic{
			Pos:         d.Pos,
			End:         d.End,
			MessageText: d.Text,
		})
	}
	return diags
}
// GetMetadata returns the syntax kinds and node flags.
func (p *TsgoParser) GetMetadata() (*Metadata, error) {
return GetStaticTS7Metadata(), nil