mirror of
https://github.com/github/codeql.git
synced 2026-05-02 20:25:13 +02:00
Merge pull request #205 from max/trap-writer-long-strings
Teach TRAP writer to truncate strings longer than 1MiB.
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/Semmle/go/extractor/srcarchive"
|
||||
"golang.org/x/tools/go/packages"
|
||||
@@ -83,6 +84,22 @@ func (tw *Writer) ForEachObject(cb func(*Writer, types.Object, Label)) {
|
||||
}
|
||||
}
|
||||
|
||||
// max_strlen is the maximum length, in bytes, that capStringLength lets a string keep.
const max_strlen = 1024 * 1024

// capStringLength returns `s` unchanged if it is at most max_strlen bytes long, and
// otherwise a prefix of `s` that is at most max_strlen bytes long.
//
// If `s` is valid UTF-8 the cut never falls inside an encoded character, so the result is
// valid UTF-8 as well. If the bytes around the limit are not valid UTF-8, `s` is simply cut
// at max_strlen bytes.
func capStringLength(s string) string {
	if len(s) <= max_strlen {
		return s
	}

	// s[max_strlen] is the first byte that does not fit. If it starts a character we can cut
	// right before it; otherwise it is a continuation byte of a character straddling the
	// limit, and we have to cut before the first byte of that character instead. An encoded
	// character is at most utf8.UTFMax bytes long, so its first byte is never more than
	// utf8.UTFMax-1 bytes before the limit; bounding the search this way also guarantees
	// that we never index before the start of the string.
	for back := 0; back < utf8.UTFMax; back++ {
		cut := max_strlen - back
		if utf8.RuneStart(s[cut]) {
			return s[:cut]
		}
	}

	// only continuation bytes near the limit: the input is not valid UTF-8 here, so there
	// is no character boundary to respect
	return s[:max_strlen]
}
|
||||
|
||||
// Emit writes out a tuple of values for the given `table`
|
||||
func (tw *Writer) Emit(table string, values []interface{}) error {
|
||||
fmt.Fprintf(tw.w, "%s(", table)
|
||||
@@ -94,7 +111,7 @@ func (tw *Writer) Emit(table string, values []interface{}) error {
|
||||
case Label:
|
||||
fmt.Fprint(tw.w, value.id)
|
||||
case string:
|
||||
fmt.Fprintf(tw.w, "\"%s\"", escapeString(value))
|
||||
fmt.Fprintf(tw.w, "\"%s\"", escapeString(capStringLength(value)))
|
||||
case int:
|
||||
fmt.Fprintf(tw.w, "%d", value)
|
||||
default:
|
||||
|
||||
68
extractor/trap/trapwriter_test.go
Normal file
68
extractor/trap/trapwriter_test.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package trap
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
asciiChar = "*"
|
||||
bmpChar = "\u2028"
|
||||
nonBmpChar = "\U000101d0"
|
||||
)
|
||||
|
||||
func TestCapStringLength(t *testing.T) {
|
||||
// test simple cases only involving ASCII characters
|
||||
short := strings.Repeat(asciiChar, max_strlen-1)
|
||||
if capStringLength(short) != short {
|
||||
t.Errorf("Strings shorter than maximum length should not be truncated")
|
||||
}
|
||||
|
||||
short = strings.Repeat(asciiChar, max_strlen)
|
||||
if capStringLength(short) != short {
|
||||
t.Errorf("Strings no longer than maximum length should not be truncated")
|
||||
}
|
||||
|
||||
long := strings.Repeat(asciiChar, max_strlen+1)
|
||||
if capStringLength(long) != long[0:max_strlen] {
|
||||
t.Errorf("Strings longer than maximum length should be truncated")
|
||||
}
|
||||
|
||||
// test chopping off non-ASCII characters
|
||||
prefix := strings.Repeat(asciiChar, max_strlen)
|
||||
long = prefix + bmpChar
|
||||
if capStringLength(long) != prefix {
|
||||
t.Errorf("BMP character after max_strlen should be correctly chopped off")
|
||||
}
|
||||
|
||||
prefix = strings.Repeat(asciiChar, max_strlen)
|
||||
long = prefix + nonBmpChar
|
||||
if capStringLength(long) != prefix {
|
||||
t.Errorf("Non-BMP character after max_strlen should be correctly chopped off")
|
||||
}
|
||||
|
||||
prefix = strings.Repeat(asciiChar, max_strlen-(len(bmpChar)-1))
|
||||
long = prefix + bmpChar
|
||||
if capStringLength(long) != prefix {
|
||||
t.Errorf("BMP character straddling max_strlen should be correctly chopped off")
|
||||
}
|
||||
|
||||
prefix = strings.Repeat(asciiChar, max_strlen-(len(nonBmpChar)-1))
|
||||
long = prefix + nonBmpChar
|
||||
if capStringLength(long) != prefix {
|
||||
t.Errorf("Non-BMP character straddling max_strlen should be correctly chopped off")
|
||||
}
|
||||
|
||||
// test preserving non-ASCII characters that just about fit
|
||||
prefix = strings.Repeat(asciiChar, max_strlen-len(bmpChar))
|
||||
short = prefix + bmpChar
|
||||
if capStringLength(short) != short {
|
||||
t.Errorf("BMP character before max_strlen should be correctly preserved")
|
||||
}
|
||||
|
||||
prefix = strings.Repeat(asciiChar, max_strlen-len(nonBmpChar))
|
||||
short = prefix + nonBmpChar
|
||||
if capStringLength(short) != short {
|
||||
t.Errorf("Non-BMP character before max_strlen should be correctly preserved")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user