TRAP formatting: adopt Java's standards

* Encode dates with D"" strings
* Truncate exceedingly long string values
* Note that floats don't require any special handling
This commit is contained in:
Chris Smowton
2022-01-31 15:13:09 +00:00
committed by Ian Lynagh
parent 4adf5829e4
commit b9d8fe72f0
3 changed files with 63 additions and 5 deletions

View File

@@ -0,0 +1,52 @@
package com.semmle.util.unicode;
public class UTF8Util {
    /**
     * Computes how many leading UTF-16 code units of a string can be kept so that
     * the UTF-8 encoding of that prefix fits within a byte budget.
     *
     * <p>
     * The result is counted in code units (Java {@code char}s), not code points,
     * so it can be passed directly to {@link String#substring(int, int)}. A valid
     * surrogate pair is either kept whole or dropped whole; the returned prefix
     * never ends between its two halves.
     * </p>
     *
     * <p>
     * Code units that cannot be encoded on their own (a high surrogate without a
     * following low surrogate, or a low surrogate without a preceding high
     * surrogate) are budgeted at three bytes. That is a safe over-estimate: an
     * encoder substitutes a replacement character for them, and that replacement
     * needs at most three bytes.
     * </p>
     *
     * @param str string to measure
     * @param maxEncodedLength byte budget for the UTF-8 encoded prefix
     * @return number of leading code units of {@code str} that fit in the budget
     */
    public static int encodablePrefixLength(String str, int maxEncodedLength) {
        final int total = str.length();

        // Fast path: a single code unit never costs more than three bytes, so a
        // string this short is guaranteed to fit without inspecting its contents.
        if (total <= maxEncodedLength / 3) {
            return total;
        }

        int bytesUsed = 0;
        int pos = 0;
        while (pos < total) {
            final char unit = str.charAt(pos);
            final int cost;
            int consumed = 1;

            if (unit <= 0x7f) {
                // ASCII range: one byte.
                cost = 1;
            } else if (unit <= 0x7ff) {
                // Two-byte range.
                cost = 2;
            } else if (Character.isHighSurrogate(unit)
                    && pos + 1 < total
                    && Character.isLowSurrogate(str.charAt(pos + 1))) {
                // A well-formed surrogate pair: two code units, four bytes.
                cost = 4;
                consumed = 2;
            } else {
                // Remaining BMP characters, and lone surrogates of either kind.
                cost = 3;
            }

            bytesUsed += cost;
            if (bytesUsed > maxEncodedLength) {
                // The character starting at pos does not fit; cut just before it.
                return pos;
            }
            pos += consumed;
        }
        return total;
    }
}

View File

@@ -8,6 +8,7 @@ import java.io.FileOutputStream
import java.nio.file.Files
import java.nio.file.Paths
import com.semmle.util.files.FileUtil
import com.semmle.util.unicode.UTF8Util
import kotlin.system.exitProcess
class KotlinExtractorExtension(
@@ -117,6 +118,10 @@ class FileExtractionProblems(val invocationExtractionProblems: ExtractionProblem
/**
 * Returns a copy of [str] in which every double-quote character has been
 * doubled (`"` becomes `""`). All other characters are left untouched.
 */
fun escapeTrapString(str: String): String {
    val quote = "\""
    val doubledQuote = "\"\""
    return str.replace(quote, doubledQuote)
}
/** Byte budget handed to [truncateString]: 2^20 bytes (1 MiB). */
const val MAX_STRLEN = 1 shl 20

/**
 * Returns the longest prefix of [str] that `UTF8Util.encodablePrefixLength`
 * reports as fitting within [MAX_STRLEN] bytes once UTF-8 encoded. Strings that
 * already fit are returned in full.
 */
fun truncateString(str: String): String {
    val keptCodeUnits = UTF8Util.encodablePrefixLength(str, MAX_STRLEN)
    return str.substring(0, keptCodeUnits)
}
private fun equivalentTrap(f1: File, f2: File): Boolean {
f1.bufferedReader().use { bw1 ->
f2.bufferedReader().use { bw2 ->