mirror of
https://github.com/github/codeql.git
synced 2025-12-21 03:06:31 +01:00
TRAP formatting: adopt Java's standards
* Encode dates with D"" strings
* Truncate exceedingly long string values
* Note that floats don't require any special handling
This commit is contained in:
committed by
Ian Lynagh
parent
4adf5829e4
commit
b9d8fe72f0
@@ -0,0 +1,52 @@
|
||||
package com.semmle.util.unicode;
|
||||
|
||||
public class UTF8Util {
    /**
     * Computes how many leading UTF-16 code units of {@code str} can be kept so
     * that the UTF-8 encoding of that prefix occupies at most
     * {@code maxEncodedLength} bytes.
     *
     * <p>
     * The result is counted in {@code char}s (code units), not code points, so
     * it can be passed directly to {@link String#substring(int, int)}. A valid
     * surrogate pair is never split: either both halves are inside the prefix
     * or neither is.
     * </p>
     *
     * <p>
     * A surrogate that is not part of a valid pair (a high surrogate without a
     * following low surrogate, or a low surrogate on its own) is budgeted at
     * three bytes. That is a safe over-estimate: such a character is replaced
     * on encoding, and the replacement needs at most three bytes.
     * </p>
     *
     * @param str string to encode
     * @param maxEncodedLength maximum number of bytes for the encoded prefix
     * @return length, in UTF-16 code units, of the longest prefix that fits
     */
    public static int encodablePrefixLength(String str, int maxEncodedLength) {
        final int units = str.length();

        // Fast path: a single code unit never contributes more than three
        // bytes, so a string this short is guaranteed to fit in full.
        if (units <= maxEncodedLength / 3) {
            return units;
        }

        int bytesUsed = 0;
        int pos = 0;
        while (pos < units) {
            final char unit = str.charAt(pos);
            final int cost;  // UTF-8 bytes needed for the character starting at pos
            final int width; // UTF-16 code units that character occupies

            if (unit < 0x80) {
                cost = 1;
                width = 1;
            } else if (unit < 0x800) {
                cost = 2;
                width = 1;
            } else if (pos + 1 < units && Character.isSurrogatePair(unit, str.charAt(pos + 1))) {
                // a well-formed surrogate pair encodes to four bytes
                cost = 4;
                width = 2;
            } else {
                // remaining BMP characters, plus unpaired surrogates (assumed three bytes)
                cost = 3;
                width = 1;
            }

            bytesUsed += cost;
            if (bytesUsed > maxEncodedLength) {
                // the character starting at pos does not fit; cut just before it
                return pos;
            }
            pos += width;
        }

        return units;
    }
}
|
||||
@@ -8,6 +8,7 @@ import java.io.FileOutputStream
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
import com.semmle.util.files.FileUtil
|
||||
import com.semmle.util.unicode.UTF8Util
|
||||
import kotlin.system.exitProcess
|
||||
|
||||
class KotlinExtractorExtension(
|
||||
@@ -117,6 +118,10 @@ class FileExtractionProblems(val invocationExtractionProblems: ExtractionProblem
|
||||
|
||||
/**
 * Returns [str] with every double-quote character (`"`) doubled (`""`).
 *
 * All other characters are left untouched. Going by the function name, this is
 * the quoting applied to string values before they are written to TRAP output.
 */
fun escapeTrapString(str: String): String {
    return str.replace("\"", "\"\"")
}
|
||||
|
||||
/**
 * Byte budget that [truncateString] hands to `UTF8Util.encodablePrefixLength`:
 * 2^20 = 1,048,576 bytes (1 MiB) of UTF-8 encoded text.
 */
const val MAX_STRLEN = 1 shl 20
|
||||
|
||||
/**
 * Returns the leading part of [str] whose length is given by
 * `UTF8Util.encodablePrefixLength(str, MAX_STRLEN)`, i.e. the longest prefix
 * that can be UTF-8 encoded in at most [MAX_STRLEN] bytes.
 *
 * When the whole string fits, the prefix length equals `str.length` and the
 * full contents are returned.
 */
fun truncateString(str: String): String {
    val prefixLength = UTF8Util.encodablePrefixLength(str, MAX_STRLEN)
    return str.substring(0, prefixLength)
}
|
||||
|
||||
private fun equivalentTrap(f1: File, f2: File): Boolean {
|
||||
f1.bufferedReader().use { bw1 ->
|
||||
f2.bufferedReader().use { bw2 ->
|
||||
|
||||
Reference in New Issue
Block a user