mirror of
https://github.com/github/codeql.git
synced 2026-05-02 20:25:13 +02:00
C# extractor: Limit string literals to 1MB. This is made more complicated by the fact that we need to limit the number of bytes to output, rather than the number of characters.
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
using System.Linq;
|
||||
|
||||
namespace Semmle.Extraction
|
||||
{
|
||||
/// <summary>
|
||||
@@ -15,6 +17,68 @@ namespace Semmle.Extraction
|
||||
Args = args;
|
||||
}
|
||||
|
||||
const int maxStringBytes = 1<<20; // 1MB
|
||||
static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
|
||||
|
||||
private static bool NeedsTruncation(string s)
|
||||
{
|
||||
// Optimization: only count the actual number of bytes if there is the possibility
|
||||
// of the string exceeding maxStringBytes
|
||||
return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
|
||||
encoding.GetByteCount(s) > maxStringBytes;
|
||||
}
|
||||
|
||||
private static bool NeedsTruncation(string[] array)
|
||||
{
|
||||
// Optimization: only count the actual number of bytes if there is the possibility
|
||||
// of the strings exceeding maxStringBytes
|
||||
return encoding.GetMaxByteCount(array.Sum(s => s.Length)) > maxStringBytes &&
|
||||
array.Sum(s => encoding.GetByteCount(s)) > maxStringBytes;
|
||||
}
|
||||
|
||||
private static void WriteString(ITrapBuilder tb, string s) => tb.Append(EncodeString(s));
|
||||
|
||||
/// <summary>
|
||||
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
|
||||
/// </summary>
|
||||
/// <param name="s">The input string to truncate.</param>
|
||||
/// <param name="bytesRemaining">The number of bytes available.</param>
|
||||
/// <returns>The truncated string.</returns>
|
||||
private static string TruncateString(string s, ref int bytesRemaining)
|
||||
{
|
||||
int outputLen = encoding.GetByteCount(s);
|
||||
if (outputLen > bytesRemaining)
|
||||
{
|
||||
outputLen = 0;
|
||||
int chars;
|
||||
for (chars = 0; chars < s.Length; ++chars)
|
||||
{
|
||||
var bytes = encoding.GetByteCount(s, chars, 1);
|
||||
if (outputLen + bytes <= bytesRemaining)
|
||||
outputLen += bytes;
|
||||
else
|
||||
break;
|
||||
}
|
||||
s = s.Substring(0, chars);
|
||||
}
|
||||
bytesRemaining -= outputLen;
|
||||
return s;
|
||||
}
|
||||
|
||||
private static string EncodeString(string s) => s.Replace("\"", "\"\"");
|
||||
|
||||
/// <summary>
|
||||
/// Output a string to the trap file, such that the encoded output does not exceed
|
||||
/// <paramref name="bytesRemaining"/> bytes.
|
||||
/// </summary>
|
||||
/// <param name="tb">The trapbuilder</param>
|
||||
/// <param name="s">The string to output.</param>
|
||||
/// <param name="bytesRemaining">The remaining bytes available to output.</param>
|
||||
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
|
||||
{
|
||||
WriteString(tb, TruncateString(s, ref bytesRemaining));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Constructs a unique string for this tuple.
|
||||
/// </summary>
|
||||
@@ -27,48 +91,65 @@ namespace Semmle.Extraction
|
||||
foreach (var a in Args)
|
||||
{
|
||||
if (column > 0) tb.Append(", ");
|
||||
if (a is Label)
|
||||
if (a is Label l)
|
||||
{
|
||||
((Label)a).AppendTo(tb);
|
||||
l.AppendTo(tb);
|
||||
}
|
||||
else if (a is IEntity)
|
||||
else if (a is IEntity e)
|
||||
{
|
||||
((IEntity)a).Label.AppendTo(tb);
|
||||
e.Label.AppendTo(tb);
|
||||
}
|
||||
else if (a is string)
|
||||
else if (a is string s)
|
||||
{
|
||||
tb.Append("\"");
|
||||
tb.Append(((string)a).Replace("\"", "\"\""));
|
||||
if (NeedsTruncation(s))
|
||||
{
|
||||
// Slow path
|
||||
int remaining = maxStringBytes;
|
||||
WriteTruncatedString(tb, s, ref remaining);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fast path
|
||||
WriteString(tb, s);
|
||||
}
|
||||
tb.Append("\"");
|
||||
}
|
||||
else if (a is System.Enum)
|
||||
{
|
||||
tb.Append((int)a);
|
||||
}
|
||||
else if (a is int)
|
||||
else if (a is int i)
|
||||
{
|
||||
tb.Append((int)a);
|
||||
tb.Append(i);
|
||||
}
|
||||
else if (a == null)
|
||||
else if(a is string[] array)
|
||||
{
|
||||
tb.Append("\"");
|
||||
if (NeedsTruncation(array))
|
||||
{
|
||||
// Slow path
|
||||
int remaining = maxStringBytes;
|
||||
foreach (var element in array)
|
||||
WriteTruncatedString(tb, element, ref remaining);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fast path
|
||||
foreach (var element in array)
|
||||
WriteString(tb, element);
|
||||
}
|
||||
tb.Append("\"");
|
||||
}
|
||||
else if (a is null)
|
||||
{
|
||||
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
|
||||
Name, column);
|
||||
}
|
||||
else
|
||||
{
|
||||
var array = a as string[];
|
||||
if (array != null)
|
||||
{
|
||||
tb.Append("\"");
|
||||
foreach (var element in array)
|
||||
tb.Append(element.Replace("\"", "\"\""));
|
||||
tb.Append("\"");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
|
||||
a.GetType(), Name, column);
|
||||
}
|
||||
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
|
||||
a.GetType(), Name, column);
|
||||
}
|
||||
++column;
|
||||
}
|
||||
|
||||
15508
csharp/ql/test/library-tests/extractor/longstrings/LongStrings.cs
Normal file
15508
csharp/ql/test/library-tests/extractor/longstrings/LongStrings.cs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,2 @@
|
||||
| 443093 |
|
||||
| 1048576 |
|
||||
@@ -0,0 +1,4 @@
|
||||
import csharp
|
||||
|
||||
from StringLiteral lit
|
||||
select lit.getValue().length()
|
||||
Reference in New Issue
Block a user