mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #924 from calumgrant/cs/extractor-string-length
C# extractor: Limit trap strings to 1MB
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
using System.Linq;
|
||||
|
||||
namespace Semmle.Extraction
|
||||
{
|
||||
/// <summary>
|
||||
@@ -15,6 +17,68 @@ namespace Semmle.Extraction
|
||||
Args = args;
|
||||
}
|
||||
|
||||
/// <summary>
/// Byte budget used when writing a string (or string array) argument: 1MB.
/// </summary>
private const int maxStringBytes = 1024 * 1024;

/// <summary>
/// The encoding used to measure how many bytes a string occupies.
/// </summary>
private static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
|
||||
|
||||
/// <summary>
/// Determines whether the encoded form of a string is larger than <c>maxStringBytes</c>.
/// </summary>
/// <param name="s">The string to measure.</param>
/// <returns>True if the encoded string exceeds the limit.</returns>
private static bool NeedsTruncation(string s)
{
    // Cheap test first: if even the worst-case encoding fits, there is no
    // need to walk the string and count its real encoded size.
    if (encoding.GetMaxByteCount(s.Length) <= maxStringBytes)
    {
        return false;
    }

    return encoding.GetByteCount(s) > maxStringBytes;
}
|
||||
|
||||
/// <summary>
/// Determines whether the combined encoded form of all strings in an array
/// is larger than <c>maxStringBytes</c>.
/// </summary>
/// <param name="array">The strings to measure.</param>
/// <returns>True if the total encoded size exceeds the limit.</returns>
private static bool NeedsTruncation(string[] array)
{
    // Cheap test first: bound the encoded size using only the character counts.
    int totalChars = array.Sum(element => element.Length);
    if (encoding.GetMaxByteCount(totalChars) <= maxStringBytes)
    {
        return false;
    }

    // The bound was inconclusive, so count the real encoded size.
    int totalBytes = array.Sum(element => encoding.GetByteCount(element));
    return totalBytes > maxStringBytes;
}
|
||||
|
||||
/// <summary>
/// Appends a string to the trap builder after escaping it with <c>EncodeString</c>.
/// </summary>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
private static void WriteString(ITrapBuilder tb, string s)
{
    var encoded = EncodeString(s);
    tb.Append(encoded);
}
|
||||
|
||||
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <remarks>
/// The string is only cut between whole characters: a surrogate pair is either kept
/// or dropped as a unit, so the result never ends in a dangling high surrogate.
/// </remarks>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">
/// The number of bytes available. On return it has been reduced by the encoded
/// size of the returned string.
/// </param>
/// <returns>The truncated string.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
    int outputLen = encoding.GetByteCount(s);
    if (outputLen > bytesRemaining)
    {
        // Slow path: find the longest prefix that fits in the budget.
        outputLen = 0;
        int chars = 0;
        while (chars < s.Length)
        {
            // Measure a surrogate pair as one unit. Measuring each half on its own
            // would count it as a replacement character and could split the pair.
            int unit = char.IsSurrogatePair(s, chars) ? 2 : 1;
            int bytes = encoding.GetByteCount(s, chars, unit);
            if (outputLen + bytes > bytesRemaining)
            {
                break;
            }

            outputLen += bytes;
            chars += unit;
        }
        s = s.Substring(0, chars);
    }
    bytesRemaining -= outputLen;
    return s;
}
|
||||
|
||||
/// <summary>
/// Escapes a string by doubling every double-quote character in it.
/// </summary>
/// <param name="s">The string to escape.</param>
/// <returns>The escaped string.</returns>
private static string EncodeString(string s)
{
    return s.Replace("\"", "\"\"");
}
|
||||
|
||||
/// <summary>
/// Truncates a string to fit within <paramref name="bytesRemaining"/> bytes and
/// writes the result to the trap file.
/// </summary>
/// <remarks>
/// The byte budget is applied by <c>TruncateString</c> before <c>WriteString</c>
/// escapes the text, so escaping can add bytes beyond the budget.
/// </remarks>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
/// <param name="bytesRemaining">The remaining bytes available to output.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
    var truncated = TruncateString(s, ref bytesRemaining);
    WriteString(tb, truncated);
}
|
||||
|
||||
/// <summary>
|
||||
/// Constructs a unique string for this tuple.
|
||||
/// </summary>
|
||||
@@ -27,49 +91,60 @@ namespace Semmle.Extraction
|
||||
foreach (var a in Args)
|
||||
{
|
||||
if (column > 0) tb.Append(", ");
|
||||
if (a is Label)
|
||||
{
|
||||
((Label)a).AppendTo(tb);
|
||||
}
|
||||
else if (a is IEntity)
|
||||
{
|
||||
((IEntity)a).Label.AppendTo(tb);
|
||||
}
|
||||
else if (a is string)
|
||||
switch(a)
|
||||
{
|
||||
case Label l:
|
||||
l.AppendTo(tb);
|
||||
break;
|
||||
case IEntity e:
|
||||
e.Label.AppendTo(tb);
|
||||
break;
|
||||
case string s:
|
||||
tb.Append("\"");
|
||||
tb.Append(((string)a).Replace("\"", "\"\""));
|
||||
if (NeedsTruncation(s))
|
||||
{
|
||||
// Slow path
|
||||
int remaining = maxStringBytes;
|
||||
WriteTruncatedString(tb, s, ref remaining);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fast path
|
||||
WriteString(tb, s);
|
||||
}
|
||||
tb.Append("\"");
|
||||
}
|
||||
else if (a is System.Enum)
|
||||
{
|
||||
break;
|
||||
case System.Enum _:
|
||||
tb.Append((int)a);
|
||||
}
|
||||
else if (a is int)
|
||||
break;
|
||||
case int i:
|
||||
tb.Append(i);
|
||||
break;
|
||||
case string[] array:
|
||||
tb.Append("\"");
|
||||
if (NeedsTruncation(array))
|
||||
{
|
||||
tb.Append((int)a);
|
||||
// Slow path
|
||||
int remaining = maxStringBytes;
|
||||
foreach (var element in array)
|
||||
WriteTruncatedString(tb, element, ref remaining);
|
||||
}
|
||||
else if (a == null)
|
||||
else
|
||||
{
|
||||
// Fast path
|
||||
foreach (var element in array)
|
||||
WriteString(tb, element);
|
||||
}
|
||||
tb.Append("\"");
|
||||
break;
|
||||
case null:
|
||||
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
|
||||
Name, column);
|
||||
}
|
||||
else
|
||||
{
|
||||
var array = a as string[];
|
||||
if (array != null)
|
||||
{
|
||||
tb.Append("\"");
|
||||
foreach (var element in array)
|
||||
tb.Append(element.Replace("\"", "\"\""));
|
||||
tb.Append("\"");
|
||||
}
|
||||
else
|
||||
{
|
||||
default:
|
||||
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
|
||||
a.GetType(), Name, column);
|
||||
}
|
||||
}
|
||||
|
||||
++column;
|
||||
}
|
||||
tb.Append(")");
|
||||
|
||||
15508
csharp/ql/test/library-tests/extractor/longstrings/LongStrings.cs
Normal file
15508
csharp/ql/test/library-tests/extractor/longstrings/LongStrings.cs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1 @@
|
||||
| Test passed |
|
||||
@@ -0,0 +1,9 @@
|
||||
import csharp
|
||||
|
||||
// Passes when one string literal has a value of exactly 1048576 characters and
// another has a value whose length lies strictly between 440000 and 450000.
from StringLiteral ascii, StringLiteral utf8
where
  ascii.getValue().length() = 1048576 and
  // UTF8 encoding can vary a little from platform to platform
  exists(int utf8Length | utf8Length = utf8.getValue().length() |
    utf8Length > 440000 and
    utf8Length < 450000
  )
select "Test passed"
|
||||
Reference in New Issue
Block a user