C# extractor: Limit string literals to 1MB. This is made more complicated by the fact that we need to limit the number of bytes to output, rather than the number of characters.

This commit is contained in:
calum
2019-02-11 17:36:23 +00:00
parent 97fc4b0c8e
commit 8bb1af884a
4 changed files with 15617 additions and 22 deletions

View File

@@ -1,3 +1,5 @@
using System.Linq;
namespace Semmle.Extraction
{
/// <summary>
@@ -15,6 +17,68 @@ namespace Semmle.Extraction
Args = args;
}
const int maxStringBytes = 1<<20; // 1MB
static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
private static bool NeedsTruncation(string s)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the string exceeding maxStringBytes
return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
encoding.GetByteCount(s) > maxStringBytes;
}
private static bool NeedsTruncation(string[] array)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the strings exceeding maxStringBytes
return encoding.GetMaxByteCount(array.Sum(s => s.Length)) > maxStringBytes &&
array.Sum(s => encoding.GetByteCount(s)) > maxStringBytes;
}
private static void WriteString(ITrapBuilder tb, string s) => tb.Append(EncodeString(s));
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">The number of bytes available.</param>
/// <returns>The truncated string.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
int outputLen = encoding.GetByteCount(s);
if (outputLen > bytesRemaining)
{
outputLen = 0;
int chars;
for (chars = 0; chars < s.Length; ++chars)
{
var bytes = encoding.GetByteCount(s, chars, 1);
if (outputLen + bytes <= bytesRemaining)
outputLen += bytes;
else
break;
}
s = s.Substring(0, chars);
}
bytesRemaining -= outputLen;
return s;
}
private static string EncodeString(string s) => s.Replace("\"", "\"\"");
/// <summary>
/// Output a string to the trap file, such that the encoded output does not exceed
/// <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
/// <param name="bytesRemaining">The remaining bytes available to output.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
WriteString(tb, TruncateString(s, ref bytesRemaining));
}
/// <summary>
/// Constructs a unique string for this tuple.
/// </summary>
@@ -27,48 +91,65 @@ namespace Semmle.Extraction
foreach (var a in Args)
{
if (column > 0) tb.Append(", ");
if (a is Label)
if (a is Label l)
{
((Label)a).AppendTo(tb);
l.AppendTo(tb);
}
else if (a is IEntity)
else if (a is IEntity e)
{
((IEntity)a).Label.AppendTo(tb);
e.Label.AppendTo(tb);
}
else if (a is string)
else if (a is string s)
{
tb.Append("\"");
tb.Append(((string)a).Replace("\"", "\"\""));
if (NeedsTruncation(s))
{
// Slow path
int remaining = maxStringBytes;
WriteTruncatedString(tb, s, ref remaining);
}
else
{
// Fast path
WriteString(tb, s);
}
tb.Append("\"");
}
else if (a is System.Enum)
{
tb.Append((int)a);
}
else if (a is int)
else if (a is int i)
{
tb.Append((int)a);
tb.Append(i);
}
else if (a == null)
else if(a is string[] array)
{
tb.Append("\"");
if (NeedsTruncation(array))
{
// Slow path
int remaining = maxStringBytes;
foreach (var element in array)
WriteTruncatedString(tb, element, ref remaining);
}
else
{
// Fast path
foreach (var element in array)
WriteString(tb, element);
}
tb.Append("\"");
}
else if (a is null)
{
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
Name, column);
}
else
{
var array = a as string[];
if (array != null)
{
tb.Append("\"");
foreach (var element in array)
tb.Append(element.Replace("\"", "\"\""));
tb.Append("\"");
}
else
{
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
a.GetType(), Name, column);
}
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
a.GetType(), Name, column);
}
++column;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
| 443093 |
| 1048576 |

View File

@@ -0,0 +1,4 @@
import csharp
from StringLiteral lit
select lit.getValue().length()