Merge pull request #924 from calumgrant/cs/extractor-string-length

C# extractor: Limit trap strings to 1MB
This commit is contained in:
Tom Hvitved
2019-02-13 09:34:36 +01:00
committed by GitHub
4 changed files with 15630 additions and 37 deletions

View File

@@ -1,3 +1,5 @@
using System.Linq;
namespace Semmle.Extraction namespace Semmle.Extraction
{ {
/// <summary> /// <summary>
@@ -15,6 +17,68 @@ namespace Semmle.Extraction
Args = args; Args = args;
} }
const int maxStringBytes = 1<<20; // 1MB
static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
private static bool NeedsTruncation(string s)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the string exceeding maxStringBytes
return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
encoding.GetByteCount(s) > maxStringBytes;
}
private static bool NeedsTruncation(string[] array)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the strings exceeding maxStringBytes
return encoding.GetMaxByteCount(array.Sum(s => s.Length)) > maxStringBytes &&
array.Sum(encoding.GetByteCount) > maxStringBytes;
}
private static void WriteString(ITrapBuilder tb, string s) => tb.Append(EncodeString(s));
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">The number of bytes available.</param>
/// <returns>The truncated string.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
int outputLen = encoding.GetByteCount(s);
if (outputLen > bytesRemaining)
{
outputLen = 0;
int chars;
for (chars = 0; chars < s.Length; ++chars)
{
var bytes = encoding.GetByteCount(s, chars, 1);
if (outputLen + bytes <= bytesRemaining)
outputLen += bytes;
else
break;
}
s = s.Substring(0, chars);
}
bytesRemaining -= outputLen;
return s;
}
private static string EncodeString(string s) => s.Replace("\"", "\"\"");
/// <summary>
/// Output a string to the trap file, such that the encoded output does not exceed
/// <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
/// <param name="bytesRemaining">The remaining bytes available to output.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
WriteString(tb, TruncateString(s, ref bytesRemaining));
}
/// <summary> /// <summary>
/// Constructs a unique string for this tuple. /// Constructs a unique string for this tuple.
/// </summary> /// </summary>
@@ -27,49 +91,60 @@ namespace Semmle.Extraction
foreach (var a in Args) foreach (var a in Args)
{ {
if (column > 0) tb.Append(", "); if (column > 0) tb.Append(", ");
if (a is Label) switch(a)
{ {
((Label)a).AppendTo(tb); case Label l:
} l.AppendTo(tb);
else if (a is IEntity) break;
{ case IEntity e:
((IEntity)a).Label.AppendTo(tb); e.Label.AppendTo(tb);
} break;
else if (a is string) case string s:
{
tb.Append("\"");
tb.Append(((string)a).Replace("\"", "\"\""));
tb.Append("\"");
}
else if (a is System.Enum)
{
tb.Append((int)a);
}
else if (a is int)
{
tb.Append((int)a);
}
else if (a == null)
{
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
Name, column);
}
else
{
var array = a as string[];
if (array != null)
{
tb.Append("\""); tb.Append("\"");
foreach (var element in array) if (NeedsTruncation(s))
tb.Append(element.Replace("\"", "\"\"")); {
// Slow path
int remaining = maxStringBytes;
WriteTruncatedString(tb, s, ref remaining);
}
else
{
// Fast path
WriteString(tb, s);
}
tb.Append("\""); tb.Append("\"");
} break;
else case System.Enum _:
{ tb.Append((int)a);
break;
case int i:
tb.Append(i);
break;
case string[] array:
tb.Append("\"");
if (NeedsTruncation(array))
{
// Slow path
int remaining = maxStringBytes;
foreach (var element in array)
WriteTruncatedString(tb, element, ref remaining);
}
else
{
// Fast path
foreach (var element in array)
WriteString(tb, element);
}
tb.Append("\"");
break;
case null:
throw new InternalError("Attempt to write a null argument tuple {0} at column {1}",
Name, column);
default:
throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}", throw new InternalError("Attempt to write an invalid argument type {0} in tuple {1} at column {2}",
a.GetType(), Name, column); a.GetType(), Name, column);
}
} }
++column; ++column;
} }
tb.Append(")"); tb.Append(")");

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
| Test passed |

View File

@@ -0,0 +1,9 @@
import csharp
from StringLiteral ascii, StringLiteral utf8
where
ascii.getValue().length() = 1048576 and
// UTF8 encoding can vary a little from platform to platform
utf8.getValue().length() > 440000 and
utf8.getValue().length() < 450000
select "Test passed"