C#: Make the trap compression algorithm configurable.

This commit is contained in:
Calum Grant
2019-08-12 16:29:33 +01:00
parent fe7e90e25a
commit 685c494bcb
9 changed files with 334 additions and 27 deletions

View File

@@ -54,14 +54,15 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>The full filepath of the trap file.</returns>
public string GetTrapPath(ILogger logger, string srcFile) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile);
/// <param name="logger">The logger passed through to <c>TrapWriter.TrapPath</c>.</param>
/// <param name="trapCompression">The trap compression mode, passed through to <c>TrapWriter.TrapPath</c>, which uses it to pick the file-name suffix.</param>
public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
/// <summary>
/// Creates a trap writer for a given source/assembly file.
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>A newly created TrapWriter.</returns>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates);
/// <param name="logger">The logger handed to the new <c>TrapWriter</c>.</param>
/// <param name="discardDuplicates">Forwarded unchanged to the <c>TrapWriter</c> constructor.</param>
/// <param name="trapCompression">The compression mode forwarded to the <c>TrapWriter</c> constructor, which uses it for the trap file name and the output stream.</param>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression);
}
readonly SubProject DefaultProject;

View File

@@ -44,6 +44,11 @@ namespace Semmle.Extraction
/// </summary>
public bool Fast = false;
/// <summary>
/// The compression algorithm used for trap files.
/// </summary>
public TrapWriter.CompressionMode TrapCompression = TrapWriter.CompressionMode.Gzip;
public virtual bool handleOption(string key, string value)
{
switch (key)

View File

@@ -0,0 +1,258 @@
using System.IO;
namespace Semmle.Extraction
{
/// <summary>
/// Extension methods on <see cref="TextWriter"/> for writing trap text:
/// labels, quoted strings, floats and complete tuples.
/// </summary>
public static class TrapExtensions
{
    /// <summary>
    /// Writes a label: a <c>#</c> followed by <paramref name="value"/>.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="value">The numeric value of the label.</param>
    public static void WriteLabel(this TextWriter writer, int value)
    {
        writer.Write('#');
        writer.Write(value);
    }

    /// <summary>
    /// Writes the label of <paramref name="entity"/>.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="entity">The entity whose label value is written.</param>
    public static void WriteLabel(this TextWriter writer, IEntity entity)
    {
        writer.WriteLabel(entity.Label.Value);
    }

    /// <summary>
    /// Writes the label of <paramref name="entity"/> enclosed in braces.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="entity">The entity whose label is written.</param>
    public static void WriteSubId(this TextWriter writer, IEntity entity)
    {
        writer.Write('{');
        writer.WriteLabel(entity);
        writer.Write('}');
    }

    /// <summary>
    /// Writes <paramref name="separator"/> unless this is the first item,
    /// i.e. only when <paramref name="index"/> is greater than zero.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="separator">The text placed between items.</param>
    /// <param name="index">The zero-based position of the item about to be written.</param>
    public static void WriteSeparator(this TextWriter writer, string separator, int index)
    {
        if (index > 0)
        {
            writer.Write(separator);
        }
    }

    // This is temporary and we can get rid of IId entirely
    /// <summary>
    /// Lets <paramref name="iid"/> append itself to <paramref name="writer"/>.
    /// </summary>
    public static void WriteIId(this TextWriter writer, IId iid)
    {
        iid.AppendTo(writer);
    }

    /// <summary>
    /// Tuple-building state returned by <c>BeginTuple</c>: the first parameter
    /// is written without a leading comma.
    /// </summary>
    public struct FirstParam
    {
        private readonly TextWriter Writer;

        /// <summary>Wraps the writer the tuple is being written to.</summary>
        public FirstParam(TextWriter writer)
        {
            Writer = writer;
        }

        /// <summary>
        /// Writes the label of <paramref name="entity"/> as the first parameter.
        /// </summary>
        /// <returns>The state used to write any further parameters.</returns>
        public NextParam Param(IEntity entity)
        {
            Writer.WriteLabel(entity.Label.Value);
            return new NextParam(Writer);
        }

        /// <summary>Closes the tuple with <c>)</c> and ends the line.</summary>
        public void EndTuple()
        {
            Writer.WriteLine(')');
        }
    }

    /// <summary>
    /// Tuple-building state after at least one parameter has been written:
    /// every further parameter is preceded by <c>", "</c>.
    /// </summary>
    public struct NextParam
    {
        private readonly TextWriter Writer;

        /// <summary>Wraps the writer the tuple is being written to.</summary>
        public NextParam(TextWriter writer)
        {
            Writer = writer;
        }

        private void WriteComma()
        {
            Writer.Write(", ");
        }

        /// <summary>Writes a quoted, escaped string parameter.</summary>
        public NextParam Param(string str)
        {
            WriteComma();
            Writer.WriteTrapString(str);
            return this;
        }

        /// <summary>Writes a float parameter in exponent notation.</summary>
        public NextParam Param(float f)
        {
            WriteComma();
            Writer.WriteTrapFloat(f);
            return this;
        }

        /// <summary>Writes a label parameter.</summary>
        public NextParam Param(Label label)
        {
            WriteComma();
            Writer.WriteLabel(label.Value);
            return this;
        }

        /// <summary>Writes an integer parameter.</summary>
        public NextParam Param(int i)
        {
            WriteComma();
            Writer.Write(i);
            return this;
        }

        /// <summary>Writes the label of an entity as a parameter.</summary>
        public NextParam Param(IEntity e)
        {
            WriteComma();
            Writer.WriteLabel(e.Label.Value);
            return this;
        }

        /// <summary>Closes the tuple with <c>)</c> and ends the line.</summary>
        public void EndTuple()
        {
            Writer.WriteLine(')');
        }
    }

    private const int maxStringBytes = 1 << 20; // 1MB

    private static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;

    /// <summary>
    /// Determines whether the UTF-8 form of <paramref name="s"/> is longer than
    /// <see cref="maxStringBytes"/>.
    /// </summary>
    private static bool NeedsTruncation(string s)
    {
        // Optimization: only count the actual number of bytes if there is the possibility
        // of the string exceeding maxStringBytes
        return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
            encoding.GetByteCount(s) > maxStringBytes;
    }

    /// <summary>Writes <paramref name="s"/> with its double quotes escaped.</summary>
    private static void WriteString(TextWriter writer, string s) => writer.Write(EncodeString(s));

    /// <summary>
    /// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
    /// A surrogate pair is kept or dropped as a whole, so the result never ends in half a pair.
    /// </summary>
    /// <param name="s">The input string to truncate.</param>
    /// <param name="bytesRemaining">
    /// The number of bytes available. On return it is reduced by the UTF-8 size of the returned string.
    /// </param>
    /// <returns>The truncated string.</returns>
    private static string TruncateString(string s, ref int bytesRemaining)
    {
        int outputLen = encoding.GetByteCount(s);
        if (outputLen > bytesRemaining)
        {
            outputLen = 0;
            int chars = 0;
            while (chars < s.Length)
            {
                // Measure a surrogate pair as one unit: measured separately, each half
                // is counted as a replacement character, and cutting between the halves
                // would leave a lone surrogate at the end of the output.
                int unit = char.IsSurrogatePair(s, chars) ? 2 : 1;
                int bytes = encoding.GetByteCount(s, chars, unit);
                if (outputLen + bytes > bytesRemaining)
                {
                    break;
                }

                outputLen += bytes;
                chars += unit;
            }

            s = s.Substring(0, chars);
        }

        bytesRemaining -= outputLen;
        return s;
    }

    /// <summary>Escapes a string for trap output by doubling every double quote.</summary>
    private static string EncodeString(string s) => s.Replace("\"", "\"\"");

    /// <summary>
    /// Output a string to the trap file, truncated to at most
    /// <paramref name="bytesRemaining"/> bytes of UTF-8. The limit is applied to the
    /// raw string before its quotes are escaped, so the escaped output can be longer.
    /// </summary>
    /// <param name="writer">The trapbuilder</param>
    /// <param name="s">The string to output.</param>
    /// <param name="bytesRemaining">The remaining bytes available to output.</param>
    private static void WriteTruncatedString(TextWriter writer, string s, ref int bytesRemaining)
    {
        WriteString(writer, TruncateString(s, ref bytesRemaining));
    }

    /// <summary>
    /// Writes <paramref name="s"/> as a double-quoted trap string. Embedded double quotes
    /// are doubled, and strings whose UTF-8 form exceeds 1MB are truncated first.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="s">The string to output.</param>
    public static void WriteTrapString(this TextWriter writer, string s)
    {
        writer.Write('\"');
        if (NeedsTruncation(s))
        {
            // Slow path
            int remaining = maxStringBytes;
            WriteTruncatedString(writer, s, ref remaining);
        }
        else
        {
            // Fast path
            WriteString(writer, s);
        }
        writer.Write('\"');
    }

    /// <summary>
    /// Writes <paramref name="f"/> in exponent notation using the invariant culture,
    /// so the decimal separator is always <c>.</c> whatever the current culture is.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="f">The value to output.</param>
    public static void WriteTrapFloat(this TextWriter writer, float f)
    {
        // Trap importer won't accept ints
        writer.Write(f.ToString("0.#####e0", System.Globalization.CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Starts a tuple by writing <paramref name="name"/> followed by <c>(</c>.
    /// </summary>
    /// <param name="writer">The writer to output to.</param>
    /// <param name="name">The name of the tuple.</param>
    /// <returns>The state used to write the first parameter or to end the tuple.</returns>
    public static FirstParam BeginTuple(this TextWriter writer, string name)
    {
        writer.Write(name);
        writer.Write('(');
        return new FirstParam(writer);
    }

    // The WriteTuple overloads below each write one complete tuple line for a
    // particular combination of parameter types.

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1)
    {
        writer.BeginTuple(name).Param(p1).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3, IEntity p4)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).Param(p4).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2, IEntity p3)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2, int p3)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2, int p3)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2, IEntity p3)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3, IEntity p4, IEntity p5)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).Param(p4).Param(p5).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
    }

    public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2)
    {
        writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
    }

    // DELETEME
    /// <summary>
    /// Has <paramref name="t"/> emit itself to <paramref name="writer"/>.
    /// </summary>
    public static void Emit(this TextWriter writer, Tuple t)
    {
        t.EmitToTrapBuilder(writer);
    }
}
}

View File

@@ -21,6 +21,13 @@ namespace Semmle.Extraction
RELATIVE
}
/// <summary>
/// The compression applied to a trap file. It selects both the stream the
/// trap is written through and the suffix appended after <c>.trap</c>.
/// </summary>
public enum CompressionMode
{
/// <summary>No compression: the trap is written straight to the file stream, with no extra suffix.</summary>
None,
/// <summary>Gzip compression: the trap is written through a <c>GZipStream</c>, with the suffix <c>.gz</c>.</summary>
Gzip,
/// <summary>Brotli compression: the trap is written through a <c>BrotliStream</c>, with the suffix <c>.br</c>.</summary>
Brotli
}
/// <summary>
/// The location of the src_archive directory.
/// </summary>
@@ -37,10 +44,14 @@ namespace Semmle.Extraction
readonly ILogger Logger;
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates)
readonly CompressionMode TrapCompression;
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates, CompressionMode trapCompression)
{
Logger = logger;
TrapFile = TrapPath(Logger, trap, outputfile);
TrapCompression = trapCompression;
TrapFile = TrapPath(Logger, trap, outputfile, trapCompression);
WriterLazy = new Lazy<StreamWriter>(() =>
{
@@ -62,7 +73,26 @@ namespace Semmle.Extraction
while (File.Exists(tmpFile));
var fileStream = new FileStream(tmpFile, FileMode.CreateNew, FileAccess.Write);
var compressionStream = new BrotliStream(fileStream, CompressionLevel.Fastest);
// Wrap the file stream according to the requested compression mode;
// CompressionMode.None writes to the file stream directly.
Stream compressionStream;
switch (trapCompression)
{
    case CompressionMode.Brotli:
        compressionStream = new BrotliStream(fileStream, CompressionLevel.Fastest);
        break;
    case CompressionMode.Gzip:
        compressionStream = new GZipStream(fileStream, CompressionLevel.Fastest);
        break;
    case CompressionMode.None:
        compressionStream = fileStream;
        break;
    default:
        // Dead code for the defined enum members; only reachable through an out-of-range cast.
        // ArgumentOutOfRangeException takes the parameter name first, whereas the single-string
        // ArgumentException constructor would treat it as the message.
        throw new ArgumentOutOfRangeException(nameof(trapCompression), trapCompression, "Unsupported trap compression mode.");
}
return new StreamWriter(compressionStream, UTF8, 2000000);
});
this.archive = archive;
@@ -158,7 +188,7 @@ namespace Semmle.Extraction
if (existingHash != hash)
{
// Remove the trailing ".trap" plus the compression suffix ("", ".gz" or ".br").
// The suffix length depends on TrapCompression, so a fixed length of 8 would cut
// into the file name itself when the trap is uncompressed.
var root = TrapFile.Substring(0, TrapFile.Length - (".trap".Length + TrapExtension(TrapCompression).Length));
if (TryMove(tmpFile, $"{root}-{hash}.trap.br"))
if (TryMove(tmpFile, $"{root}-{hash}.trap{TrapExtension(TrapCompression)}"))
return;
}
Logger.Log(Severity.Info, "Identical trap file for {0} already exists", TrapFile);
@@ -253,9 +283,20 @@ namespace Semmle.Extraction
return nested;
}
public static string TrapPath(ILogger logger, string folder, string filename)
static string TrapExtension(CompressionMode compression)
{
filename = Path.GetFullPath(filename) + ".trap.br";
// Map each compression mode to the suffix appended after ".trap".
switch (compression)
{
    case CompressionMode.None: return "";
    case CompressionMode.Gzip: return ".gz";
    case CompressionMode.Brotli: return ".br";
    // Only reachable through an out-of-range cast. The parameter name goes in the
    // paramName slot rather than being passed as the exception message.
    default: throw new ArgumentOutOfRangeException(nameof(compression), compression, "Unsupported trap compression mode.");
}
}
public static string TrapPath(ILogger logger, string folder, string filename, TrapWriter.CompressionMode trapCompression)
{
filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}";
if (string.IsNullOrEmpty(folder))
folder = Directory.GetCurrentDirectory();