C#: Make the trap compression algorithm configurable.

This commit is contained in:
Calum Grant
2019-08-12 16:29:33 +01:00
parent fe7e90e25a
commit 685c494bcb
9 changed files with 334 additions and 27 deletions

View File

@@ -184,6 +184,7 @@ namespace Semmle.Extraction.CIL.Driver
public bool NoCache { get; private set; }
public int Threads { get; private set; }
public bool PDB { get; private set; }
public TrapWriter.CompressionMode TrapCompression { get; private set; }
void AddFileOrDirectory(string path)
{
@@ -220,6 +221,7 @@ namespace Semmle.Extraction.CIL.Driver
options.Verbosity = Verbosity.Info;
options.Threads = System.Environment.ProcessorCount;
options.PDB = true;
options.TrapCompression = TrapWriter.CompressionMode.Gzip;
foreach (var arg in args)
{

View File

@@ -21,13 +21,13 @@ namespace Semmle.Extraction.CIL.Driver
Console.WriteLine(" path A directory/dll/exe to analyze");
}
static void ExtractAssembly(Layout layout, string assemblyPath, ILogger logger, bool nocache, bool extractPdbs)
static void ExtractAssembly(Layout layout, string assemblyPath, ILogger logger, bool nocache, bool extractPdbs, TrapWriter.CompressionMode trapCompression)
{
string trapFile;
bool extracted;
var sw = new Stopwatch();
sw.Start();
Entities.Assembly.ExtractCIL(layout, assemblyPath, logger, nocache, extractPdbs, out trapFile, out extracted);
Entities.Assembly.ExtractCIL(layout, assemblyPath, logger, nocache, extractPdbs, trapCompression, out trapFile, out extracted);
sw.Stop();
logger.Log(Severity.Info, " {0} ({1})", assemblyPath, sw.Elapsed);
}
@@ -46,7 +46,7 @@ namespace Semmle.Extraction.CIL.Driver
var actions = options.
AssembliesToExtract.Select(asm => asm.filename).
Select<string, Action>(filename => () => ExtractAssembly(layout, filename, logger, options.NoCache, options.PDB)).
Select<string, Action>(filename => () => ExtractAssembly(layout, filename, logger, options.NoCache, options.PDB, options.TrapCompression)).
ToArray();
foreach (var missingRef in options.MissingReferences)

View File

@@ -117,7 +117,7 @@ namespace Semmle.Extraction.CIL.Entities
/// <param name="extractPdbs">Whether to extract PDBs.</param>
/// <param name="trapFile">The path of the trap file.</param>
/// <param name="extracted">Whether the file was extracted (false=cached).</param>
public static void ExtractCIL(Layout layout, string assemblyPath, ILogger logger, bool nocache, bool extractPdbs, out string trapFile, out bool extracted)
public static void ExtractCIL(Layout layout, string assemblyPath, ILogger logger, bool nocache, bool extractPdbs, TrapWriter.CompressionMode trapCompression, out string trapFile, out bool extracted)
{
trapFile = "";
extracted = false;
@@ -125,7 +125,7 @@ namespace Semmle.Extraction.CIL.Entities
{
var extractor = new Extractor(false, assemblyPath, logger);
var project = layout.LookupProjectOrDefault(assemblyPath);
using (var trapWriter = project.CreateTrapWriter(logger, assemblyPath + ".cil", true))
using (var trapWriter = project.CreateTrapWriter(logger, assemblyPath + ".cil", true, trapCompression))
{
trapFile = trapWriter.TrapFile;
if (nocache || !System.IO.File.Exists(trapFile))

View File

@@ -228,7 +228,7 @@ namespace Semmle.Extraction.CSharp
var assemblyPath = extractor.OutputPath;
var assembly = compilation.Assembly;
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true);
var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression);
compilationTrapFile = trapWriter; // Dispose later
var cx = extractor.CreateContext(compilation.Clone(), trapWriter, new AssemblyScope(assembly, assemblyPath, true));
@@ -257,7 +257,7 @@ namespace Semmle.Extraction.CSharp
var assemblyPath = r.FilePath;
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true))
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression))
{
var skipExtraction = FileIsCached(assemblyPath, trapWriter.TrapFile);
@@ -311,7 +311,7 @@ namespace Semmle.Extraction.CSharp
stopwatch.Start();
string trapFile;
bool extracted;
CIL.Entities.Assembly.ExtractCIL(layout, r.FilePath, Logger, !options.Cache, options.PDB, out trapFile, out extracted);
CIL.Entities.Assembly.ExtractCIL(layout, r.FilePath, Logger, !options.Cache, options.PDB, options.TrapCompression, out trapFile, out extracted);
stopwatch.Stop();
ReportProgress(r.FilePath, trapFile, stopwatch.Elapsed, extracted ? AnalysisAction.Extracted : AnalysisAction.UpToDate);
}
@@ -359,13 +359,13 @@ namespace Semmle.Extraction.CSharp
var projectLayout = layout.LookupProjectOrNull(sourcePath);
bool excluded = projectLayout == null;
string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, sourcePath);
string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, sourcePath, options.TrapCompression);
bool upToDate = false;
if (!excluded)
{
// compilation.Clone() is used to allow symbols to be garbage collected.
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, sourcePath, false))
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, sourcePath, false, options.TrapCompression))
{
upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile);

View File

@@ -28,13 +28,13 @@ namespace Semmle.Extraction.Tests
Assert.NotEqual(Directory.GetCurrentDirectory(), tmpDir);
return;
}
var f1 = project.GetTrapPath(Logger, "foo.cs");
var f1 = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.Equal(f1, g1);
// Test trap file generation
var trapwriterFilename = project.GetTrapPath(Logger, "foo.cs");
using (var trapwriter = project.CreateTrapWriter(Logger, "foo.cs", false))
var trapwriterFilename = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
using (var trapwriter = project.CreateTrapWriter(Logger, "foo.cs", false, TrapWriter.CompressionMode.Gzip))
{
trapwriter.Emit("1=*");
Assert.False(File.Exists(trapwriterFilename));
@@ -72,12 +72,12 @@ namespace Semmle.Extraction.Tests
// Test the trap file
var project = layout.LookupProjectOrNull("bar.cs");
var trapwriterFilename = project.GetTrapPath(Logger, "bar.cs");
var trapwriterFilename = project.GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip);
Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE),
trapwriterFilename);
// Test the source archive
var trapWriter = project.CreateTrapWriter(Logger, "bar.cs", false);
var trapWriter = project.CreateTrapWriter(Logger, "bar.cs", false, TrapWriter.CompressionMode.Gzip);
trapWriter.Archive("layout.txt", System.Text.Encoding.ASCII);
var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.True(File.Exists(writtenFile));
@@ -90,7 +90,7 @@ namespace Semmle.Extraction.Tests
// When you specify both a trap file and a layout, use the trap file.
var layout = new Semmle.Extraction.Layout(Path.GetFullPath("snapshot\\trap"), null, "something.txt");
Assert.True(layout.FileInLayout("bar.cs"));
var f1 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs");
var f1 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.Equal(f1, g1);
}
@@ -118,19 +118,19 @@ namespace Semmle.Extraction.Tests
// Use Section 2
Assert.True(layout.FileInLayout("bar.cs"));
var f1 = layout.LookupProjectOrNull("bar.cs").GetTrapPath(Logger, "bar.cs");
var f1 = layout.LookupProjectOrNull("bar.cs").GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.Equal(f1, g1);
// Use Section 1
Assert.True(layout.FileInLayout("foo.cs"));
var f2 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs");
var f2 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.Equal(f2, g2);
// boo.dll is not in the layout, so use layout from first section.
Assert.False(layout.FileInLayout("boo.dll"));
var f3 = layout.LookupProjectOrDefault("boo.dll").GetTrapPath(Logger, "boo.dll");
var f3 = layout.LookupProjectOrDefault("boo.dll").GetTrapPath(Logger, "boo.dll", TrapWriter.CompressionMode.Gzip);
var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.Equal(f3, g3);

View File

@@ -54,14 +54,15 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>The full filepath of the trap file.</returns>
public string GetTrapPath(ILogger logger, string srcFile) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile);
public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
/// <summary>
/// Creates a trap writer for a given source/assembly file.
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>A newly created TrapWriter.</returns>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates);
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression);
}
readonly SubProject DefaultProject;

View File

@@ -44,6 +44,11 @@ namespace Semmle.Extraction
/// </summary>
public bool Fast = false;
/// <summary>
/// The compression algorithm used for trap files.
/// </summary>
public TrapWriter.CompressionMode TrapCompression = TrapWriter.CompressionMode.Gzip;
public virtual bool handleOption(string key, string value)
{
switch (key)

View File

@@ -0,0 +1,258 @@
using System.IO;
namespace Semmle.Extraction
{
    /// <summary>
    /// Extension methods on <see cref="TextWriter"/> for emitting trap file text:
    /// labels, string and float literals, and complete tuples.
    /// </summary>
    public static class TrapExtensions
    {
        /// <summary>
        /// Writes a label reference: <c>#</c> followed by <paramref name="value"/>.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="value">The numeric value of the label.</param>
        public static void WriteLabel(this TextWriter writer, int value)
        {
            writer.Write('#');
            writer.Write(value);
        }

        /// <summary>
        /// Writes the label of <paramref name="entity"/> as a label reference.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="entity">The entity whose label is written.</param>
        public static void WriteLabel(this TextWriter writer, IEntity entity)
        {
            writer.WriteLabel(entity.Label.Value);
        }

        /// <summary>
        /// Writes the label of <paramref name="entity"/> enclosed in braces.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="entity">The entity whose label is written.</param>
        public static void WriteSubId(this TextWriter writer, IEntity entity)
        {
            writer.Write('{');
            writer.WriteLabel(entity);
            writer.Write('}');
        }

        /// <summary>
        /// Writes <paramref name="separator"/> unless this is the first item of a list.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="separator">The text placed between items.</param>
        /// <param name="index">The zero-based index of the item about to be written.</param>
        public static void WriteSeparator(this TextWriter writer, string separator, int index)
        {
            if (index > 0)
            {
                writer.Write(separator);
            }
        }

        /// <summary>
        /// Appends <paramref name="iid"/> to the writer.
        /// </summary>
        /// <remarks>
        /// Temporary: this is intended to go away once IId has been removed entirely.
        /// </remarks>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="iid">The ID to append.</param>
        public static void WriteIId(this TextWriter writer, IId iid)
        {
            iid.AppendTo(writer);
        }

        /// <summary>
        /// Tuple builder positioned immediately after the opening parenthesis,
        /// as returned by <see cref="BeginTuple"/>. The first parameter is
        /// written without a leading comma.
        /// </summary>
        public struct FirstParam
        {
            private readonly TextWriter Writer;

            /// <summary>
            /// Creates a builder that writes to <paramref name="writer"/>.
            /// </summary>
            /// <param name="writer">The writer to output to.</param>
            public FirstParam(TextWriter writer)
            {
                Writer = writer;
            }

            /// <summary>
            /// Writes the label of <paramref name="entity"/> as the first parameter.
            /// </summary>
            /// <param name="entity">The entity whose label is written.</param>
            /// <returns>A builder for the remaining parameters.</returns>
            public NextParam Param(IEntity entity)
            {
                Writer.WriteLabel(entity.Label.Value);
                return new NextParam(Writer);
            }

            /// <summary>
            /// Closes the tuple with <c>)</c> and ends the line.
            /// </summary>
            public void EndTuple()
            {
                Writer.WriteLine(')');
            }
        }

        /// <summary>
        /// Tuple builder for the second and subsequent parameters. Every
        /// <c>Param</c> overload writes <c>", "</c> before its value.
        /// </summary>
        public struct NextParam
        {
            private readonly TextWriter Writer;

            /// <summary>
            /// Creates a builder that writes to <paramref name="writer"/>.
            /// </summary>
            /// <param name="writer">The writer to output to.</param>
            public NextParam(TextWriter writer)
            {
                Writer = writer;
            }

            private void WriteComma()
            {
                Writer.Write(", ");
            }

            /// <summary>
            /// Writes <paramref name="str"/> as a quoted trap string parameter.
            /// </summary>
            public NextParam Param(string str)
            {
                WriteComma();
                Writer.WriteTrapString(str);
                return this;
            }

            /// <summary>
            /// Writes <paramref name="f"/> as a trap float parameter.
            /// </summary>
            public NextParam Param(float f)
            {
                WriteComma();
                Writer.WriteTrapFloat(f);
                return this;
            }

            /// <summary>
            /// Writes <paramref name="label"/> as a label reference parameter.
            /// </summary>
            public NextParam Param(Label label)
            {
                WriteComma();
                Writer.WriteLabel(label.Value);
                return this;
            }

            /// <summary>
            /// Writes <paramref name="i"/> as an integer parameter.
            /// </summary>
            public NextParam Param(int i)
            {
                WriteComma();
                // TextWriter.Write(int) formats with the writer's FormatProvider, which
                // is culture-dependent (for example the negative sign). The output must
                // be machine-readable, so format with the invariant culture instead.
                Writer.Write(i.ToString(System.Globalization.CultureInfo.InvariantCulture));
                return this;
            }

            /// <summary>
            /// Writes the label of <paramref name="e"/> as a parameter.
            /// </summary>
            public NextParam Param(IEntity e)
            {
                WriteComma();
                Writer.WriteLabel(e.Label.Value);
                return this;
            }

            /// <summary>
            /// Closes the tuple with <c>)</c> and ends the line.
            /// </summary>
            public void EndTuple()
            {
                Writer.WriteLine(')');
            }
        }

        // Upper bound on the UTF-8 size of a string literal, measured before quote doubling.
        const int maxStringBytes = 1 << 20; // 1MB

        static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;

        /// <summary>
        /// Determines whether the UTF-8 encoding of <paramref name="s"/> exceeds
        /// <see cref="maxStringBytes"/>.
        /// </summary>
        private static bool NeedsTruncation(string s)
        {
            // Optimization: only count the actual number of bytes if there is the possibility
            // of the string exceeding maxStringBytes
            return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
                encoding.GetByteCount(s) > maxStringBytes;
        }

        /// <summary>
        /// Writes <paramref name="s"/> with embedded double quotes doubled.
        /// </summary>
        private static void WriteString(TextWriter writer, string s) => writer.Write(EncodeString(s));

        /// <summary>
        /// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
        /// </summary>
        /// <remarks>
        /// The string is only ever cut on a code point boundary: a surrogate pair is
        /// measured and kept (or dropped) as a single unit.
        /// </remarks>
        /// <param name="s">The input string to truncate.</param>
        /// <param name="bytesRemaining">
        /// The number of bytes available. On return it has been reduced by the UTF-8
        /// size of the returned string.
        /// </param>
        /// <returns>The truncated string.</returns>
        private static string TruncateString(string s, ref int bytesRemaining)
        {
            int outputLen = encoding.GetByteCount(s);
            if (outputLen > bytesRemaining)
            {
                outputLen = 0;
                int chars = 0;
                while (chars < s.Length)
                {
                    // Measuring the two halves of a surrogate pair separately would count
                    // each as a 3-byte replacement character (6 bytes rather than 4) and
                    // could cut the string between them, so treat the pair as one unit.
                    int width = char.IsSurrogatePair(s, chars) ? 2 : 1;
                    int bytes = encoding.GetByteCount(s, chars, width);
                    if (outputLen + bytes > bytesRemaining)
                    {
                        break;
                    }

                    outputLen += bytes;
                    chars += width;
                }

                s = s.Substring(0, chars);
            }

            bytesRemaining -= outputLen;
            return s;
        }

        /// <summary>
        /// Escapes a string for use inside a quoted trap string by doubling each double quote.
        /// </summary>
        private static string EncodeString(string s) => s.Replace("\"", "\"\"");

        /// <summary>
        /// Output a string to the trap file, truncated so that its UTF-8 size does not
        /// exceed <paramref name="bytesRemaining"/> bytes. The limit is applied before
        /// double quotes are doubled.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="s">The string to output.</param>
        /// <param name="bytesRemaining">The remaining bytes available to output.</param>
        private static void WriteTruncatedString(TextWriter writer, string s, ref int bytesRemaining)
        {
            WriteString(writer, TruncateString(s, ref bytesRemaining));
        }

        /// <summary>
        /// Writes <paramref name="s"/> as a trap string literal: enclosed in double quotes,
        /// with embedded double quotes doubled, and truncated if its UTF-8 size exceeds 1MB.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="s">The string to output.</param>
        public static void WriteTrapString(this TextWriter writer, string s)
        {
            writer.Write('\"');
            if (NeedsTruncation(s))
            {
                // Slow path
                int remaining = maxStringBytes;
                WriteTruncatedString(writer, s, ref remaining);
            }
            else
            {
                // Fast path
                WriteString(writer, s);
            }
            writer.Write('\"');
        }

        /// <summary>
        /// Writes <paramref name="f"/> in exponent notation, independent of the current culture.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="f">The value to output.</param>
        public static void WriteTrapFloat(this TextWriter writer, float f)
        {
            // Trap importer won't accept ints, hence the exponent form. The invariant
            // culture guarantees '.' as the decimal separator and an ASCII minus sign.
            writer.Write(f.ToString("0.#####e0", System.Globalization.CultureInfo.InvariantCulture));
        }

        /// <summary>
        /// Starts a tuple by writing <paramref name="name"/> followed by <c>(</c>.
        /// </summary>
        /// <param name="writer">The writer to output to.</param>
        /// <param name="name">The name of the tuple.</param>
        /// <returns>A builder used to write the first parameter.</returns>
        public static FirstParam BeginTuple(this TextWriter writer, string name)
        {
            writer.Write(name);
            writer.Write('(');
            return new FirstParam(writer);
        }

        // The WriteTuple overloads below each write one complete tuple line,
        // name(p1, p2, ...), for a specific combination of parameter types.

        /// <summary>Writes the tuple <c>name(p1)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1)
        {
            writer.BeginTuple(name).Param(p1).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3, p4)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3, IEntity p4)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).Param(p4).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2, IEntity p3)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2, int p3)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2, int p3)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, IEntity p2, IEntity p3)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2, p3, p4, p5)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2, IEntity p3, IEntity p4, IEntity p5)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).Param(p3).Param(p4).Param(p5).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, int p2)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
        }

        /// <summary>Writes the tuple <c>name(p1, p2)</c>.</summary>
        public static void WriteTuple(this TextWriter writer, string name, IEntity p1, string p2)
        {
            writer.BeginTuple(name).Param(p1).Param(p2).EndTuple();
        }

        // DELETEME
        /// <summary>
        /// Emits the tuple <paramref name="t"/> to the writer.
        /// </summary>
        public static void Emit(this TextWriter writer, Tuple t)
        {
            t.EmitToTrapBuilder(writer);
        }
    }
}

View File

@@ -21,6 +21,13 @@ namespace Semmle.Extraction
RELATIVE
}
/// <summary>
/// The compression applied to the trap file as it is written. The mode also
/// determines the suffix appended after <c>.trap</c> in the trap file name.
/// </summary>
public enum CompressionMode
{
/// <summary>No compression: the file stream is written to directly and no suffix is added.</summary>
None,
/// <summary>Compress through a <c>GZipStream</c>; the suffix is <c>.gz</c>.</summary>
Gzip,
/// <summary>Compress through a <c>BrotliStream</c>; the suffix is <c>.br</c>.</summary>
Brotli
}
/// <summary>
/// The location of the src_archive directory.
/// </summary>
@@ -37,10 +44,14 @@ namespace Semmle.Extraction
readonly ILogger Logger;
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates)
readonly CompressionMode TrapCompression;
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates, CompressionMode trapCompression)
{
Logger = logger;
TrapFile = TrapPath(Logger, trap, outputfile);
TrapCompression = trapCompression;
TrapFile = TrapPath(Logger, trap, outputfile, trapCompression);
WriterLazy = new Lazy<StreamWriter>(() =>
{
@@ -62,7 +73,26 @@ namespace Semmle.Extraction
while (File.Exists(tmpFile));
var fileStream = new FileStream(tmpFile, FileMode.CreateNew, FileAccess.Write);
var compressionStream = new BrotliStream(fileStream, CompressionLevel.Fastest);
Stream compressionStream;
switch (trapCompression)
{
case CompressionMode.Brotli:
compressionStream = new BrotliStream(fileStream, CompressionLevel.Fastest);
break;
case CompressionMode.Gzip:
compressionStream = new GZipStream(fileStream, CompressionLevel.Fastest);
break;
case CompressionMode.None:
compressionStream = fileStream;
break;
default:
// Dead code
throw new ArgumentException(nameof(trapCompression));
}
return new StreamWriter(compressionStream, UTF8, 2000000);
});
this.archive = archive;
@@ -158,7 +188,7 @@ namespace Semmle.Extraction
if (existingHash != hash)
{
var root = TrapFile.Substring(0, TrapFile.Length - 8); // Remove trailing ".trap.gz"
if (TryMove(tmpFile, $"{root}-{hash}.trap.br"))
if (TryMove(tmpFile, $"{root}-{hash}.trap{TrapExtension(TrapCompression)}"))
return;
}
Logger.Log(Severity.Info, "Identical trap file for {0} already exists", TrapFile);
@@ -253,9 +283,20 @@ namespace Semmle.Extraction
return nested;
}
public static string TrapPath(ILogger logger, string folder, string filename)
static string TrapExtension(CompressionMode compression)
{
filename = Path.GetFullPath(filename) + ".trap.br";
switch (compression)
{
case CompressionMode.None: return "";
case CompressionMode.Gzip: return ".gz";
case CompressionMode.Brotli: return ".br";
default: throw new ArgumentException(nameof(compression));
}
}
public static string TrapPath(ILogger logger, string folder, string filename, TrapWriter.CompressionMode trapCompression)
{
filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}";
if (string.IsNullOrEmpty(folder))
folder = Directory.GetCurrentDirectory();