Merge pull request #1749 from calumgrant/cs/extractor-tidy

C#: Refactor extractor trap generation code
This commit is contained in:
Tom Hvitved
2019-08-30 15:44:35 +02:00
committed by GitHub
155 changed files with 3268 additions and 2381 deletions

View File

@@ -1,126 +1,15 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
using Semmle.Util;
using System.Collections.Generic;
using System.Linq;
namespace Semmle.Extraction.CommentProcessing
{
/// <summary>
/// The lexical type of a comment line.
/// </summary>
public enum CommentType
{
Singleline, // Comment starting // ...
XmlDoc, // Comment starting /// ...
Multiline, // Comment starting /* ..., even if the comment only spans one line.
MultilineContinuation // The second and subsequent lines of comment in a multiline comment.
};
/// <summary>
/// Relationship between a comment and a program element.
/// </summary>
public enum Binding
{
Parent, // The parent element of a comment
Best, // The most likely element associated with a comment
Before, // The element before the comment
After // The element after the comment
};
/// <summary>
/// A single line of text in a comment.
/// </summary>
public interface ICommentLine
{
/// <summary>
/// The location of this comment line.
/// </summary>
Location Location { get; }
/// <summary>
/// The lexical type of this comment line.
/// </summary>
CommentType Type { get; }
/// <summary>
/// Trimmed text of the comment.
/// </summary>
string Text { get; }
/// <summary>
/// Complete text of the comment including leading/trailing whitespace and comment markers.
/// </summary>
string RawText { get; }
}
/// <summary>
/// A block of comment lines combined into one unit.
/// </summary>
public interface ICommentBlock
{
/// <summary>
/// The location of this comment block.
/// </summary>
Location Location { get; }
/// <summary>
/// The comment lines that make up this block.
/// </summary>
IList<ICommentLine> CommentLines { get; }
}
/// <summary>
/// Output for generated comment associations.
/// </summary>
/// <param name="elementLabel">The label of the element.</param>
/// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
/// <param name="commentBlock">The comment block associated with the element.</param>
/// <param name="binding">The relationship between the comment block and the element.</param>
public delegate void CommentBinding(Label elementLabel, Key duplicationGuardKey, ICommentBlock commentBlock, Binding binding);
/// <summary>
/// Used by the populator to generate binding information between comments and program elements.
/// </summary>
public interface ICommentGenerator
{
/// <summary>
/// Registers the location of a program element to associate comments with.
/// Can be called in any order.
/// </summary>
/// <param name="elementLabel">Label of the element.</param>
/// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
/// <param name="location">Location of the element.</param>
void RegisterElementLocation(Label elementLabel, Key duplicationGuardKey, Location location);
/// <summary>
/// Registers a comment line.
/// </summary>
/// <param name="comment">The comment line to register.</param>
void AddComment(ICommentLine comment);
/// <summary>
/// Generate all binding information.
/// </summary>
/// <param name="cb">Receiver of the binding information.</param>
void GenerateBindings(CommentBinding cb);
}
/// <summary>
/// Extension methods for reading line information from, and comparing, <see cref="Location"/> values.
/// </summary>
static class LocationExtension
{
    /// <summary>
    /// Gets the line at which the location's line span starts.
    /// </summary>
    public static int StartLine(this Location loc)
    {
        return loc.GetLineSpan().Span.Start.Line;
    }

    /// <summary>
    /// Gets the character offset (column) at which the location's line span starts.
    /// </summary>
    public static int StartColumn(this Location loc)
    {
        return loc.GetLineSpan().Span.Start.Character;
    }

    /// <summary>
    /// Gets the line at which the location's line span ends.
    /// </summary>
    public static int EndLine(this Location loc)
    {
        return loc.GetLineSpan().Span.End.Line;
    }

    /// <summary>
    /// Determines whether the location <paramref name="outer"/> completely contains
    /// the location <paramref name="inner"/>.
    /// </summary>
    /// <param name="outer">The outer location.</param>
    /// <param name="inner">The inner location.</param>
    /// <returns>Whether inner is completely contained in outer.</returns>
    public static bool Contains(this Location outer, Location inner)
    {
        // Locations in different syntax trees can never contain one another.
        if (outer.SourceTree != inner.SourceTree)
            return false;

        var outerSpan = outer.SourceSpan;
        var innerSpan = inner.SourceSpan;
        return outerSpan.Start <= innerSpan.Start && outerSpan.End >= innerSpan.End;
    }

    /// <summary>
    /// Determines whether one location ends before (or exactly where) another starts.
    /// </summary>
    /// <param name="before">The location expected to come first.</param>
    /// <param name="after">The location expected to come second.</param>
    /// <returns>Whether 'before' comes before 'after' in the same syntax tree.</returns>
    public static bool Before(this Location before, Location after)
    {
        // Ordering is only meaningful within a single syntax tree.
        if (before.SourceTree != after.SourceTree)
            return false;

        return before.SourceSpan.End <= after.SourceSpan.Start;
    }
}
/// <summary>
/// Implements the comment processor.
/// Registers locations of comments and program elements,
/// then generates binding information.
/// Implements the comment processor for associating comments with program elements.
/// Registers locations of comments and program elements,
/// then generates binding information.
/// </summary>
class CommentProcessor : ICommentGenerator
{
@@ -130,14 +19,14 @@ namespace Semmle.Extraction.CommentProcessing
}
// Comments sorted by location.
readonly SortedDictionary<Location, ICommentLine> comments = new SortedDictionary<Location, ICommentLine>(new LocationComparer());
private readonly SortedDictionary<Location, ICommentLine> comments = new SortedDictionary<Location, ICommentLine>(new LocationComparer());
// Program elements sorted by location.
readonly SortedDictionary<Location, Label> elements = new SortedDictionary<Location, Label>(new LocationComparer());
private readonly SortedDictionary<Location, Label> elements = new SortedDictionary<Location, Label>(new LocationComparer());
readonly Dictionary<Label, Key> duplicationGuardKeys = new Dictionary<Label, Key>();
private readonly Dictionary<Label, Key> duplicationGuardKeys = new Dictionary<Label, Key>();
Key GetDuplicationGuardKey(Label label)
private Key GetDuplicationGuardKey(Label label)
{
Key duplicationGuardKey;
if (duplicationGuardKeys.TryGetValue(label, out duplicationGuardKey))
@@ -151,7 +40,7 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Comparer for two locations, allowing them to be inserted into a sorted list.
/// Comparer for two locations, allowing them to be inserted into a sorted list.
/// </summary>
/// <param name="l1">First location</param>
/// <param name="l2">Second location</param>
@@ -166,12 +55,12 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Called by the populator when there is a program element which can have comments.
/// Called by the populator when there is a program element which can have comments.
/// </summary>
/// <param name="elementLabel">The label of the element in the trap file.</param>
/// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
/// <param name="loc">The location of the element.</param>
public void RegisterElementLocation(Label elementLabel, Key duplicationGuardKey, Location loc)
public void AddElement(Label elementLabel, Key duplicationGuardKey, Location loc)
{
if (loc != null && loc.IsInSource)
elements[loc] = elementLabel;
@@ -188,21 +77,21 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Generate the bindings between a comment and program elements.
/// Called once for each commentBlock.
/// Generate the bindings between a comment and program elements.
/// Called once for each commentBlock.
/// </summary>
///
/// <param name="commentBlock">The comment block.</param>
/// <param name="previousElement">The element before the comment block.</param>
/// <param name="nextElement">The element after the comment block.</param>
/// <param name="parentElement">The parent element of the comment block.</param>
/// <param name="cb">Output binding information.</param>
/// <param name="callback">Output binding information.</param>
void GenerateBindings(
ICommentBlock commentBlock,
KeyValuePair<Location, Label>? previousElement,
KeyValuePair<Location, Label>? nextElement,
KeyValuePair<Location, Label>? parentElement,
CommentBinding cb
CommentBindingCallback callback
)
{
EnsureSameFile(commentBlock, ref previousElement);
@@ -212,19 +101,19 @@ namespace Semmle.Extraction.CommentProcessing
if (previousElement != null)
{
var key = previousElement.Value.Value;
cb(key, GetDuplicationGuardKey(key), commentBlock, Binding.Before);
callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.Before);
}
if (nextElement != null)
{
var key = nextElement.Value.Value;
cb(key, GetDuplicationGuardKey(key), commentBlock, Binding.After);
callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.After);
}
if (parentElement != null)
{
var key = parentElement.Value.Value;
cb(key, GetDuplicationGuardKey(key), commentBlock, Binding.Parent);
callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.Parent);
}
// Heuristic to decide which is the "best" element associated with the comment.
@@ -274,19 +163,19 @@ namespace Semmle.Extraction.CommentProcessing
if (bestElement != null)
{
var label = bestElement.Value.Value;
cb(label, GetDuplicationGuardKey(label), commentBlock, Binding.Best);
callback(label, GetDuplicationGuardKey(label), commentBlock, CommentBinding.Best);
}
}
// Stores element nesting information in a stack.
// Top of stack = most nested element, based on Location.
class ElementStack
private class ElementStack
{
// Invariant: the top of the stack must be contained by items below it.
readonly Stack<KeyValuePair<Location, Label>> elementStack = new Stack<KeyValuePair<Location, Label>>();
/// <summary>
/// Add a new element to the stack.
/// Add a new element to the stack.
/// </summary>
/// The stack is maintained.
/// <param name="value">The new element to push.</param>
@@ -300,7 +189,7 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Locate the parent of a comment with location l.
/// Locate the parent of a comment with location l.
/// </summary>
/// <param name="l">The location of the comment.</param>
/// <returns>An element completely containing l, or null if none found.</returns>
@@ -320,7 +209,7 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Finds the element after the comment.
/// Finds the element after the comment.
/// </summary>
/// <param name="comment">The location of the comment.</param>
/// <param name="next">The next element.</param>
@@ -333,14 +222,14 @@ namespace Semmle.Extraction.CommentProcessing
}
// Generate binding information for one CommentBlock.
void GenerateBindings(
private void GenerateBindings(
ICommentBlock block,
ElementStack elementStack,
KeyValuePair<Location, Label>? nextElement,
CommentBinding cb
CommentBindingCallback cb
)
{
if (block.CommentLines.Count > 0)
if (block.CommentLines.Any())
{
GenerateBindings(
block,
@@ -352,9 +241,10 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Process comments up until nextElement.
/// Group comments into blocks, and associate blocks with elements.
/// Process comments up until nextElement.
/// Group comments into blocks, and associate blocks with elements.
/// </summary>
///
/// <param name="commentEnumerator">Enumerator for all comments in the program.</param>
/// <param name="nextElement">The next element in the list.</param>
/// <param name="elementStack">A stack of nested program elements.</param>
@@ -364,7 +254,7 @@ namespace Semmle.Extraction.CommentProcessing
IEnumerator<KeyValuePair<Location, ICommentLine>> commentEnumerator,
KeyValuePair<Location, Label>? nextElement,
ElementStack elementStack,
CommentBinding cb
CommentBindingCallback cb
)
{
CommentBlock block = new CommentBlock();
@@ -395,10 +285,10 @@ namespace Semmle.Extraction.CommentProcessing
}
/// <summary>
/// Merge comments into blocks and associate comment blocks with program elements.
/// Merge comments into blocks and associate comment blocks with program elements.
/// </summary>
/// <param name="cb">Callback for the binding information</param>
public void GenerateBindings(CommentBinding cb)
public void GenerateBindings(CommentBindingCallback cb)
{
/* Algorithm:
* Do a merge of elements and comments, which are both sorted in location order.
@@ -442,7 +332,9 @@ namespace Semmle.Extraction.CommentProcessing
class CommentBlock : ICommentBlock
{
public IList<ICommentLine> CommentLines { get; } = new List<ICommentLine>();
private readonly List<ICommentLine> lines = new List<ICommentLine>();
public IEnumerable<ICommentLine> CommentLines => lines;
public Location Location { get; private set; }
@@ -453,7 +345,7 @@ namespace Semmle.Extraction.CommentProcessing
/// <returns>Whether the new line should be appended to this block.</returns>
public bool CombinesWith(ICommentLine newLine)
{
if (CommentLines.Count == 0) return true;
if (!CommentLines.Any()) return true;
bool sameFile = Location.SourceTree == newLine.Location.SourceTree;
bool sameRow = Location.EndLine() == newLine.Location.StartLine();
@@ -462,21 +354,20 @@ namespace Semmle.Extraction.CommentProcessing
bool adjacent = sameFile && (sameRow || (sameColumn && nextRow));
return
newLine.Type == CommentType.MultilineContinuation ||
newLine.Type == CommentLineType.MultilineContinuation ||
adjacent;
}
/// <summary>
/// Adds a comment line to this comment block.
/// Adds a comment line to this comment block.
/// </summary>
/// <param name="line">The line to add.</param>
public void AddCommentLine(ICommentLine line)
{
Location = CommentLines.Count == 0 ?
Location = !lines.Any() ?
line.Location :
Location.Create(line.Location.SourceTree, new TextSpan(Location.SourceSpan.Start, line.Location.SourceSpan.End - Location.SourceSpan.Start));
CommentLines.Add(line);
lines.Add(line);
}
}
}

View File

@@ -0,0 +1,105 @@
using Microsoft.CodeAnalysis;
using System.Collections.Generic;
namespace Semmle.Extraction.CommentProcessing
{
/// <summary>
/// The type of a single comment line.
/// </summary>
public enum CommentLineType
{
/// <summary>A comment starting with <c>//</c>.</summary>
Singleline,
/// <summary>A comment starting with <c>///</c>.</summary>
XmlDoc,
/// <summary>A comment starting with <c>/*</c>, even if the comment only spans one line.</summary>
Multiline,
/// <summary>The second and subsequent lines of a multiline comment.</summary>
MultilineContinuation
};
/// <summary>
/// Describes the relationship between a comment and a program element.
/// </summary>
public enum CommentBinding
{
/// <summary>The parent element of a comment.</summary>
Parent,
/// <summary>The most likely element associated with a comment.</summary>
Best,
/// <summary>The element before the comment.</summary>
Before,
/// <summary>The element after the comment.</summary>
After
};
/// <summary>
/// A single line in a comment.
/// </summary>
public interface ICommentLine
{
/// <summary>
/// The location of this comment line.
/// </summary>
Location Location { get; }
/// <summary>
/// The type of this comment line.
/// </summary>
CommentLineType Type { get; }
/// <summary>
/// The text body of this comment line, excluding comment delimiters and leading and trailing whitespace.
/// </summary>
string Text { get; }
/// <summary>
/// The full text of this comment line, including leading/trailing whitespace and comment delimiters.
/// </summary>
string RawText { get; }
}
/// <summary>
/// A block of comment lines combined into one unit.
/// </summary>
public interface ICommentBlock
{
/// <summary>
/// The full span of this comment block.
/// </summary>
Location Location { get; }
/// <summary>
/// The individual comment lines that make up this block.
/// </summary>
IEnumerable<ICommentLine> CommentLines { get; }
}
/// <summary>
/// Callback for generated comment associations.
/// </summary>
/// <param name="elementLabel">The label of the element.</param>
/// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
/// <param name="commentBlock">The comment block associated with the element.</param>
/// <param name="binding">The relationship between the comment block and the element.</param>
public delegate void CommentBindingCallback(Label elementLabel, Key duplicationGuardKey, ICommentBlock commentBlock, CommentBinding binding);
/// <summary>
/// Computes the binding information between comments and program elements.
/// </summary>
public interface ICommentGenerator
{
/// <summary>
/// Registers the location of a program element to associate comments with.
/// This can be called in any order.
/// </summary>
/// <param name="elementLabel">Label of the element.</param>
/// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
/// <param name="location">Location of the element.</param>
void AddElement(Label elementLabel, Key duplicationGuardKey, Location location);
/// <summary>
/// Registers a line of comment.
/// </summary>
/// <param name="comment">The comment to register.</param>
void AddComment(ICommentLine comment);
/// <summary>
/// Computes the binding information and calls <paramref name="cb"/> with all of the comment binding information.
/// </summary>
/// <param name="cb">Receiver of the binding information.</param>
void GenerateBindings(CommentBindingCallback cb);
}
}

View File

@@ -1,21 +1,22 @@
using Microsoft.CodeAnalysis;
using System.Linq;
using Semmle.Extraction.CommentProcessing;
using System.Collections.Generic;
using System;
using Semmle.Util.Logging;
using Semmle.Extraction.Entities;
using Semmle.Util.Logging;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Semmle.Extraction
{
/// <summary>
/// State which needs needs to be available throughout the extraction process.
/// There is one Context object per trap output file.
/// State that needs to be available throughout the extraction process.
/// There is one Context object per trap output file.
/// </summary>
public class Context
{
/// <summary>
/// Interface to various extraction functions, e.g. logger, trap writer.
/// Access various extraction functions, e.g. logger, trap writer.
/// </summary>
public readonly IExtractor Extractor;
@@ -23,7 +24,7 @@ namespace Semmle.Extraction
/// The program database provided by Roslyn.
/// There's one per syntax tree, which makes things awkward.
/// </summary>
public SemanticModel Model(SyntaxNode node)
public SemanticModel GetModel(SyntaxNode node)
{
if (cachedModel == null || node.SyntaxTree != cachedModel.SyntaxTree)
{
@@ -33,14 +34,14 @@ namespace Semmle.Extraction
return cachedModel;
}
SemanticModel cachedModel;
private SemanticModel cachedModel;
/// <summary>
/// Access to the trap file.
/// Access to the trap file.
/// </summary>
public readonly TrapWriter TrapWriter;
int NewId() => TrapWriter.IdCounter++;
int GetNewId() => TrapWriter.IdCounter++;
/// <summary>
/// Creates a new entity using the factory.
@@ -53,6 +54,30 @@ namespace Semmle.Extraction
return init == null ? CreateEntity2(factory, init) : CreateNonNullEntity(factory, init);
}
// A recursion guard against writing to the trap file whilst writing an id to the trap file.
bool WritingLabel = false;
/// <summary>
/// Writes the label definition of <paramref name="entity"/> to <paramref name="trapFile"/>.
/// If a label is already being written, the definition is queued via PopulateLater
/// instead of being written immediately.
/// </summary>
/// <param name="entity">The entity whose label is defined.</param>
/// <param name="trapFile">The trap file to write to.</param>
public void DefineLabel(IEntity entity, TextWriter trapFile)
{
    if (WritingLabel)
    {
        // Don't define a label whilst writing a label: queue it up instead.
        PopulateLater(() => DefineLabel(entity, trapFile));
        return;
    }

    try
    {
        WritingLabel = true;
        entity.DefineLabel(trapFile);
    }
    finally
    {
        // Always clear the guard, even if writing the label throws.
        WritingLabel = false;
    }
}
/// <summary>
/// Creates a new entity using the factory.
/// Uses a different cache to <see cref="CreateEntity{Type, Entity}(ICachedEntityFactory{Type, Entity}, Type)"/>,
@@ -73,23 +98,29 @@ namespace Semmle.Extraction
}
else
{
var id = entity.Id;
#if DEBUG_LABELS
CheckEntityHasUniqueLabel(id, entity);
#endif
label = new Label(NewId());
label = GetNewLabel();
entity.Label = label;
entityLabelCache[entity] = label;
DefineLabel(label, id);
DefineLabel(entity, TrapWriter.Writer);
if (entity.NeedsPopulation)
Populate(init as ISymbol, entity);
#if DEBUG_LABELS
using (var id = new StringWriter())
{
entity.WriteId(id);
CheckEntityHasUniqueLabel(id.ToString(), entity);
}
#endif
}
return entity;
}
}
#if DEBUG_LABELS
private void CheckEntityHasUniqueLabel(IId id, ICachedEntity entity)
private void CheckEntityHasUniqueLabel(string id, ICachedEntity entity)
{
if (idLabelCache.TryGetValue(id, out var originalEntity))
{
@@ -102,6 +133,8 @@ namespace Semmle.Extraction
}
#endif
/// <summary>
/// Creates a new label from the next id returned by GetNewId().
/// </summary>
public Label GetNewLabel() => new Label(GetNewId());
private Entity CreateNonNullEntity<Type, Entity>(ICachedEntityFactory<Type, Entity> factory, Type init) where Entity : ICachedEntity
{
if (objectEntityCache.TryGetValue(init, out var cached))
@@ -109,22 +142,24 @@ namespace Semmle.Extraction
using (StackGuard)
{
var label = new Label(NewId());
var label = GetNewLabel();
var entity = factory.Create(this, init);
entity.Label = label;
objectEntityCache[init] = entity;
var id = entity.Id;
DefineLabel(label, id);
#if DEBUG_LABELS
CheckEntityHasUniqueLabel(id, entity);
#endif
DefineLabel(entity, TrapWriter.Writer);
if (entity.NeedsPopulation)
Populate(init as ISymbol, entity);
#if DEBUG_LABELS
using (var id = new StringWriter())
{
entity.WriteId(id);
CheckEntityHasUniqueLabel(id.ToString(), entity);
}
#endif
return entity;
}
}
@@ -158,29 +193,17 @@ namespace Semmle.Extraction
/// </summary>
public void AddFreshLabel(IEntity entity)
{
var label = new Label(NewId());
TrapWriter.Emit(new DefineFreshLabelEmitter(label));
entity.Label = label;
entity.Label = GetNewLabel();
entity.DefineFreshLabel(TrapWriter.Writer);
}
#if DEBUG_LABELS
readonly Dictionary<IId, ICachedEntity> idLabelCache = new Dictionary<IId, ICachedEntity>();
readonly Dictionary<string, ICachedEntity> idLabelCache = new Dictionary<string, ICachedEntity>();
#endif
readonly Dictionary<object, ICachedEntity> objectEntityCache = new Dictionary<object, ICachedEntity>();
readonly Dictionary<ICachedEntity, Label> entityLabelCache = new Dictionary<ICachedEntity, Label>();
readonly HashSet<Label> extractedGenerics = new HashSet<Label>();
public void DefineLabel(IEntity entity)
{
entity.Label = new Label(NewId());
DefineLabel(entity.Label, entity.Id);
}
void DefineLabel(Label label, IId id)
{
TrapWriter.Emit(new DefineLabelEmitter(label, id));
}
/// <summary>
/// Queue of items to populate later.
/// The only reason for this is so that the call stack does not
@@ -300,43 +323,6 @@ namespace Semmle.Extraction
}
}
/// <summary>
/// Trap emitter that appends a label, an <c>=</c>, an id and a line terminator
/// to a trap builder.
/// </summary>
class DefineLabelEmitter : ITrapEmitter
{
    private readonly Label definedLabel;
    private readonly IId definedId;

    /// <summary>
    /// Creates an emitter for the given label/id pair.
    /// </summary>
    /// <param name="label">The label to emit on the left of the <c>=</c>.</param>
    /// <param name="id">The id to emit on the right of the <c>=</c>.</param>
    public DefineLabelEmitter(Label label, IId id)
    {
        definedLabel = label;
        definedId = id;
    }

    /// <summary>
    /// Appends the label, <c>=</c>, the id and a line terminator to <paramref name="tb"/>.
    /// </summary>
    /// <param name="tb">The trap builder to append to.</param>
    public void EmitToTrapBuilder(ITrapBuilder tb)
    {
        definedLabel.AppendTo(tb);
        tb.Append("=");
        definedId.AppendTo(tb);
        tb.AppendLine();
    }
}
/// <summary>
/// Trap emitter that appends a label followed by <c>=*</c> and a line terminator
/// to a trap builder.
/// </summary>
class DefineFreshLabelEmitter : ITrapEmitter
{
    private readonly Label freshLabel;

    /// <summary>
    /// Creates an emitter for the given label.
    /// </summary>
    /// <param name="label">The label to emit.</param>
    public DefineFreshLabelEmitter(Label label)
    {
        freshLabel = label;
    }

    /// <summary>
    /// Appends the label, <c>=*</c> and a line terminator to <paramref name="tb"/>.
    /// </summary>
    /// <param name="tb">The trap builder to append to.</param>
    public void EmitToTrapBuilder(ITrapBuilder tb)
    {
        freshLabel.AppendTo(tb);
        tb.Append("=*");
        tb.AppendLine();
    }
}
class PushEmitter : ITrapEmitter
{
readonly Key Key;
@@ -346,20 +332,19 @@ namespace Semmle.Extraction
Key = key;
}
public void EmitToTrapBuilder(ITrapBuilder tb)
public void EmitTrap(TextWriter trapFile)
{
tb.Append(".push ");
Key.AppendTo(tb);
tb.AppendLine();
trapFile.Write(".push ");
Key.AppendTo(trapFile);
trapFile.WriteLine();
}
}
class PopEmitter : ITrapEmitter
{
public void EmitToTrapBuilder(ITrapBuilder tb)
public void EmitTrap(TextWriter trapFile)
{
tb.Append(".pop");
tb.AppendLine();
trapFile.WriteLine(".pop");
}
}
@@ -373,6 +358,13 @@ namespace Semmle.Extraction
/// <exception cref="InternalError">Thrown on invalid trap stack behaviour.</exception>
public void Populate(ISymbol optionalSymbol, ICachedEntity entity)
{
if (WritingLabel)
{
// Don't write tuples etc if we're currently defining a label
PopulateLater(() => Populate(optionalSymbol, entity));
return;
}
bool duplicationGuard;
bool deferred;
@@ -401,8 +393,8 @@ namespace Semmle.Extraction
}
var a = duplicationGuard ?
(Action)(() => WithDuplicationGuard(new Key(entity, this.Create(entity.ReportingLocation)), entity.Populate)) :
(Action)(() => this.Try(null, optionalSymbol, entity.Populate));
(Action)(() => WithDuplicationGuard(new Key(entity, this.Create(entity.ReportingLocation)), () => entity.Populate(TrapWriter.Writer))) :
(Action)(() => this.Try(null, optionalSymbol, () => entity.Populate(TrapWriter.Writer)));
if (deferred)
populateQueue.Enqueue(a);
@@ -448,7 +440,7 @@ namespace Semmle.Extraction
public void BindComments(IEntity entity, Microsoft.CodeAnalysis.Location l)
{
var duplicationGuardKey = tagStack.Count > 0 ? tagStack.Peek() : null;
CommentGenerator.RegisterElementLocation(entity.Label, duplicationGuardKey, l);
CommentGenerator.AddElement(entity.Label, duplicationGuardKey, l);
}
/// <summary>

View File

@@ -1,4 +1,5 @@
using Microsoft.CodeAnalysis;
using System.IO;
namespace Semmle.Extraction.Entities
{
@@ -25,12 +26,12 @@ namespace Semmle.Extraction.Entities
}
}
public override void Populate()
public override void Populate(TextWriter trapFile)
{
if (assemblyPath != null)
{
Context.Emit(Tuples.assemblies(this, File.Create(Context, assemblyPath), assembly.ToString(),
assembly.Identity.Name, assembly.Identity.Version.ToString()));
trapFile.assemblies(this, File.Create(Context, assemblyPath), assembly.ToString(),
assembly.Identity.Name, assembly.Identity.Version.ToString());
}
}
@@ -65,14 +66,15 @@ namespace Semmle.Extraction.Entities
return AssemblyConstructorFactory.Instance.CreateEntity(cx, null);
}
public override IId Id
public override void WriteId(System.IO.TextWriter trapFile)
{
get
trapFile.Write(assembly.ToString());
if (!(assemblyPath is null))
{
return assemblyPath == null
? new Key(assembly, ";assembly")
: new Key(assembly, "#file:///", assemblyPath.Replace("\\", "/"), ";assembly");
trapFile.Write("#file:///");
trapFile.Write(assemblyPath.Replace("\\", "/"));
}
trapFile.Write(";assembly");
}
}
}

View File

@@ -1,10 +1,20 @@
namespace Semmle.Extraction.Entities
using System.IO;
namespace Semmle.Extraction.Entities
{
class ExtractionMessage : FreshEntity
{
readonly Message msg;
public ExtractionMessage(Context cx, Message msg) : base(cx)
{
cx.Emit(Tuples.extractor_messages(this, msg.Severity, "C# extractor", msg.Text, msg.EntityText, msg.Location, msg.StackTrace));
this.msg = msg;
TryPopulate();
}
protected override void Populate(TextWriter trapFile)
{
trapFile.extractor_messages(this, msg.Severity, "C# extractor", msg.Text, msg.EntityText, msg.Location, msg.StackTrace);
}
public override TrapStackBehaviour TrapStackBehaviour => TrapStackBehaviour.NoLabel;

View File

@@ -23,11 +23,11 @@ namespace Semmle.Extraction.Entities
public override bool NeedsPopulation => Context.DefinesFile(Path) || Path == Context.Extractor.OutputPath;
public override void Populate()
public override void Populate(TextWriter trapFile)
{
if (Path == null)
{
Context.Emit(Tuples.files(this, "", "", ""));
trapFile.files(this, "", "", "");
}
else
{
@@ -41,9 +41,9 @@ namespace Semmle.Extraction.Entities
// remove the dot from the extension
if (extension.Length > 0)
extension = extension.Substring(1);
Context.Emit(Tuples.files(this, PathAsDatabaseString(Path), name, extension));
trapFile.files(this, PathAsDatabaseString(Path), name, extension);
Context.Emit(Tuples.containerparent(Entities.Folder.Create(Context, fi.Directory), this));
trapFile.containerparent(Folder.Create(Context, fi.Directory), this);
if (fromSource == 1)
{
foreach (var text in Context.Compilation.SyntaxTrees.
@@ -54,22 +54,23 @@ namespace Semmle.Extraction.Entities
var lineCounts = LineCounter.ComputeLineCounts(rawText);
if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++;
Context.Emit(Tuples.numlines(this, lineCounts));
trapFile.numlines(this, lineCounts);
Context.TrapWriter.Archive(fi.FullName, text.Encoding);
}
}
Context.Emit(Tuples.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0));
trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0);
}
}
public override IId Id
public override void WriteId(System.IO.TextWriter trapFile)
{
get
if (Path is null)
trapFile.Write("GENERATED;sourcefile");
else
{
return Path == null ?
new Key("GENERATED;sourcefile") :
new Key(DatabasePath, ";sourcefile");
trapFile.Write(DatabasePath);
trapFile.Write(";sourcefile");
}
}
@@ -99,12 +100,15 @@ namespace Semmle.Extraction.Entities
public override bool NeedsPopulation => true;
public override void Populate()
public override void Populate(TextWriter trapFile)
{
Context.Emit(Tuples.files(this, "", "", ""));
trapFile.files(this, "", "", "");
}
public override IId Id => new Key("GENERATED;sourcefile");
public override void WriteId(TextWriter trapFile)
{
trapFile.Write("GENERATED;sourcefile");
}
public static GeneratedFile Create(Context cx) =>
GeneratedFileFactory.Instance.CreateEntity(cx, null);

View File

@@ -18,7 +18,7 @@ namespace Semmle.Extraction.Entities
public string DatabasePath => File.PathAsDatabaseId(Path);
public override void Populate()
public override void Populate(TextWriter trapFile)
{
// Ensure that the name of the root directory is consistent
// with the XmlTrapWriter.
@@ -27,16 +27,20 @@ namespace Semmle.Extraction.Entities
// On Windows: System.IO.DirectoryInfo.Name returns "L:\"
string shortName = symbol.Parent == null ? "" : symbol.Name;
Context.Emit(Tuples.folders(this, File.PathAsDatabaseString(Path), shortName));
trapFile.folders(this, File.PathAsDatabaseString(Path), shortName);
if (symbol.Parent != null)
{
Context.Emit(Tuples.containerparent(Create(Context, symbol.Parent), this));
trapFile.containerparent(Create(Context, symbol.Parent), this);
}
}
public override bool NeedsPopulation => true;
public override IId Id => new Key(DatabasePath, ";folder");
public override void WriteId(System.IO.TextWriter trapFile)
{
trapFile.Write(DatabasePath);
trapFile.Write(";folder");
}
public static Folder Create(Context cx, DirectoryInfo folder) =>
FolderFactory.Instance.CreateEntity2(cx, folder);

View File

@@ -1,3 +1,5 @@
using System.IO;
namespace Semmle.Extraction.Entities
{
public class GeneratedLocation : SourceLocation
@@ -10,12 +12,17 @@ namespace Semmle.Extraction.Entities
GeneratedFile = File.CreateGenerated(cx);
}
public override void Populate()
public override void Populate(TextWriter trapFile)
{
Context.Emit(Tuples.locations_default(this, GeneratedFile, 0, 0, 0, 0));
trapFile.locations_default(this, GeneratedFile, 0, 0, 0, 0);
}
public override IId Id => new Key("loc,", GeneratedFile, ",0,0,0,0");
public override void WriteId(TextWriter trapFile)
{
trapFile.Write("loc,");
trapFile.WriteSubId(GeneratedFile);
trapFile.Write(",0,0,0,0");
}
public override int GetHashCode() => 98732567;

View File

@@ -1,3 +1,4 @@
using System.IO;
using Microsoft.CodeAnalysis;
namespace Semmle.Extraction.Entities
@@ -9,12 +10,12 @@ namespace Semmle.Extraction.Entities
public new static Location Create(Context cx, Microsoft.CodeAnalysis.Location loc) => SourceLocationFactory.Instance.CreateEntity(cx, loc);
public override void Populate()
public override void Populate(TextWriter trapFile)
{
Position = symbol.GetLineSpan();
FileEntity = File.Create(Context, Position.Path);
Context.Emit(Tuples.locations_default(this, FileEntity, Position.Span.Start.Line + 1, Position.Span.Start.Character + 1,
Position.Span.End.Line + 1, Position.Span.End.Character));
trapFile.locations_default(this, FileEntity, Position.Span.Start.Line + 1, Position.Span.Start.Character + 1,
Position.Span.End.Line + 1, Position.Span.End.Character);
}
public override bool NeedsPopulation => true;
@@ -31,15 +32,20 @@ namespace Semmle.Extraction.Entities
private set;
}
public override IId Id
public override void WriteId(System.IO.TextWriter trapFile)
{
get
{
FileLinePositionSpan l = symbol.GetLineSpan();
FileEntity = Entities.File.Create(Context, l.Path);
return new Key("loc,", FileEntity, ",", l.Span.Start.Line + 1, ",",
l.Span.Start.Character + 1, ",", l.Span.End.Line + 1, ",", l.Span.End.Character);
}
FileLinePositionSpan l = symbol.GetLineSpan();
FileEntity = Entities.File.Create(Context, l.Path);
trapFile.Write("loc,");
trapFile.WriteSubId(FileEntity);
trapFile.Write(',');
trapFile.Write(l.Span.Start.Line + 1);
trapFile.Write(',');
trapFile.Write(l.Span.Start.Character + 1);
trapFile.Write(',');
trapFile.Write(l.Span.End.Line + 1);
trapFile.Write(',');
trapFile.Write(l.Span.End.Character);
}
class SourceLocationFactory : ICachedEntityFactory<Microsoft.CodeAnalysis.Location, SourceLocation>

View File

@@ -1,5 +1,5 @@
using Microsoft.CodeAnalysis;
using System;
using System.IO;
namespace Semmle.Extraction
{
@@ -21,20 +21,31 @@ namespace Semmle.Extraction
{
/// <summary>
/// The label of the entity, as it is in the trap file.
/// For example, "#123".
/// </summary>
Label Label { set; get; }
/// <summary>
/// The ID used for the entity, as it is in the trap file.
/// Could be '*'.
/// Writes the unique identifier of this entity to a trap file.
/// </summary>
IId Id { get; }
/// <param name="trapFile">The trap file to write to.</param>
void WriteId(TextWriter trapFile);
/// <summary>
/// Writes the quoted identifier of this entity,
/// which could be @"..." or *
/// </summary>
/// <param name="trapFile">The trapfile to write to.</param>
void WriteQuotedId(TextWriter trapFile);
/// <summary>
/// The location for reporting purposes.
/// </summary>
Location ReportingLocation { get; }
/// <summary>
/// How the entity handles .push and .pop.
/// </summary>
TrapStackBehaviour TrapStackBehaviour { get; }
}
@@ -76,7 +87,7 @@ namespace Semmle.Extraction
/// as required. Is only called when <see cref="NeedsPopulation"/> returns
/// <code>true</code> and the entity has not already been populated.
/// </summary>
void Populate();
void Populate(TextWriter trapFile);
bool NeedsPopulation { get; }
@@ -129,5 +140,36 @@ namespace Semmle.Extraction
/// <returns>The entity.</returns>
public static Entity CreateEntity2<Type, Entity>(this ICachedEntityFactory<Type, Entity> factory, Context cx, Type init)
where Entity : ICachedEntity => cx.CreateEntity2(factory, init);
/// <summary>
/// Writes a label definition line for an entity: the entity's label,
/// an equals sign, the entity's quoted identifier and a line terminator.
/// </summary>
/// <param name="entity">The entity whose label is being defined.</param>
/// <param name="trapFile">The trap file to write to.</param>
public static void DefineLabel(this IEntity entity, TextWriter trapFile)
{
    // Same output as WriteLabel(entity), which forwards to the integer overload.
    trapFile.WriteLabel(entity.Label.Value);
    trapFile.Write("=");
    entity.WriteQuotedId(trapFile);
    trapFile.WriteLine();
}
/// <summary>
/// Writes a label definition line that binds the entity's label to
/// <c>*</c> instead of to a quoted identifier.
/// </summary>
/// <param name="entity">The entity whose label is being defined.</param>
/// <param name="trapFile">The trap file to write to.</param>
public static void DefineFreshLabel(this IEntity entity, TextWriter trapFile)
{
    // Same output as WriteLabel(entity), which forwards to the integer overload.
    trapFile.WriteLabel(entity.Label.Value);
    trapFile.WriteLine("=*");
}
/// <summary>
/// Renders an entity as text for debugging: its label, an equals sign
/// and its quoted identifier, with no trailing line terminator.
/// </summary>
/// <param name="entity">The entity to render.</param>
/// <returns>The rendered text.</returns>
public static string GetDebugLabel(this IEntity entity)
{
    string rendered;
    using (var buffer = new StringWriter())
    {
        buffer.WriteLabel(entity);
        buffer.Write('=');
        entity.WriteQuotedId(buffer);
        rendered = buffer.ToString();
    }
    return rendered;
}
}
}

View File

@@ -1,4 +1,6 @@
using Semmle.Extraction.Entities;
using System;
using System.IO;
namespace Semmle.Extraction
{
@@ -20,9 +22,39 @@ namespace Semmle.Extraction
get; set;
}
public override string ToString() => Label.ToString();
/// <summary>
/// Writes the identifier of this entity, which is always <c>*</c>.
/// </summary>
/// <param name="writer">The writer that receives the identifier.</param>
public void WriteId(TextWriter writer) => writer.Write('*');
public IId Id => FreshId.Instance;
/// <summary>
/// Writes the quoted identifier of this entity. No quoting is applied here:
/// the output is exactly what <see cref="WriteId"/> writes.
/// </summary>
/// <param name="writer">The writer that receives the identifier.</param>
public void WriteQuotedId(TextWriter writer) => WriteId(writer);
protected abstract void Populate(TextWriter trapFile);
/// <summary>
/// Populates this entity into the writer of the context's trap writer.
/// The call is routed through <c>cx.Try</c>, with <c>null</c> passed for its first two arguments.
/// </summary>
protected void TryPopulate() => cx.Try(null, null, () => Populate(cx.TrapWriter.Writer));
/// <summary>
/// For debugging: the text that <c>Populate</c> writes when it is run
/// against an in-memory writer instead of the trap file.
/// </summary>
public string DebugContents
{
    get
    {
        string contents;
        using (var buffer = new StringWriter())
        {
            Populate(buffer);
            contents = buffer.ToString();
        }
        return contents;
    }
}
public override string ToString() => Label.ToString();
public virtual Microsoft.CodeAnalysis.Location ReportingLocation => null;

View File

@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace Semmle.Extraction
@@ -16,7 +17,7 @@ namespace Semmle.Extraction
/// <summary>
/// Appends this ID to the supplied trap builder.
/// </summary>
void AppendTo(ITrapBuilder tb);
void AppendTo(TextWriter trapFile);
}
/// <summary>
@@ -37,9 +38,9 @@ namespace Semmle.Extraction
public override int GetHashCode() => 0;
public void AppendTo(ITrapBuilder tb)
public void AppendTo(TextWriter trapFile)
{
tb.Append("*");
trapFile.Write('*');
}
}
@@ -49,35 +50,41 @@ namespace Semmle.Extraction
/// </summary>
public class Key : IId
{
readonly IdTrapBuilder TrapBuilder;
readonly StringWriter TrapBuilder = new StringWriter();
/// <summary>
/// Creates a new key by concatenating the contents of the supplied
/// arguments.
/// Creates a new key by concatenating the contents of the supplied arguments.
/// </summary>
public Key(params object[] args)
{
TrapBuilder = new IdTrapBuilder();
TrapBuilder = new StringWriter();
foreach (var arg in args)
TrapBuilder.Append(arg);
{
if (arg is IEntity)
{
var key = ((IEntity)arg).Label;
TrapBuilder.Write("{#");
TrapBuilder.Write(key.Value.ToString());
TrapBuilder.Write("}");
}
else
TrapBuilder.Write(arg.ToString());
}
}
/// <summary>
/// Creates a new key by applying the supplied action to an empty
/// trap builder.
/// </summary>
public Key(Action<ITrapBuilder> action)
public Key(Action<TextWriter> action)
{
TrapBuilder = new IdTrapBuilder();
action(TrapBuilder);
}
public override string ToString()
{
// Only implemented for debugging purposes
var tsb = new TrapStringBuilder();
AppendTo(tsb);
return tsb.ToString();
return TrapBuilder.ToString();
}
public override bool Equals(object obj)
@@ -85,68 +92,23 @@ namespace Semmle.Extraction
if (obj.GetType() != GetType())
return false;
var id = (Key)obj;
return id.TrapBuilder.Fragments.SequenceEqual(TrapBuilder.Fragments);
return TrapBuilder.ToString() == id.TrapBuilder.ToString();
}
public override int GetHashCode()
public override int GetHashCode() => TrapBuilder.ToString().GetHashCode();
public void AppendTo(TextWriter trapFile)
{
unchecked
{
int hash = 17;
foreach (var fragment in TrapBuilder.Fragments)
{
hash = hash * 23 + fragment.GetHashCode();
}
return hash;
}
}
public void AppendTo(ITrapBuilder tb)
{
tb.Append("@\"");
foreach (var fragment in TrapBuilder.Fragments)
tb.Append(fragment);
tb.Append("\"");
}
class IdTrapBuilder : ITrapBuilder
{
readonly public List<string> Fragments = new List<string>();
public ITrapBuilder Append(object arg)
{
if (arg is IEntity)
{
var key = ((IEntity)arg).Label;
Fragments.Add("{#");
Fragments.Add(key.Value.ToString());
Fragments.Add("}");
}
else
Fragments.Add(arg.ToString());
return this;
}
public ITrapBuilder Append(string arg)
{
Fragments.Add(arg);
return this;
}
public ITrapBuilder AppendLine()
{
throw new NotImplementedException();
}
trapFile.Write("@\"");
trapFile.Write(TrapBuilder.ToString());
trapFile.Write("\"");
}
}
/// <summary>
/// A label referencing an entity, of the form "#123".
/// </summary>
public struct Label : IId
public struct Label
{
public Label(int value) : this()
{
@@ -181,13 +143,14 @@ namespace Semmle.Extraction
/// <summary>
/// Constructs a unique string for this label.
/// </summary>
/// <param name="tb">The trap builder used to store the result.</param>
public void AppendTo(ITrapBuilder tb)
/// <param name="trapFile">The trap builder used to store the result.</param>
public void AppendTo(System.IO.TextWriter trapFile)
{
if (!Valid)
throw new NullReferenceException("Attempt to use an invalid label");
tb.Append("#").Append(Value);
trapFile.Write('#');
trapFile.Write(Value);
}
}
}

View File

@@ -31,8 +31,8 @@ namespace Semmle.Extraction
}
public Location Location { get; }
public string Text;
public string EntityText;
public string Text { get; }
public string EntityText { get; }
public override string Message => Text;
}

View File

@@ -54,14 +54,15 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>The full filepath of the trap file.</returns>
public string GetTrapPath(ILogger logger, string srcFile) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile);
public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
/// <summary>
/// Creates a trap writer for a given source/assembly file.
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>A newly created TrapWriter.</returns>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates);
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression);
}
readonly SubProject DefaultProject;

View File

@@ -0,0 +1,40 @@
using Microsoft.CodeAnalysis;
namespace Semmle.Extraction
{
public static class LocationExtensions
{
    /// <summary>
    /// The line of the start position of the location's line span.
    /// </summary>
    public static int StartLine(this Location loc)
    {
        return loc.GetLineSpan().Span.Start.Line;
    }

    /// <summary>
    /// The character offset of the start position of the location's line span.
    /// </summary>
    public static int StartColumn(this Location loc)
    {
        return loc.GetLineSpan().Span.Start.Character;
    }

    /// <summary>
    /// The line of the end position of the location's line span.
    /// </summary>
    public static int EndLine(this Location loc)
    {
        return loc.GetLineSpan().Span.End.Line;
    }

    /// <summary>
    /// Whether the location <paramref name="outer"/> completely contains the location <paramref name="inner"/>.
    /// </summary>
    /// <param name="outer">The outer location.</param>
    /// <param name="inner">The inner location.</param>
    /// <returns>
    /// True if both locations belong to the same source tree and the source span of
    /// <paramref name="inner"/> lies within the source span of <paramref name="outer"/>
    /// (shared end points count as contained).
    /// </returns>
    public static bool Contains(this Location outer, Location inner)
    {
        if (outer.SourceTree != inner.SourceTree)
        {
            return false;
        }

        var outerSpan = outer.SourceSpan;
        var innerSpan = inner.SourceSpan;
        return outerSpan.Start <= innerSpan.Start && innerSpan.End <= outerSpan.End;
    }

    /// <summary>
    /// Whether one location ends before another one starts.
    /// </summary>
    /// <param name="before">The location expected to come first.</param>
    /// <param name="after">The location expected to come second.</param>
    /// <returns>
    /// True if both locations belong to the same source tree and <paramref name="before"/>
    /// ends at or before the position where <paramref name="after"/> starts.
    /// </returns>
    public static bool Before(this Location before, Location after)
    {
        if (before.SourceTree != after.SourceTree)
        {
            return false;
        }

        return before.SourceSpan.End <= after.SourceSpan.Start;
    }
}
}

View File

@@ -44,6 +44,11 @@ namespace Semmle.Extraction
/// </summary>
public bool Fast = false;
/// <summary>
/// The compression algorithm used for trap files.
/// </summary>
public TrapWriter.CompressionMode TrapCompression = TrapWriter.CompressionMode.Gzip;
public virtual bool handleOption(string key, string value)
{
switch (key)
@@ -85,6 +90,9 @@ namespace Semmle.Extraction
CIL = !value;
Fast = value;
return true;
case "brotli":
TrapCompression = value ? TrapWriter.CompressionMode.Brotli : TrapWriter.CompressionMode.Gzip;
return true;
default:
return false;
}

View File

@@ -1,3 +1,5 @@
using System.IO;
namespace Semmle.Extraction
{
/// <summary>
@@ -18,7 +20,22 @@ namespace Semmle.Extraction
public override string ToString() => Label.ToString();
public abstract void Populate();
public abstract void Populate(TextWriter trapFile);
/// <summary>
/// For debugging: the text that <see cref="Populate"/> writes when it is run
/// against an in-memory writer instead of the trap file.
/// </summary>
public string DebugContents
{
    get
    {
        string contents;
        using (var buffer = new StringWriter())
        {
            Populate(buffer);
            contents = buffer.ToString();
        }
        return contents;
    }
}
public Context Context
{
@@ -34,9 +51,13 @@ namespace Semmle.Extraction
public Initializer UnderlyingObject => symbol;
public abstract IId Id
public abstract void WriteId(System.IO.TextWriter trapFile);
public void WriteQuotedId(TextWriter trapFile)
{
get;
trapFile.Write("@\"");
WriteId(trapFile);
trapFile.Write('\"');
}
public abstract bool NeedsPopulation

View File

@@ -1,96 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Semmle.Extraction
{
/// <summary>
/// A trap builder.
///
/// A trap builder is used to construct a string that is to be
/// persisted in a trap file (similar to how a <see cref="StringBuilder"/>
/// can be used to construct a string).
/// </summary>
public interface ITrapBuilder
{
/// <summary>
/// Append the given object to this trap builder.
/// </summary>
/// <param name="arg">The object to append.</param>
/// <returns>A trap builder, so that calls can be chained.</returns>
ITrapBuilder Append(object arg);
/// <summary>
/// Append the given string to this trap builder.
/// </summary>
/// <param name="arg">The string to append.</param>
/// <returns>A trap builder, so that calls can be chained.</returns>
ITrapBuilder Append(string arg);
/// <summary>
/// Append a newline to this trap builder.
/// </summary>
/// <returns>A trap builder, so that calls can be chained.</returns>
ITrapBuilder AppendLine();
}
public static class ITrapBuilderExtensions
{
    /// <summary>
    /// Appends the items of a sequence to a trap builder, placing the
    /// separator between consecutive items.
    /// </summary>
    /// <typeparam name="T">The type of the items.</typeparam>
    /// <param name="tb">The trap builder to append to.</param>
    /// <param name="separator">The separator string (e.g. ",").</param>
    /// <param name="items">The items to append.</param>
    /// <returns>The original trap builder (fluent interface).</returns>
    public static ITrapBuilder AppendList<T>(this ITrapBuilder tb, string separator, IEnumerable<T> items) =>
        tb.BuildList(separator, items, (item, builder) => builder.Append(item));

    /// <summary>
    /// Runs an action for every item of a sequence, appending the separator
    /// to the trap builder before each item except the first.
    /// </summary>
    /// <typeparam name="T">The type of the items.</typeparam>
    /// <param name="tb">The trap builder to append to.</param>
    /// <param name="separator">The separator string (e.g. ",").</param>
    /// <param name="items">The items to process.</param>
    /// <param name="action">The action invoked with each item and the trap builder.</param>
    /// <returns>The original trap builder (fluent interface).</returns>
    public static ITrapBuilder BuildList<T>(this ITrapBuilder tb, string separator, IEnumerable<T> items, Action<T, ITrapBuilder> action)
    {
        var needsSeparator = false;
        foreach (var item in items)
        {
            if (needsSeparator)
            {
                tb.Append(separator);
            }
            needsSeparator = true;
            action(item, tb);
        }
        return tb;
    }
}
/// <summary>
/// An <see cref="ITrapBuilder"/> that accumulates its output in a
/// <see cref="StringBuilder"/>; used for debugging only.
/// </summary>
public class TrapStringBuilder : ITrapBuilder
{
    // Accumulates everything appended so far.
    private readonly StringBuilder text = new StringBuilder();

    /// <summary>
    /// Appends the given object and returns this builder.
    /// </summary>
    public ITrapBuilder Append(object arg)
    {
        text.Append(arg);
        return this;
    }

    /// <summary>
    /// Appends the given string and returns this builder.
    /// </summary>
    public ITrapBuilder Append(string arg)
    {
        text.Append(arg);
        return this;
    }

    /// <summary>
    /// Appends a line terminator and returns this builder.
    /// </summary>
    public ITrapBuilder AppendLine()
    {
        text.AppendLine();
        return this;
    }

    /// <summary>
    /// Returns everything appended so far.
    /// </summary>
    public override string ToString()
    {
        return text.ToString();
    }
}
}

View File

@@ -0,0 +1,252 @@
using System;
using System.Collections.Generic;
using System.IO;
namespace Semmle.Extraction
{
/// <summary>
/// Extension methods on <see cref="TextWriter"/> for writing the pieces of a trap file:
/// labels, sub-identifiers, column values and whole tuples.
/// </summary>
public static class TrapExtensions
{
    /// <summary>
    /// Writes a label of the form <c>#value</c>.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="value">The numeric value of the label.</param>
    public static void WriteLabel(this TextWriter trapFile, int value)
    {
        trapFile.Write('#');
        trapFile.Write(value);
    }

    /// <summary>
    /// Writes the label of an entity in the form <c>#value</c>.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="entity">The entity whose label is written.</param>
    public static void WriteLabel(this TextWriter trapFile, IEntity entity)
    {
        trapFile.WriteLabel(entity.Label.Value);
    }

    /// <summary>
    /// Writes a reference to an entity for use inside an identifier: the entity's
    /// label wrapped in braces, i.e. <c>{#value}</c>.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="entity">The entity being referenced.</param>
    public static void WriteSubId(this TextWriter trapFile, IEntity entity)
    {
        trapFile.Write('{');
        trapFile.WriteLabel(entity);
        trapFile.Write('}');
    }

    /// <summary>
    /// Writes a separator before every item of a sequence except the first.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="separator">The separator string (e.g. ",").</param>
    /// <param name="index">
    /// The number of items written so far. Nothing is written while it is zero or less;
    /// it is incremented on every call.
    /// </param>
    public static void WriteSeparator(this TextWriter trapFile, string separator, ref int index)
    {
        if (index++ > 0)
        {
            trapFile.Write(separator);
        }
    }

    /// <summary>
    /// Writes an integer column.
    /// </summary>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter WriteColumn(this TextWriter trapFile, int i)
    {
        trapFile.Write(i);
        return trapFile;
    }

    /// <summary>
    /// Writes a string column, quoted and escaped by <see cref="WriteTrapString"/>.
    /// </summary>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter WriteColumn(this TextWriter trapFile, string s)
    {
        trapFile.WriteTrapString(s);
        return trapFile;
    }

    /// <summary>
    /// Writes an entity column, which is the label of the entity.
    /// </summary>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter WriteColumn(this TextWriter trapFile, IEntity entity)
    {
        trapFile.WriteLabel(entity.Label.Value);
        return trapFile;
    }

    /// <summary>
    /// Writes a label column.
    /// </summary>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter WriteColumn(this TextWriter trapFile, Label label)
    {
        trapFile.WriteLabel(label.Value);
        return trapFile;
    }

    /// <summary>
    /// Writes a floating point column, formatted by <see cref="WriteTrapFloat"/>.
    /// </summary>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter WriteColumn(this TextWriter trapFile, float f)
    {
        trapFile.WriteTrapFloat(f);
        return trapFile;
    }

    /// <summary>
    /// Writes a column whose type is only known at run time, dispatching to the
    /// strongly typed overload. Supported values are <see cref="int"/>, <see cref="float"/>,
    /// <see cref="string"/>, <see cref="IEntity"/>, <see cref="Label"/> and enums
    /// (written as their integer value).
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="o">The column value.</param>
    /// <returns>The original writer (fluent interface).</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="o"/> is null or of an unsupported type.
    /// </exception>
    public static TextWriter WriteColumn(this TextWriter trapFile, object o)
    {
        switch (o)
        {
            case int i:
                return trapFile.WriteColumn(i);
            case float f:
                return trapFile.WriteColumn(f);
            case string s:
                return trapFile.WriteColumn(s);
            case IEntity e:
                return trapFile.WriteColumn(e);
            case Label l:
                return trapFile.WriteColumn(l);
            case Enum enumValue:
                // Convert rather than unbox: a direct (int) cast of the boxed value only
                // succeeds when the enum's underlying type is exactly int.
                return trapFile.WriteColumn(Convert.ToInt32(enumValue, System.Globalization.CultureInfo.InvariantCulture));
            case null:
                // ArgumentNullException derives from ArgumentException, so existing handlers still apply.
                throw new ArgumentNullException(nameof(o), "Attempt to write a null column");
            default:
                throw new ArgumentException($"Attempt to write a column of unsupported type {o.GetType()}", nameof(o));
        }
    }

    // Upper bound on the UTF-8 size of a string column, measured before quotes are escaped.
    const int maxStringBytes = 1 << 20; // 1MB

    static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;

    /// <summary>
    /// Whether the UTF-8 encoding of a string exceeds <see cref="maxStringBytes"/>.
    /// </summary>
    private static bool NeedsTruncation(string s)
    {
        // Optimization: only count the actual number of bytes if there is the possibility
        // of the string exceeding maxStringBytes
        return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
            encoding.GetByteCount(s) > maxStringBytes;
    }

    /// <summary>
    /// Writes a string with its double quotes escaped, without surrounding quotes.
    /// </summary>
    private static void WriteString(TextWriter trapFile, string s) => trapFile.Write(EncodeString(s));

    /// <summary>
    /// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
    /// A surrogate pair is either kept or dropped as a whole, so the result never ends
    /// in half of a code point.
    /// </summary>
    /// <param name="s">The input string to truncate.</param>
    /// <param name="bytesRemaining">
    /// The number of bytes available; reduced by the UTF-8 size of the returned string.
    /// </param>
    /// <returns>The truncated string.</returns>
    private static string TruncateString(string s, ref int bytesRemaining)
    {
        int outputLen = encoding.GetByteCount(s);
        if (outputLen > bytesRemaining)
        {
            outputLen = 0;
            int chars = 0;
            while (chars < s.Length)
            {
                // Measure a surrogate pair as one unit. Measured separately, each half
                // counts as a replacement character, which over-counts the pair and
                // allows the cut to fall between the two halves.
                int width = char.IsSurrogatePair(s, chars) ? 2 : 1;
                int bytes = encoding.GetByteCount(s, chars, width);
                if (outputLen + bytes > bytesRemaining)
                {
                    break;
                }
                outputLen += bytes;
                chars += width;
            }
            s = s.Substring(0, chars);
        }
        bytesRemaining -= outputLen;
        return s;
    }

    /// <summary>
    /// Escapes a string for use inside a quoted trap string by doubling every double quote.
    /// </summary>
    private static string EncodeString(string s) => s.Replace("\"", "\"\"");

    /// <summary>
    /// Output a string to the trap file, truncated such that its UTF-8 size does not exceed
    /// <paramref name="bytesRemaining"/> bytes.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="s">The string to output.</param>
    /// <param name="bytesRemaining">The remaining bytes available to output.</param>
    private static void WriteTruncatedString(TextWriter trapFile, string s, ref int bytesRemaining)
    {
        // NOTE(review): the limit is applied before quotes are doubled, so the text actually
        // written can be longer than the limit when the string contains double quotes.
        WriteString(trapFile, TruncateString(s, ref bytesRemaining));
    }

    /// <summary>
    /// Writes a string surrounded by double quotes, with embedded double quotes doubled.
    /// Strings whose UTF-8 encoding exceeds <see cref="maxStringBytes"/> are truncated first.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="s">The string to write.</param>
    public static void WriteTrapString(this TextWriter trapFile, string s)
    {
        trapFile.Write('\"');
        if (NeedsTruncation(s))
        {
            // Slow path
            int remaining = maxStringBytes;
            WriteTruncatedString(trapFile, s, ref remaining);
        }
        else
        {
            // Fast path
            WriteString(trapFile, s);
        }
        trapFile.Write('\"');
    }

    /// <summary>
    /// Writes a floating point number in exponent notation with up to five decimals.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="f">The number to write.</param>
    public static void WriteTrapFloat(this TextWriter trapFile, float f)
    {
        // Trap importer won't accept ints, hence the exponent notation. The invariant
        // culture keeps the decimal separator a '.' whatever the locale of the machine.
        trapFile.Write(f.ToString("0.#####e0", System.Globalization.CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Writes a tuple of the form <c>name(c1,c2,...)</c> followed by a line terminator.
    /// </summary>
    /// <param name="trapFile">The trap file to write to.</param>
    /// <param name="name">The name of the tuple.</param>
    /// <param name="params">The column values; see <see cref="WriteColumn(TextWriter, object)"/> for the supported types.</param>
    public static void WriteTuple(this TextWriter trapFile, string name, params object[] @params)
    {
        trapFile.Write(name);
        trapFile.Write('(');
        int index = 0;
        foreach (var p in @params)
        {
            trapFile.WriteSeparator(",", ref index);
            trapFile.WriteColumn(p);
        }
        trapFile.WriteLine(')');
    }

    /// <summary>
    /// Writes a one-column tuple <c>name(p1)</c> followed by a line terminator.
    /// </summary>
    public static void WriteTuple(this TextWriter trapFile, string name, IEntity p1)
    {
        trapFile.Write(name);
        trapFile.Write('(');
        trapFile.WriteColumn(p1);
        trapFile.WriteLine(')');
    }

    /// <summary>
    /// Writes a two-column tuple <c>name(p1,p2)</c> followed by a line terminator.
    /// </summary>
    public static void WriteTuple(this TextWriter trapFile, string name, IEntity p1, object p2)
    {
        trapFile.Write(name);
        trapFile.Write('(');
        trapFile.WriteColumn(p1);
        trapFile.Write(',');
        trapFile.WriteColumn(p2);
        trapFile.WriteLine(')');
    }

    /// <summary>
    /// Writes a three-column tuple <c>name(p1,p2,p3)</c> followed by a line terminator.
    /// </summary>
    public static void WriteTuple(this TextWriter trapFile, string name, IEntity p1, object p2, object p3)
    {
        trapFile.Write(name);
        trapFile.Write('(');
        trapFile.WriteColumn(p1);
        trapFile.Write(',');
        trapFile.WriteColumn(p2);
        trapFile.Write(',');
        trapFile.WriteColumn(p3);
        trapFile.WriteLine(')');
    }

    /// <summary>
    /// Writes a four-column tuple <c>name(p1,p2,p3,p4)</c> followed by a line terminator.
    /// </summary>
    public static void WriteTuple(this TextWriter trapFile, string name, IEntity p1, object p2, object p3, object p4)
    {
        trapFile.Write(name);
        trapFile.Write('(');
        trapFile.WriteColumn(p1);
        trapFile.Write(',');
        trapFile.WriteColumn(p2);
        trapFile.Write(',');
        trapFile.WriteColumn(p3);
        trapFile.Write(',');
        trapFile.WriteColumn(p4);
        trapFile.WriteLine(')');
    }

    /// <summary>
    /// Appends a [comma] separated list of entity references (see <see cref="WriteSubId"/>)
    /// to a trap file.
    /// </summary>
    /// <typeparam name="T">The type of the list.</typeparam>
    /// <param name="trapFile">The trap file to append to.</param>
    /// <param name="separator">The separator string (e.g. ",")</param>
    /// <param name="items">The list of items.</param>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter AppendList<T>(this TextWriter trapFile, string separator, IEnumerable<T> items) where T : IEntity
    {
        return trapFile.BuildList(separator, items, (x, tb0) => { tb0.WriteSubId(x); });
    }

    /// <summary>
    /// Writes a list to a trap file using a separator and an action for each item in the list.
    /// </summary>
    /// <typeparam name="T">The type of the items.</typeparam>
    /// <param name="trapFile">The trap file to append to.</param>
    /// <param name="separator">The separator string (e.g. ",")</param>
    /// <param name="items">The list of items.</param>
    /// <param name="action">The action on each item.</param>
    /// <returns>The original writer (fluent interface).</returns>
    public static TextWriter BuildList<T>(this TextWriter trapFile, string separator, IEnumerable<T> items, Action<T, TextWriter> action)
    {
        bool first = true;
        foreach (var item in items)
        {
            if (first)
            {
                first = false;
            }
            else
            {
                trapFile.Write(separator);
            }
            action(item, trapFile);
        }
        return trapFile;
    }
}
}

View File

@@ -1,33 +1,32 @@
using Semmle.Util;
using Semmle.Util.Logging;
using System;
using System.IO;
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text;
using Semmle.Util;
using Semmle.Util.Logging;
namespace Semmle.Extraction
{
public interface ITrapEmitter
{
void EmitToTrapBuilder(ITrapBuilder tb);
void EmitTrap(TextWriter trapFile);
}
public sealed class TrapWriter : IDisposable
{
//#################### ENUMERATIONS ####################
#region
public enum InnerPathComputation
{
ABSOLUTE,
RELATIVE
}
#endregion
//#################### PRIVATE VARIABLES ####################
#region
/// <summary>
/// The compression applied to a trap file as it is written.
/// </summary>
public enum CompressionMode
{
// No compression: the file stream is written to directly and the file name ends in ".trap".
None,
// Gzip compression: the file name ends in ".trap.gz".
Gzip,
// Brotli compression: the file name ends in ".trap.br".
Brotli
}
/// <summary>
/// The location of the src_archive directory.
@@ -37,29 +36,22 @@ namespace Semmle.Extraction
private readonly bool discardDuplicates;
#endregion
//#################### PROPERTIES ####################
#region
public int IdCounter { get; set; } = 1;
readonly Lazy<StreamWriter> WriterLazy;
readonly Lazy<TrapBuilder> BuilderLazy;
TrapBuilder Builder => BuilderLazy.Value;
public StreamWriter Writer => WriterLazy.Value;
readonly ILogger Logger;
#endregion
readonly CompressionMode TrapCompression;
//#################### CONSTRUCTORS ####################
#region
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates)
public TrapWriter(ILogger logger, string outputfile, string trap, string archive, bool discardDuplicates, CompressionMode trapCompression)
{
Logger = logger;
TrapFile = TrapPath(Logger, trap, outputfile);
TrapCompression = trapCompression;
TrapFile = TrapPath(Logger, trap, outputfile, trapCompression);
WriterLazy = new Lazy<StreamWriter>(() =>
{
@@ -81,10 +73,27 @@ namespace Semmle.Extraction
while (File.Exists(tmpFile));
var fileStream = new FileStream(tmpFile, FileMode.CreateNew, FileAccess.Write);
var compressionStream = new GZipStream(fileStream, CompressionMode.Compress);
Stream compressionStream;
switch (trapCompression)
{
case CompressionMode.Brotli:
compressionStream = new BrotliStream(fileStream, CompressionLevel.Fastest);
break;
case CompressionMode.Gzip:
compressionStream = new GZipStream(fileStream, CompressionLevel.Fastest);
break;
case CompressionMode.None:
compressionStream = fileStream;
break;
default:
throw new ArgumentException(nameof(trapCompression));
}
return new StreamWriter(compressionStream, UTF8, 2000000);
});
BuilderLazy = new Lazy<TrapBuilder>(() => new TrapBuilder(WriterLazy.Value));
this.archive = archive;
this.discardDuplicates = discardDuplicates;
}
@@ -95,11 +104,6 @@ namespace Semmle.Extraction
public readonly string TrapFile;
string tmpFile; // The temporary file which is moved to trapFile once written.
#endregion
//#################### PUBLIC METHODS ####################
#region
/// <summary>
/// Adds the specified input file to the source archive. It may end up in either the normal or long path area
/// of the source archive, depending on the length of its full path.
@@ -183,7 +187,7 @@ namespace Semmle.Extraction
if (existingHash != hash)
{
var root = TrapFile.Substring(0, TrapFile.Length - 8); // Remove trailing ".trap.gz"
if (TryMove(tmpFile, $"{root}-{hash}.trap.gz"))
if (TryMove(tmpFile, $"{root}-{hash}.trap{TrapExtension(TrapCompression)}"))
return;
}
Logger.Log(Severity.Info, "Identical trap file for {0} already exists", TrapFile);
@@ -198,14 +202,9 @@ namespace Semmle.Extraction
public void Emit(ITrapEmitter emitter)
{
emitter.EmitToTrapBuilder(Builder);
emitter.EmitTrap(Writer);
}
#endregion
//#################### PRIVATE METHODS ####################
#region
/// <summary>
/// Computes the hash of <paramref name="filePath"/>.
/// </summary>
@@ -222,34 +221,6 @@ namespace Semmle.Extraction
}
}
class TrapBuilder : ITrapBuilder
{
readonly StreamWriter StreamWriter;
public TrapBuilder(StreamWriter sw)
{
StreamWriter = sw;
}
public ITrapBuilder Append(object arg)
{
StreamWriter.Write(arg);
return this;
}
public ITrapBuilder Append(string arg)
{
StreamWriter.Write(arg);
return this;
}
public ITrapBuilder AppendLine()
{
StreamWriter.WriteLine();
return this;
}
}
/// <summary>
/// Attempts to archive the specified input file to the normal area of the source archive.
/// The file's path must be sufficiently short so as to render the path of its copy in the
@@ -311,15 +282,24 @@ namespace Semmle.Extraction
return nested;
}
public static string TrapPath(ILogger logger, string folder, string filename)
static string TrapExtension(CompressionMode compression)
{
filename = Path.GetFullPath(filename) + ".trap.gz";
switch (compression)
{
case CompressionMode.None: return "";
case CompressionMode.Gzip: return ".gz";
case CompressionMode.Brotli: return ".br";
default: throw new ArgumentException(nameof(compression));
}
}
/// <summary>
/// Computes the path of the trap file for a source file: the full path of the source file
/// with ".trap" and the extension of the compression mode appended, combined with
/// <paramref name="folder"/> by <c>NestPaths</c>.
/// </summary>
/// <param name="logger">The logger passed on to <c>NestPaths</c>.</param>
/// <param name="folder">The trap folder; the current directory is used when this is null or empty.</param>
/// <param name="filename">The source file.</param>
/// <param name="trapCompression">The compression mode, which determines the file extension.</param>
/// <returns>The path of the trap file.</returns>
public static string TrapPath(ILogger logger, string folder, string filename, TrapWriter.CompressionMode trapCompression)
{
    var trapName = Path.GetFullPath(filename) + ".trap" + TrapExtension(trapCompression);
    var root = string.IsNullOrEmpty(folder) ? Directory.GetCurrentDirectory() : folder;
    return NestPaths(logger, root, trapName, InnerPathComputation.ABSOLUTE);
}
#endregion
}
}

View File

@@ -1,10 +1,9 @@
using System.Linq;
using System.IO;
namespace Semmle.Extraction
{
/// <summary>
/// A tuple represents a string of the form "a(b,c,d)"
/// Its purpose is mainly to make various method calls typesafe.
/// A tuple represents a string of the form "a(b,c,d)".
/// </summary>
public struct Tuple : ITrapEmitter
{
@@ -17,147 +16,23 @@ namespace Semmle.Extraction
Args = args;
}
const int maxStringBytes = 1<<20; // 1MB
static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
private static bool NeedsTruncation(string s)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the string exceeding maxStringBytes
return encoding.GetMaxByteCount(s.Length) > maxStringBytes &&
encoding.GetByteCount(s) > maxStringBytes;
}
private static bool NeedsTruncation(string[] array)
{
// Optimization: only count the actual number of bytes if there is the possibility
// of the strings exceeding maxStringBytes
return encoding.GetMaxByteCount(array.Sum(s => s.Length)) > maxStringBytes &&
array.Sum(encoding.GetByteCount) > maxStringBytes;
}
private static void WriteString(ITrapBuilder tb, string s) => tb.Append(EncodeString(s));
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">The number of bytes available.</param>
/// <returns>The truncated string.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
int outputLen = encoding.GetByteCount(s);
if (outputLen > bytesRemaining)
{
outputLen = 0;
int chars;
for (chars = 0; chars < s.Length; ++chars)
{
var bytes = encoding.GetByteCount(s, chars, 1);
if (outputLen + bytes <= bytesRemaining)
outputLen += bytes;
else
break;
}
s = s.Substring(0, chars);
}
bytesRemaining -= outputLen;
return s;
}
private static string EncodeString(string s) => s.Replace("\"", "\"\"");
/// <summary>
/// Output a string to the trap file, such that the encoded output does not exceed
/// <paramref name="bytesRemaining"/> bytes.
/// </summary>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
/// <param name="bytesRemaining">The remaining bytes available to output.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
WriteString(tb, TruncateString(s, ref bytesRemaining));
}
/// <summary>
/// Constructs a unique string for this tuple.
/// </summary>
/// <param name="tb">The trap builder used to store the result.</param>
public void EmitToTrapBuilder(ITrapBuilder tb)
/// <param name="trapFile">The trap file to write to.</param>
public void EmitTrap(TextWriter trapFile)
{
tb.Append(Name).Append("(");
int column = 0;
foreach (var a in Args)
{
if (column > 0) tb.Append(", ");
switch(a)
{
case Label l:
l.AppendTo(tb);
break;
case IEntity e:
e.Label.AppendTo(tb);
break;
case string s:
tb.Append("\"");
if (NeedsTruncation(s))
{
// Slow path
int remaining = maxStringBytes;
WriteTruncatedString(tb, s, ref remaining);
}
else
{
// Fast path
WriteString(tb, s);
}
tb.Append("\"");
break;
case System.Enum _:
tb.Append((int)a);
break;
case int i:
tb.Append(i);
break;
case float f:
tb.Append(f.ToString("0.#####e0")); // Trap importer won't accept ints
break;
case string[] array:
tb.Append("\"");
if (NeedsTruncation(array))
{
// Slow path
int remaining = maxStringBytes;
foreach (var element in array)
WriteTruncatedString(tb, element, ref remaining);
}
else
{
// Fast path
foreach (var element in array)
WriteString(tb, element);
}
tb.Append("\"");
break;
case null:
throw new InternalError($"Attempt to write a null argument tuple {Name} at column {column}");
default:
throw new InternalError($"Attempt to write an invalid argument type {a.GetType()} in tuple {Name} at column {column}");
}
++column;
}
tb.Append(")");
tb.AppendLine();
trapFile.WriteTuple(Name, Args);
}
public override string ToString()
{
// Only implemented for debugging purposes
var tsb = new TrapStringBuilder();
EmitToTrapBuilder(tsb);
return tsb.ToString();
using (var writer = new StringWriter())
{
EmitTrap(writer);
return writer.ToString();
}
}
}
}

View File

@@ -8,28 +8,44 @@ namespace Semmle.Extraction
/// </summary>
static class Tuples
{
internal static Tuple assemblies(Assembly assembly, File file, string identifier, string name, string version) =>
new Tuple("assemblies", assembly, file, identifier, name, version);
// Writes an "assemblies" tuple with the given columns to the trap file.
public static void assemblies(this System.IO.TextWriter trapFile, Assembly assembly, File file, string identifier, string name, string version)
{
trapFile.WriteTuple("assemblies", assembly, file, identifier, name, version);
}
internal static Tuple containerparent(Folder parent, IEntity child) =>
new Tuple("containerparent", parent, child);
// Writes a "containerparent" tuple with the given columns to the trap file.
public static void containerparent(this System.IO.TextWriter trapFile, Folder parent, IEntity child)
{
trapFile.WriteTuple("containerparent", parent, child);
}
internal static Tuple extractor_messages(ExtractionMessage error, Semmle.Util.Logging.Severity severity, string origin, string errorMessage, string entityText, Location location, string stackTrace) =>
new Tuple("extractor_messages", error, severity, origin, errorMessage, entityText, location, stackTrace);
// Writes an "extractor_messages" tuple to the trap file; the severity is passed as its integer value.
public static void extractor_messages(this System.IO.TextWriter trapFile, ExtractionMessage error, Semmle.Util.Logging.Severity severity, string origin, string errorMessage, string entityText, Location location, string stackTrace)
{
trapFile.WriteTuple("extractor_messages", error, (int)severity, origin, errorMessage, entityText, location, stackTrace);
}
internal static Tuple file_extraction_mode(File file, int mode) =>
new Tuple("file_extraction_mode", file, mode);
// Writes a "file_extraction_mode" tuple with the given columns to the trap file.
internal static void file_extraction_mode(this System.IO.TextWriter trapFile, Entities.File file, int mode)
{
trapFile.WriteTuple("file_extraction_mode", file, mode);
}
internal static Tuple files(File file, string fullName, string name, string extension) =>
new Tuple("files", file, fullName, name, extension, 0);
// Writes a "files" tuple to the trap file; the final column is always the constant 0.
public static void files(this System.IO.TextWriter trapFile, File file, string fullName, string name, string extension)
{
trapFile.WriteTuple("files", file, fullName, name, extension, 0);
}
internal static Tuple folders(Folder folder, string path, string name) =>
new Tuple("folders", folder, path, name);
// Writes a "folders" tuple with the given columns to the trap file.
public static void folders(this System.IO.TextWriter trapFile, Folder folder, string path, string name)
{
trapFile.WriteTuple("folders", folder, path, name);
}
internal static Tuple locations_default(SourceLocation label, File file, int startLine, int startCol, int endLine, int endCol) =>
new Tuple("locations_default", label, file, startLine, startCol, endLine, endCol);
// Writes a "locations_default" tuple with the given columns to the trap file.
public static void locations_default(this System.IO.TextWriter trapFile, SourceLocation label, Entities.File file, int startLine, int startCol, int endLine, int endCol)
{
trapFile.WriteTuple("locations_default", label, file, startLine, startCol, endLine, endCol);
}
internal static Tuple numlines(IEntity label, LineCounts lineCounts) =>
new Tuple("numlines", label, lineCounts.Total, lineCounts.Code, lineCounts.Comment);
// Writes a "numlines" tuple to the trap file, expanding the line counts into total, code and comment columns.
public static void numlines(this System.IO.TextWriter trapFile, IEntity label, LineCounts lineCounts)
{
trapFile.WriteTuple("numlines", label, lineCounts.Total, lineCounts.Code, lineCounts.Comment);
}
}
}