C#: Implement support for path transformers

This commit is contained in:
Tom Hvitved
2020-09-07 08:57:49 +02:00
parent 14567f5314
commit 37f1ce3122
18 changed files with 442 additions and 231 deletions

View File

@@ -10,93 +10,54 @@ namespace Semmle.Extraction.Entities
File(Context cx, string path)
: base(cx, path)
{
Path = path;
OriginalPath = path;
TransformedPath = Context.Extractor.PathTransformer.Transform(OriginalPath);
}
public string Path
{
get;
private set;
}
readonly string OriginalPath;
readonly PathTransformer.ITransformedPath TransformedPath;
public string DatabasePath => PathAsDatabaseId(Path);
public override bool NeedsPopulation => Context.DefinesFile(Path) || Path == Context.Extractor.OutputPath;
public override bool NeedsPopulation => Context.DefinesFile(OriginalPath) || OriginalPath == Context.Extractor.OutputPath;
public override void Populate(TextWriter trapFile)
{
if (Path == null)
trapFile.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension);
if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir)
trapFile.containerparent(Folder.Create(Context, dir), this);
var fromSource = TransformedPath.Extension.ToLowerInvariant().Equals("cs");
if (fromSource)
{
trapFile.files(this, "", "", "");
}
else
{
var fi = new FileInfo(Path);
string extension = fi.Extension ?? "";
string name = fi.Name;
name = name.Substring(0, name.Length - extension.Length);
int fromSource = extension.ToLowerInvariant().Equals(".cs") ? 1 : 2;
// remove the dot from the extension
if (extension.Length > 0)
extension = extension.Substring(1);
trapFile.files(this, PathAsDatabaseString(Path), name, extension);
trapFile.containerparent(Folder.Create(Context, fi.Directory), this);
if (fromSource == 1)
foreach (var text in Context.Compilation.SyntaxTrees.
Where(t => t.FilePath == OriginalPath).
Select(tree => tree.GetText()))
{
foreach (var text in Context.Compilation.SyntaxTrees.
Where(t => t.FilePath == Path).
Select(tree => tree.GetText()))
{
var rawText = text.ToString() ?? "";
var lineCounts = LineCounter.ComputeLineCounts(rawText);
if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++;
var rawText = text.ToString() ?? "";
var lineCounts = LineCounter.ComputeLineCounts(rawText);
if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++;
trapFile.numlines(this, lineCounts);
Context.TrapWriter.Archive(fi.FullName, text.Encoding ?? System.Text.Encoding.Default);
}
trapFile.numlines(this, lineCounts);
Context.TrapWriter.Archive(OriginalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default);
}
trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0);
}
trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0);
}
public override void WriteId(System.IO.TextWriter trapFile)
{
if (Path is null)
trapFile.Write("GENERATED;sourcefile");
else
{
trapFile.Write(DatabasePath);
trapFile.Write(";sourcefile");
}
trapFile.Write(TransformedPath.DatabaseId);
trapFile.Write(";sourcefile");
}
/// <summary>
/// Converts a path string into a string to use as an ID
/// in the QL database.
/// </summary>
/// <param name="path">An absolute path.</param>
/// <returns>The database ID.</returns>
public static string PathAsDatabaseId(string path)
{
if (path.Length >= 2 && path[1] == ':' && Char.IsLower(path[0]))
path = Char.ToUpper(path[0]) + "_" + path.Substring(2);
return path.Replace('\\', '/').Replace(":", "_");
}
public static string PathAsDatabaseString(string path) => path.Replace('\\', '/');
public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, path);
public static File CreateGenerated(Context cx) => GeneratedFile.Create(cx);
class GeneratedFile : File
{
GeneratedFile(Context cx)
: base(cx, "") { }
GeneratedFile(Context cx) : base(cx, "") { }
public override bool NeedsPopulation => true;

View File

@@ -2,65 +2,44 @@ using System.IO;
namespace Semmle.Extraction.Entities
{
sealed class Folder : CachedEntity<DirectoryInfo>
sealed class Folder : CachedEntity<PathTransformer.ITransformedPath>
{
Folder(Context cx, DirectoryInfo init)
: base(cx, init)
{
Path = init.FullName;
}
public string Path
{
get;
private set;
}
public string DatabasePath => File.PathAsDatabaseId(Path);
Folder(Context cx, PathTransformer.ITransformedPath init) : base(cx, init) { }
public override void Populate(TextWriter trapFile)
{
// Ensure that the name of the root directory is consistent
// with the XmlTrapWriter.
// Linux/Windows: java.io.File.getName() returns ""
// On Linux: System.IO.DirectoryInfo.Name returns "/"
// On Windows: System.IO.DirectoryInfo.Name returns "L:\"
string shortName = symbol.Parent == null ? "" : symbol.Name;
trapFile.folders(this, File.PathAsDatabaseString(Path), shortName);
if (symbol.Parent != null)
{
trapFile.containerparent(Create(Context, symbol.Parent), this);
}
trapFile.folders(this, symbol.Value, symbol.NameWithoutExtension);
if (symbol.ParentDirectory is PathTransformer.ITransformedPath parent)
trapFile.containerparent(Create(Context, parent), this);
}
public override bool NeedsPopulation => true;
public override void WriteId(System.IO.TextWriter trapFile)
{
trapFile.Write(DatabasePath);
trapFile.Write(symbol.DatabaseId);
trapFile.Write(";folder");
}
public static Folder Create(Context cx, DirectoryInfo folder) =>
public static Folder Create(Context cx, PathTransformer.ITransformedPath folder) =>
FolderFactory.Instance.CreateEntity2(cx, folder);
public override Microsoft.CodeAnalysis.Location? ReportingLocation => null;
class FolderFactory : ICachedEntityFactory<DirectoryInfo, Folder>
class FolderFactory : ICachedEntityFactory<PathTransformer.ITransformedPath, Folder>
{
public static readonly FolderFactory Instance = new FolderFactory();
public Folder Create(Context cx, DirectoryInfo init) => new Folder(cx, init);
public Folder Create(Context cx, PathTransformer.ITransformedPath init) => new Folder(cx, init);
}
public override TrapStackBehaviour TrapStackBehaviour => TrapStackBehaviour.NoLabel;
public override int GetHashCode() => Path.GetHashCode();
public override int GetHashCode() => symbol.GetHashCode();
public override bool Equals(object? obj)
{
return obj is Folder folder && folder.Path == Path;
return obj is Folder folder && Equals(folder.symbol, symbol);
}
}
}

View File

@@ -81,6 +81,11 @@ namespace Semmle.Extraction
/// </summary>
ILogger Logger { get; }
/// <summary>
/// The path transformer to apply.
/// </summary>
PathTransformer PathTransformer { get; }
/// <summary>
/// Creates a new context.
/// </summary>
@@ -111,11 +116,14 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="standalone">If the extraction is standalone.</param>
/// <param name="outputPath">The name of the output DLL/EXE, or null if not specified (standalone extraction).</param>
public Extractor(bool standalone, string outputPath, ILogger logger)
/// <param name="logger">The object used for logging.</param>
/// <param name="pathTransformer">The object used for path transformations.</param>
public Extractor(bool standalone, string outputPath, ILogger logger, PathTransformer pathTransformer)
{
Standalone = standalone;
OutputPath = outputPath;
Logger = logger;
PathTransformer = pathTransformer;
}
// Limit the number of error messages in the log file
@@ -205,5 +213,7 @@ namespace Semmle.Extraction
public ILogger Logger { get; private set; }
public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
public PathTransformer PathTransformer { get; }
}
}

View File

@@ -54,14 +54,15 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>The full filepath of the trap file.</returns>
public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
public string GetTrapPath(ILogger logger, PathTransformer.ITransformedPath srcFile, TrapWriter.CompressionMode trapCompression) =>
TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
/// <summary>
/// Creates a trap writer for a given source/assembly file.
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>A newly created TrapWriter.</returns>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
public TrapWriter CreateTrapWriter(ILogger logger, PathTransformer.ITransformedPath srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression);
}
@@ -73,7 +74,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="sourceFile">The file to look up.</param>
/// <returns>The relevant subproject, or null if not found.</returns>
public SubProject? LookupProjectOrNull(string sourceFile)
public SubProject? LookupProjectOrNull(PathTransformer.ITransformedPath sourceFile)
{
if (!useLayoutFile) return DefaultProject;
@@ -89,7 +90,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="sourceFile">The file to look up.</param>
/// <returns>The relevant subproject, or DefaultProject if not found.</returns>
public SubProject LookupProjectOrDefault(string sourceFile)
public SubProject LookupProjectOrDefault(PathTransformer.ITransformedPath sourceFile)
{
return LookupProjectOrNull(sourceFile) ?? DefaultProject;
}
@@ -134,7 +135,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="path">The absolute path of the file to query.</param>
/// <returns>True iff there is no layout file or the layout file specifies the file.</returns>
public bool FileInLayout(string path) => LookupProjectOrNull(path) != null;
public bool FileInLayout(PathTransformer.ITransformedPath path) => LookupProjectOrNull(path) != null;
void ReadLayoutFile(string layout)
{
@@ -197,6 +198,6 @@ namespace Semmle.Extraction
}
}
public bool Matches(string path) => FilePattern.Matches(filePatterns, path, out var _);
public bool Matches(PathTransformer.ITransformedPath path) => FilePattern.Matches(filePatterns, path.Value, out var _);
}
}

View File

@@ -0,0 +1,178 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Diagnostics.CodeAnalysis;
using Semmle.Util;
namespace Semmle.Extraction
{
/// <summary>
/// A class for interpreting path transformers specified using the environment
/// variable `CODEQL_PATH_TRANSFORMER`.
/// </summary>
public sealed class PathTransformer
{
public class InvalidPathTransformerException : Exception
{
public InvalidPathTransformerException(string message) :
base($"Invalid path transformer specification: {message}")
{ }
}
/// <summary>
/// A transformed path.
/// </summary>
public interface ITransformedPath
{
string Value { get; }
string Extension { get; }
string NameWithoutExtension { get; }
ITransformedPath? ParentDirectory { get; }
ITransformedPath WithSuffix(string suffix);
string DatabaseId { get; }
}
struct TransformedPath : ITransformedPath
{
public TransformedPath(string value) { this.value = value; }
readonly string value;
public string Value => value;
public string Extension => Path.GetExtension(value)?.Substring(1) ?? "";
public string NameWithoutExtension => Path.GetFileNameWithoutExtension(value);
public ITransformedPath? ParentDirectory
{
get
{
var dir = Path.GetDirectoryName(value);
if (dir is null)
return null;
var isWindowsDriveLetter = dir.Length == 2 && char.IsLetter(dir[0]) && dir[1] == ':';
if (isWindowsDriveLetter)
return null;
return new TransformedPath(FileUtils.ConvertToUnix(dir));
}
}
public ITransformedPath WithSuffix(string suffix) => new TransformedPath(value + suffix);
public string DatabaseId
{
get
{
var ret = value;
if (ret.Length >= 2 && ret[1] == ':' && Char.IsLower(ret[0]))
ret = Char.ToUpper(ret[0]) + "_" + ret.Substring(2);
return ret.Replace('\\', '/').Replace(":", "_");
}
}
public override int GetHashCode() => 11 * value.GetHashCode();
public override bool Equals(object? obj) => obj is TransformedPath tp && tp.value == value;
public override string ToString() => value;
}
readonly Func<string, string> transform;
/// <summary>
/// Returns the path obtained by transforming `path`.
/// </summary>
public ITransformedPath Transform(string path) => new TransformedPath(transform(path));
/// <summary>
/// Default constructor reads parameters from the environment.
/// </summary>
public PathTransformer(IPathCache pathCache) :
this(pathCache, Environment.GetEnvironmentVariable("CODEQL_PATH_TRANSFORMER") is string file ? File.ReadAllLines(file) : null)
{
}
/// <summary>
/// Creates a path transformer based on the specification in `lines`.
/// Throws `InvalidPathTransformerException` for invalid specifications.
/// </summary>
public PathTransformer(IPathCache pathCache, string[]? lines)
{
if (lines is null)
{
transform = path => FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path));
return;
}
var sections = ParsePathTransformerSpec(lines);
transform = path =>
{
path = FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path));
foreach (var section in sections)
{
if (section.Matches(path, out var transformed))
return transformed;
}
return path;
};
}
static IEnumerable<TransformerSection> ParsePathTransformerSpec(string[] lines)
{
var sections = new List<TransformerSection>();
try
{
int i = 0;
while (i < lines.Length && !lines[i].StartsWith("#"))
i++;
while (i < lines.Length)
{
var section = new TransformerSection(lines, ref i);
sections.Add(section);
}
if (sections.Count == 0)
throw new InvalidPathTransformerException("contains no sections.");
}
catch (InvalidFilePatternException ex)
{
throw new InvalidPathTransformerException(ex.Message);
}
return sections;
}
}
sealed class TransformerSection
{
readonly string name;
readonly List<FilePattern> filePatterns = new List<FilePattern>();
public TransformerSection(string[] lines, ref int i)
{
name = lines[i++].Substring(1);
while (i < lines.Length && !lines[i].StartsWith("#"))
{
if (string.IsNullOrEmpty(lines[i]))
i++;
else
filePatterns.Add(new FilePattern(lines[i++]));
}
}
public bool Matches(string path, [NotNullWhen(true)] out string? transformed)
{
if (FilePattern.Matches(filePatterns, path, out var suffix))
{
transformed = FileUtils.ConvertToUnix(name) + suffix;
return true;
}
transformed = null;
return false;
}
}
}

View File

@@ -14,12 +14,6 @@ namespace Semmle.Extraction
public sealed class TrapWriter : IDisposable
{
public enum InnerPathComputation
{
ABSOLUTE,
RELATIVE
}
public enum CompressionMode
{
None,
@@ -45,7 +39,7 @@ namespace Semmle.Extraction
readonly CompressionMode TrapCompression;
public TrapWriter(ILogger logger, string outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression)
public TrapWriter(ILogger logger, PathTransformer.ITransformedPath outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression)
{
Logger = logger;
TrapCompression = trapCompression;
@@ -107,16 +101,17 @@ namespace Semmle.Extraction
/// Adds the specified input file to the source archive. It may end up in either the normal or long path area
/// of the source archive, depending on the length of its full path.
/// </summary>
/// <param name="inputPath">The path to the input file.</param>
/// <param name="originalPath">The path to the input file.</param>
/// <param name="transformedPath">The transformed path to the input file.</param>
/// <param name="inputEncoding">The encoding used by the input file.</param>
public void Archive(string inputPath, Encoding inputEncoding)
public void Archive(string originalPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
{
if (string.IsNullOrEmpty(archive)) return;
// Calling GetFullPath makes this use the canonical capitalisation, if the file exists.
string fullInputPath = Path.GetFullPath(inputPath);
string fullInputPath = Path.GetFullPath(originalPath);
ArchivePath(fullInputPath, inputEncoding);
ArchivePath(fullInputPath, transformedPath, inputEncoding);
}
/// <summary>
@@ -124,14 +119,11 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="inputPath">The path of the file.</param>
/// <param name="contents">The contents of the file.</param>
public void Archive(string inputPath, string contents)
public void Archive(PathTransformer.ITransformedPath inputPath, string contents)
{
if (string.IsNullOrEmpty(archive)) return;
// Calling GetFullPath makes this use the canonical capitalisation, if the file exists.
string fullInputPath = Path.GetFullPath(inputPath);
ArchiveContents(fullInputPath, contents);
ArchiveContents(inputPath, contents);
}
/// <summary>
@@ -210,18 +202,19 @@ namespace Semmle.Extraction
/// source archive less than the system path limit of 260 characters.
/// </summary>
/// <param name="fullInputPath">The full path to the input file.</param>
/// <param name="transformedPath">The transformed path to the input file.</param>
/// <param name="inputEncoding">The encoding used by the input file.</param>
/// <exception cref="PathTooLongException">If the output path in the source archive would
/// exceed the system path limit of 260 characters.</exception>
private void ArchivePath(string fullInputPath, Encoding inputEncoding)
private void ArchivePath(string fullInputPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
{
string contents = File.ReadAllText(fullInputPath, inputEncoding);
ArchiveContents(fullInputPath, contents);
ArchiveContents(transformedPath, contents);
}
private void ArchiveContents(string fullInputPath, string contents)
private void ArchiveContents(PathTransformer.ITransformedPath transformedPath, string contents)
{
string dest = NestPaths(Logger, archive, fullInputPath, InnerPathComputation.ABSOLUTE);
string dest = NestPaths(Logger, archive, transformedPath.Value);
string tmpSrcFile = Path.GetTempFileName();
File.WriteAllText(tmpSrcFile, contents, UTF8);
try
@@ -236,14 +229,11 @@ namespace Semmle.Extraction
}
}
public static string NestPaths(ILogger logger, string? outerpath, string innerpath, InnerPathComputation innerPathComputation)
public static string NestPaths(ILogger logger, string? outerpath, string innerpath)
{
string nested = innerpath;
if (!string.IsNullOrEmpty(outerpath))
{
if (!Path.IsPathRooted(innerpath) && innerPathComputation == InnerPathComputation.ABSOLUTE)
innerpath = Path.GetFullPath(innerpath);
// Remove all leading path separators / or \
// For example, UNC paths have two leading \\
innerpath = innerpath.TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
@@ -276,13 +266,13 @@ namespace Semmle.Extraction
}
}
public static string TrapPath(ILogger logger, string? folder, string filename, TrapWriter.CompressionMode trapCompression)
public static string TrapPath(ILogger logger, string? folder, PathTransformer.ITransformedPath path, TrapWriter.CompressionMode trapCompression)
{
filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}";
var filename = $"{path.Value}.trap{TrapExtension(trapCompression)}";
if (string.IsNullOrEmpty(folder))
folder = Directory.GetCurrentDirectory();
return NestPaths(logger, folder, filename, InnerPathComputation.ABSOLUTE); ;
return NestPaths(logger, folder, filename);
}
}
}