Merge pull request #4212 from hvitved/csharp/path-transformers

C#: Implement support for path transformers
This commit is contained in:
Tom Hvitved
2020-09-22 09:20:53 +02:00
committed by GitHub
20 changed files with 623 additions and 271 deletions

View File

@@ -37,7 +37,7 @@ namespace Semmle.Extraction.CIL
namespaceFactory = new CachedFunction<StringHandle, Entities.Namespace>(n => CreateNamespace(mdReader.GetString(n)));
namespaceDefinitionFactory = new CachedFunction<NamespaceDefinitionHandle, Entities.Namespace>(CreateNamespace);
sourceFiles = new CachedFunction<PDB.ISourceFile, Entities.PdbSourceFile>(path => new Entities.PdbSourceFile(this, path));
folders = new CachedFunction<string, Entities.Folder>(path => new Entities.Folder(this, path));
folders = new CachedFunction<PathTransformer.ITransformedPath, Entities.Folder>(path => new Entities.Folder(this, path));
sourceLocations = new CachedFunction<PDB.Location, Entities.PdbSourceLocation>(location => new Entities.PdbSourceLocation(this, location));
defaultGenericContext = new EmptyContext(this);

View File

@@ -5,6 +5,7 @@ using Semmle.Util.Logging;
using System;
using Semmle.Extraction.Entities;
using System.IO;
using Semmle.Util;
namespace Semmle.Extraction.CIL.Entities
{
@@ -134,9 +135,12 @@ namespace Semmle.Extraction.CIL.Entities
extracted = false;
try
{
var extractor = new Extractor(false, assemblyPath, logger);
var project = layout.LookupProjectOrDefault(assemblyPath);
using (var trapWriter = project.CreateTrapWriter(logger, assemblyPath + ".cil", true, trapCompression))
var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
var pathTransformer = new PathTransformer(canonicalPathCache);
var extractor = new Extractor(false, assemblyPath, logger, pathTransformer);
var transformedAssemblyPath = pathTransformer.Transform(assemblyPath);
var project = layout.LookupProjectOrDefault(transformedAssemblyPath);
using (var trapWriter = project.CreateTrapWriter(logger, transformedAssemblyPath.WithSuffix(".cil"), true, trapCompression))
{
trapFile = trapWriter.TrapFile;
if (nocache || !System.IO.File.Exists(trapFile))

View File

@@ -13,33 +13,38 @@ namespace Semmle.Extraction.CIL.Entities
public class File : LabelledEntity, IFile
{
protected readonly string path;
protected readonly string OriginalPath;
protected readonly PathTransformer.ITransformedPath TransformedPath;
public File(Context cx, string path) : base(cx)
{
this.path = Semmle.Extraction.Entities.File.PathAsDatabaseString(path);
this.OriginalPath = path;
TransformedPath = cx.cx.Extractor.PathTransformer.Transform(OriginalPath);
}
public override void WriteId(TextWriter trapFile)
{
trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path));
trapFile.Write(TransformedPath.DatabaseId);
}
public override bool Equals(object obj)
{
return GetType() == obj.GetType() && path == ((File)obj).path;
return GetType() == obj.GetType() && OriginalPath == ((File)obj).OriginalPath;
}
public override int GetHashCode() => 11 * path.GetHashCode();
public override int GetHashCode() => 11 * OriginalPath.GetHashCode();
public override IEnumerable<IExtractionProduct> Contents
{
get
{
var parent = cx.CreateFolder(System.IO.Path.GetDirectoryName(path));
yield return parent;
yield return Tuples.containerparent(parent, this);
yield return Tuples.files(this, path, System.IO.Path.GetFileNameWithoutExtension(path), System.IO.Path.GetExtension(path).Substring(1));
if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir)
{
var parent = cx.CreateFolder(dir);
yield return parent;
yield return Tuples.containerparent(parent, this);
}
yield return Tuples.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension);
}
}
@@ -65,9 +70,9 @@ namespace Semmle.Extraction.CIL.Entities
var text = file.Contents;
if (text == null)
cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", path));
cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", OriginalPath));
else
cx.cx.TrapWriter.Archive(path, text);
cx.cx.TrapWriter.Archive(TransformedPath, text);
yield return Tuples.file_extraction_mode(this, 2);
}

View File

@@ -9,16 +9,16 @@ namespace Semmle.Extraction.CIL.Entities
public sealed class Folder : LabelledEntity, IFolder
{
readonly string path;
readonly PathTransformer.ITransformedPath TransformedPath;
public Folder(Context cx, string path) : base(cx)
public Folder(Context cx, PathTransformer.ITransformedPath path) : base(cx)
{
this.path = path;
this.TransformedPath = path;
}
public override void WriteId(TextWriter trapFile)
{
trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path));
trapFile.Write(TransformedPath.DatabaseId);
}
public override string IdSuffix => ";folder";
@@ -27,25 +27,21 @@ namespace Semmle.Extraction.CIL.Entities
{
get
{
// On Posix, we could get a Windows directory of the form "C:"
bool windowsDriveLetter = path.Length == 2 && char.IsLetter(path[0]) && path[1] == ':';
var parent = Path.GetDirectoryName(path);
if (parent != null && !windowsDriveLetter)
if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath parent)
{
var parentFolder = cx.CreateFolder(parent);
yield return parentFolder;
yield return Tuples.containerparent(parentFolder, this);
}
yield return Tuples.folders(this, Semmle.Extraction.Entities.File.PathAsDatabaseString(path), Path.GetFileName(path));
yield return Tuples.folders(this, TransformedPath.Value, TransformedPath.NameWithoutExtension);
}
}
public override bool Equals(object obj)
{
return obj is Folder folder && path == folder.path;
return obj is Folder folder && TransformedPath == folder.TransformedPath;
}
public override int GetHashCode() => path.GetHashCode();
public override int GetHashCode() => TransformedPath.GetHashCode();
}
}

View File

@@ -201,7 +201,7 @@ namespace Semmle.Extraction.CIL
#region Locations
readonly CachedFunction<PDB.ISourceFile, PdbSourceFile> sourceFiles;
readonly CachedFunction<string, Folder> folders;
readonly CachedFunction<PathTransformer.ITransformedPath, Folder> folders;
readonly CachedFunction<PDB.Location, PdbSourceLocation> sourceLocations;
/// <summary>
@@ -216,7 +216,7 @@ namespace Semmle.Extraction.CIL
/// </summary>
/// <param name="path">The path of the folder.</param>
/// <returns>A folder entity.</returns>
public Folder CreateFolder(string path) => folders[path];
public Folder CreateFolder(PathTransformer.ITransformedPath path) => folders[path];
/// <summary>
/// Creates a source location.

View File

@@ -25,12 +25,15 @@ namespace Semmle.Extraction.CSharp
public readonly ILogger Logger;
public Analyser(IProgressMonitor pm, ILogger logger)
public readonly PathTransformer PathTransformer;
public Analyser(IProgressMonitor pm, ILogger logger, PathTransformer pathTransformer)
{
Logger = logger;
Logger.Log(Severity.Info, "EXTRACTION STARTING at {0}", DateTime.Now);
stopWatch.Start();
progressMonitor = pm;
PathTransformer = pathTransformer;
}
CSharpCompilation compilation;
@@ -64,7 +67,7 @@ namespace Semmle.Extraction.CSharp
layout = new Layout();
this.options = options;
this.compilation = compilation;
extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger);
extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger, PathTransformer);
LogDiagnostics();
SetReferencePaths();
@@ -114,7 +117,7 @@ namespace Semmle.Extraction.CSharp
{
compilation = compilationIn;
layout = new Layout();
extractor = new Extraction.Extractor(true, null, Logger);
extractor = new Extraction.Extractor(true, null, Logger, PathTransformer);
this.options = options;
LogExtractorInfo(Extraction.Extractor.Version);
SetReferencePaths();
@@ -227,9 +230,10 @@ namespace Semmle.Extraction.CSharp
try
{
var assemblyPath = extractor.OutputPath;
var transformedAssemblyPath = PathTransformer.Transform(assemblyPath);
var assembly = compilation.Assembly;
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression);
var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath);
var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression);
compilationTrapFile = trapWriter; // Dispose later
var cx = extractor.CreateContext(compilation.Clone(), trapWriter, new AssemblyScope(assembly, assemblyPath, true));
@@ -257,8 +261,9 @@ namespace Semmle.Extraction.CSharp
stopwatch.Start();
var assemblyPath = r.FilePath;
var projectLayout = layout.LookupProjectOrDefault(assemblyPath);
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression))
var transformedAssemblyPath = PathTransformer.Transform(assemblyPath);
var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath);
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression))
{
var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile);
@@ -357,16 +362,17 @@ namespace Semmle.Extraction.CSharp
var stopwatch = new Stopwatch();
stopwatch.Start();
var sourcePath = tree.FilePath;
var transformedSourcePath = PathTransformer.Transform(sourcePath);
var projectLayout = layout.LookupProjectOrNull(sourcePath);
var projectLayout = layout.LookupProjectOrNull(transformedSourcePath);
bool excluded = projectLayout == null;
string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, sourcePath, options.TrapCompression);
string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, transformedSourcePath, options.TrapCompression);
bool upToDate = false;
if (!excluded)
{
// compilation.Clone() is used to allow symbols to be garbage collected.
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, sourcePath, false, options.TrapCompression))
using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedSourcePath, false, options.TrapCompression))
{
upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile);

View File

@@ -3,6 +3,7 @@ using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Semmle.Util;
namespace Semmle.Extraction.CSharp.Entities
{
@@ -22,32 +23,32 @@ namespace Semmle.Extraction.CSharp.Entities
{
Extraction.Entities.Assembly.CreateOutputAssembly(cx);
trapFile.compilations(this, Extraction.Entities.File.PathAsDatabaseString(cwd));
trapFile.compilations(this, FileUtils.ConvertToUnix(cwd));
// Arguments
int index = 0;
foreach(var arg in args)
foreach (var arg in args)
{
trapFile.compilation_args(this, index++, arg);
}
// Files
index = 0;
foreach(var file in cx.Compilation.SyntaxTrees.Select(tree => Extraction.Entities.File.Create(cx, tree.FilePath)))
foreach (var file in cx.Compilation.SyntaxTrees.Select(tree => Extraction.Entities.File.Create(cx, tree.FilePath)))
{
trapFile.compilation_compiling_files(this, index++, file);
}
// References
index = 0;
foreach(var file in cx.Compilation.References.OfType<PortableExecutableReference>().Select(r => Extraction.Entities.File.Create(cx, r.FilePath)))
foreach (var file in cx.Compilation.References.OfType<PortableExecutableReference>().Select(r => Extraction.Entities.File.Create(cx, r.FilePath)))
{
trapFile.compilation_referencing_files(this, index++, file);
}
// Diagnostics
index = 0;
foreach(var diag in cx.Compilation.GetDiagnostics().Select(d => new Diagnostic(cx, d)))
foreach (var diag in cx.Compilation.GetDiagnostics().Select(d => new Diagnostic(cx, d)))
{
trapFile.diagnostic_for(diag, this, 0, index++);
}
@@ -57,7 +58,7 @@ namespace Semmle.Extraction.CSharp.Entities
{
var trapFile = cx.TrapWriter.Writer;
int index = 0;
foreach(float metric in p.Metrics)
foreach (float metric in p.Metrics)
{
trapFile.compilation_time(this, -1, index++, metric);
}

View File

@@ -76,16 +76,16 @@ namespace Semmle.Extraction.CSharp
return ExitCode.Ok;
}
using (var analyser = new Analyser(new LogProgressMonitor(logger), logger))
var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
var pathTransformer = new PathTransformer(canonicalPathCache);
using (var analyser = new Analyser(new LogProgressMonitor(logger), logger, pathTransformer))
using (var references = new BlockingCollection<MetadataReference>())
{
try
{
var compilerVersion = new CompilerVersion(commandLineArguments);
bool preserveSymlinks = Environment.GetEnvironmentVariable("SEMMLE_PRESERVE_SYMLINKS") == "true";
var canonicalPathCache = CanonicalPathCache.Create(logger, 1000, preserveSymlinks ? CanonicalPathCache.Symlinks.Preserve : CanonicalPathCache.Symlinks.Follow);
if (compilerVersion.SkipExtraction)
{
logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason);
@@ -317,7 +317,10 @@ namespace Semmle.Extraction.CSharp
ILogger logger,
CommonOptions options)
{
using (var analyser = new Analyser(pm, logger))
var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
var pathTransformer = new PathTransformer(canonicalPathCache);
using (var analyser = new Analyser(pm, logger, pathTransformer))
using (var references = new BlockingCollection<MetadataReference>())
{
try

View File

@@ -0,0 +1,48 @@
using Xunit;
namespace Semmle.Extraction.Tests
{
/// <summary>
/// Unit tests for <see cref="FilePattern"/>: regex generation, matching against
/// a set of patterns, and rejection of malformed patterns.
/// </summary>
public class FilePatternTests
{
    /// <summary>
    /// Verifies the regular expression produced for a handful of patterns,
    /// including wildcards, <c>//</c> markers, a leading <c>-</c>, and
    /// characters that must be escaped in the resulting regex.
    /// </summary>
    [Fact]
    public void TestRegexCompilation()
    {
        // Builds a pattern and compares its compiled regex with the expected text.
        void AssertCompilesTo(string expectedRegex, string pattern)
        {
            var compiled = new FilePattern(pattern);
            Assert.Equal(expectedRegex, compiled.RegexPattern);
        }

        AssertCompilesTo("^hadoop[^/]*.*", "/hadoop*");
        AssertCompilesTo("^.*/org/apache/hadoop.*", "**/org/apache/hadoop");
        AssertCompilesTo("^hadoop-common/.*/test(?<doubleslash>/).*", "hadoop-common/**/test// ");
        AssertCompilesTo("^C:/agent/root/asdf(?<doubleslash>/).*", @"-C:\agent\root\asdf//");
        AssertCompilesTo(@"^C:/agent\+/\[root]/asdf(?<doubleslash>/).*", @"-C:\agent+\[root]\asdf//");
    }

    /// <summary>
    /// Verifies <c>FilePattern.Matches</c> over two patterns: two paths are
    /// expected to match (with the out-string checked), one is expected not to.
    /// </summary>
    [Fact]
    public void TestMatching()
    {
        var candidates = new[]
        {
            new FilePattern(@"C:\agent\root\abc//"),
            new FilePattern(@"C:\agent\root\def//ghi")
        };

        // Path below the first pattern.
        var matchedAbc = FilePattern.Matches(candidates, @"C:\agent\root\abc\file.cs", out var abcResult);
        Assert.True(matchedAbc);
        Assert.Equal("/file.cs", abcResult);

        // Path below the second pattern.
        var matchedGhi = FilePattern.Matches(candidates, @"C:\agent\root\def\ghi\file.cs", out var ghiResult);
        Assert.True(matchedGhi);
        Assert.Equal("/ghi/file.cs", ghiResult);

        // Path covered by neither pattern.
        var matchedDef = FilePattern.Matches(candidates, @"C:\agent\root\def\file.cs", out var unused);
        Assert.False(matchedDef);
    }

    /// <summary>
    /// Verifies that constructing a <see cref="FilePattern"/> from a malformed
    /// pattern throws <see cref="InvalidFilePatternException"/>.
    /// </summary>
    [Fact]
    public void TestInvalidPatterns()
    {
        var malformedPatterns = new[] { "/abc//def//ghi", "/abc**def" };

        foreach (var malformed in malformedPatterns)
        {
            Assert.Throws<InvalidFilePatternException>(() => new FilePattern(malformed));
        }
    }
}
}

View File

@@ -5,6 +5,26 @@ using System.Runtime.InteropServices;
namespace Semmle.Extraction.Tests
{
/// <summary>
/// Test stub for <see cref="PathTransformer.ITransformedPath"/>. Only
/// <see cref="Value"/> carries data; every other member throws
/// <see cref="System.NotImplementedException"/>.
/// </summary>
struct TransformedPathStub : PathTransformer.ITransformedPath
{
    /// <summary>Creates a stub whose <see cref="Value"/> is <paramref name="value"/>.</summary>
    public TransformedPathStub(string value)
    {
        Value = value;
    }

    /// <summary>The string supplied to the constructor, returned unchanged.</summary>
    public string Value { get; }

    public string Extension => throw new System.NotImplementedException();

    public string NameWithoutExtension => throw new System.NotImplementedException();

    public PathTransformer.ITransformedPath ParentDirectory => throw new System.NotImplementedException();

    public string DatabaseId => throw new System.NotImplementedException();

    public PathTransformer.ITransformedPath WithSuffix(string suffix) => throw new System.NotImplementedException();
}
public class Layout
{
readonly ILogger Logger = new LoggerMock();
@@ -13,10 +33,10 @@ namespace Semmle.Extraction.Tests
public void TestDefaultLayout()
{
var layout = new Semmle.Extraction.Layout(null, null, null);
var project = layout.LookupProjectOrNull("foo.cs");
var project = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs"));
// All files are mapped when there's no layout file.
Assert.True(layout.FileInLayout("foo.cs"));
Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs")));
// Test trap filename
var tmpDir = Path.GetTempPath();
@@ -28,13 +48,13 @@ namespace Semmle.Extraction.Tests
Assert.NotEqual(Directory.GetCurrentDirectory(), tmpDir);
return;
}
var f1 = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
var f1 = project.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz");
Assert.Equal(f1, g1);
// Test trap file generation
var trapwriterFilename = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
using (var trapwriter = project.CreateTrapWriter(Logger, "foo.cs", false, TrapWriter.CompressionMode.Gzip))
var trapwriterFilename = project.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip);
using (var trapwriter = project.CreateTrapWriter(Logger, new TransformedPathStub("foo.cs"), false, TrapWriter.CompressionMode.Gzip))
{
trapwriter.Emit("1=*");
Assert.False(File.Exists(trapwriterFilename));
@@ -63,23 +83,23 @@ namespace Semmle.Extraction.Tests
var layout = new Semmle.Extraction.Layout(null, null, "layout.txt");
// Test general pattern matching
Assert.True(layout.FileInLayout("bar.cs"));
Assert.False(layout.FileInLayout("foo.cs"));
Assert.False(layout.FileInLayout("goo.cs"));
Assert.False(layout.FileInLayout("excluded/bar.cs"));
Assert.True(layout.FileInLayout("excluded/foo.cs"));
Assert.True(layout.FileInLayout("included/foo.cs"));
Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs")));
Assert.False(layout.FileInLayout(new TransformedPathStub("foo.cs")));
Assert.False(layout.FileInLayout(new TransformedPathStub("goo.cs")));
Assert.False(layout.FileInLayout(new TransformedPathStub("excluded/bar.cs")));
Assert.True(layout.FileInLayout(new TransformedPathStub("excluded/foo.cs")));
Assert.True(layout.FileInLayout(new TransformedPathStub("included/foo.cs")));
// Test the trap file
var project = layout.LookupProjectOrNull("bar.cs");
var trapwriterFilename = project.GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip);
Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE),
var project = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs"));
var trapwriterFilename = project.GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip);
Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz"),
trapwriterFilename);
// Test the source archive
var trapWriter = project.CreateTrapWriter(Logger, "bar.cs", false, TrapWriter.CompressionMode.Gzip);
trapWriter.Archive("layout.txt", System.Text.Encoding.ASCII);
var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt", TrapWriter.InnerPathComputation.ABSOLUTE);
var trapWriter = project.CreateTrapWriter(Logger, new TransformedPathStub("bar.cs"), false, TrapWriter.CompressionMode.Gzip);
trapWriter.Archive("layout.txt", new TransformedPathStub("layout.txt"), System.Text.Encoding.ASCII);
var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt");
Assert.True(File.Exists(writtenFile));
File.Delete("layout.txt");
}
@@ -89,9 +109,9 @@ namespace Semmle.Extraction.Tests
{
// When you specify both a trap file and a layout, use the trap file.
var layout = new Semmle.Extraction.Layout(Path.GetFullPath("snapshot\\trap"), null, "something.txt");
Assert.True(layout.FileInLayout("bar.cs"));
var f1 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs")));
var f1 = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs")).GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz");
Assert.Equal(f1, g1);
}
@@ -117,26 +137,26 @@ namespace Semmle.Extraction.Tests
var layout = new Semmle.Extraction.Layout(null, null, "layout.txt");
// Use Section 2
Assert.True(layout.FileInLayout("bar.cs"));
var f1 = layout.LookupProjectOrNull("bar.cs").GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs")));
var f1 = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs")).GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip);
var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz");
Assert.Equal(f1, g1);
// Use Section 1
Assert.True(layout.FileInLayout("foo.cs"));
var f2 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip);
var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs")));
var f2 = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs")).GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip);
var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz");
Assert.Equal(f2, g2);
// boo.dll is not in the layout, so use layout from first section.
Assert.False(layout.FileInLayout("boo.dll"));
var f3 = layout.LookupProjectOrDefault("boo.dll").GetTrapPath(Logger, "boo.dll", TrapWriter.CompressionMode.Gzip);
var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE);
Assert.False(layout.FileInLayout(new TransformedPathStub("boo.dll")));
var f3 = layout.LookupProjectOrDefault(new TransformedPathStub("boo.dll")).GetTrapPath(Logger, new TransformedPathStub("boo.dll"), TrapWriter.CompressionMode.Gzip);
var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz");
Assert.Equal(f3, g3);
// boo.cs is not in the layout, so return null
Assert.False(layout.FileInLayout("boo.cs"));
Assert.Null(layout.LookupProjectOrNull("boo.cs"));
Assert.False(layout.FileInLayout(new TransformedPathStub("boo.cs")));
Assert.Null(layout.LookupProjectOrNull(new TransformedPathStub("boo.cs")));
}
[Fact]

View File

@@ -0,0 +1,45 @@
using Semmle.Util;
using Xunit;
namespace Semmle.Extraction.Tests
{
/// <summary>
/// <see cref="IPathCache"/> stub for tests: returns every path unchanged.
/// </summary>
class PathCacheStub : IPathCache
{
    /// <summary>Returns <paramref name="path"/> as given, without any lookup.</summary>
    public string GetCanonicalPath(string path)
    {
        return path;
    }
}
/// <summary>
/// Unit tests for <see cref="PathTransformer"/>.
/// </summary>
public class PathTransformerTests
{
    /// <summary>
    /// Builds a <see cref="PathTransformer"/> from an inline specification and
    /// checks the <c>Value</c> of several transformed Windows- and Linux-style paths.
    /// </summary>
    [Fact]
    public void TestTransformerFile()
    {
        // Four '#'-headed sections separated by empty lines.
        string[] specification =
        {
            @"#D:\src",
            @"C:\agent*\src//",
            @"-C:\agent*\src\external",
            @"",
            @"#empty",
            @"",
            @"#src2",
            @"/agent*//src",
            @"",
            @"#optsrc",
            @"opt/src//"
        };

        var transformer = new PathTransformer(new PathCacheStub(), specification);

        // Shorthand: transform a path and return its string value.
        string Transformed(string original) => transformer.Transform(original).Value;

        // Windows-style matching
        Assert.Equal(@"C:/bar.cs", Transformed(@"C:\bar.cs"));
        Assert.Equal("D:/src/file.cs", Transformed(@"C:\agent42\src\file.cs"));
        Assert.Equal("D:/src/file.cs", Transformed(@"C:\agent43\src\file.cs"));
        Assert.Equal(@"C:/agent43/src/external/file.cs", Transformed(@"C:\agent43\src\external\file.cs"));

        // Linux-style matching
        Assert.Equal(@"src2/src/file.cs", Transformed(@"/agent/src/file.cs"));
        Assert.Equal(@"src2/src/file.cs", Transformed(@"/agent42/src/file.cs"));
        Assert.Equal(@"optsrc/file.cs", Transformed(@"/opt/src/file.cs"));
    }
}
}

View File

@@ -14,7 +14,7 @@ namespace Semmle.Extraction.Tests
string tempDir = System.IO.Path.GetTempPath();
string root1, root2, root3;
if(Win32.IsWindows())
if (Win32.IsWindows())
{
root1 = "E:";
root2 = "e:";
@@ -27,32 +27,21 @@ namespace Semmle.Extraction.Tests
root3 = "/";
}
string formattedTempDir = tempDir.Replace('/', '\\').Replace(':', '_').Trim('\\');
var logger = new LoggerMock();
System.IO.Directory.SetCurrentDirectory(tempDir);
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
// `Directory.SetCurrentDirectory()` doesn't seem to work on macOS,
// so disable this test on macOS, for now
Assert.NotEqual(Directory.GetCurrentDirectory(), tempDir);
return;
}
Assert.Equal($@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\'));
Assert.Equal($@"C:\Temp\source_archive\{formattedTempDir}\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/','\\'));
Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs").Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs").Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\'));
Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs").Replace('/', '\\'));
}
class LoggerMock : ILogger

View File

@@ -10,93 +10,55 @@ namespace Semmle.Extraction.Entities
File(Context cx, string path)
: base(cx, path)
{
Path = path;
OriginalPath = path;
TransformedPathLazy = new Lazy<PathTransformer.ITransformedPath>(() => Context.Extractor.PathTransformer.Transform(OriginalPath));
}
public string Path
{
get;
private set;
}
readonly string OriginalPath;
readonly Lazy<PathTransformer.ITransformedPath> TransformedPathLazy;
PathTransformer.ITransformedPath TransformedPath => TransformedPathLazy.Value;
public string DatabasePath => PathAsDatabaseId(Path);
public override bool NeedsPopulation => Context.DefinesFile(Path) || Path == Context.Extractor.OutputPath;
public override bool NeedsPopulation => Context.DefinesFile(OriginalPath) || OriginalPath == Context.Extractor.OutputPath;
public override void Populate(TextWriter trapFile)
{
if (Path == null)
trapFile.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension);
if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir)
trapFile.containerparent(Folder.Create(Context, dir), this);
var fromSource = TransformedPath.Extension.ToLowerInvariant().Equals("cs");
if (fromSource)
{
trapFile.files(this, "", "", "");
}
else
{
var fi = new FileInfo(Path);
string extension = fi.Extension ?? "";
string name = fi.Name;
name = name.Substring(0, name.Length - extension.Length);
int fromSource = extension.ToLowerInvariant().Equals(".cs") ? 1 : 2;
// remove the dot from the extension
if (extension.Length > 0)
extension = extension.Substring(1);
trapFile.files(this, PathAsDatabaseString(Path), name, extension);
trapFile.containerparent(Folder.Create(Context, fi.Directory), this);
if (fromSource == 1)
foreach (var text in Context.Compilation.SyntaxTrees.
Where(t => t.FilePath == OriginalPath).
Select(tree => tree.GetText()))
{
foreach (var text in Context.Compilation.SyntaxTrees.
Where(t => t.FilePath == Path).
Select(tree => tree.GetText()))
{
var rawText = text.ToString() ?? "";
var lineCounts = LineCounter.ComputeLineCounts(rawText);
if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++;
var rawText = text.ToString() ?? "";
var lineCounts = LineCounter.ComputeLineCounts(rawText);
if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++;
trapFile.numlines(this, lineCounts);
Context.TrapWriter.Archive(fi.FullName, text.Encoding ?? System.Text.Encoding.Default);
}
trapFile.numlines(this, lineCounts);
Context.TrapWriter.Archive(OriginalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default);
}
trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0);
}
trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0);
}
public override void WriteId(System.IO.TextWriter trapFile)
{
if (Path is null)
trapFile.Write("GENERATED;sourcefile");
else
{
trapFile.Write(DatabasePath);
trapFile.Write(";sourcefile");
}
trapFile.Write(TransformedPath.DatabaseId);
trapFile.Write(";sourcefile");
}
/// <summary>
/// Converts a path string into a string to use as an ID
/// in the QL database.
/// </summary>
/// <param name="path">An absolute path.</param>
/// <returns>The database ID.</returns>
public static string PathAsDatabaseId(string path)
{
if (path.Length >= 2 && path[1] == ':' && Char.IsLower(path[0]))
path = Char.ToUpper(path[0]) + "_" + path.Substring(2);
return path.Replace('\\', '/').Replace(":", "_");
}
public static string PathAsDatabaseString(string path) => path.Replace('\\', '/');
public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, path);
public static File CreateGenerated(Context cx) => GeneratedFile.Create(cx);
class GeneratedFile : File
{
GeneratedFile(Context cx)
: base(cx, "") { }
GeneratedFile(Context cx) : base(cx, "") { }
public override bool NeedsPopulation => true;

View File

@@ -2,65 +2,44 @@ using System.IO;
namespace Semmle.Extraction.Entities
{
sealed class Folder : CachedEntity<DirectoryInfo>
sealed class Folder : CachedEntity<PathTransformer.ITransformedPath>
{
Folder(Context cx, DirectoryInfo init)
: base(cx, init)
{
Path = init.FullName;
}
public string Path
{
get;
private set;
}
public string DatabasePath => File.PathAsDatabaseId(Path);
Folder(Context cx, PathTransformer.ITransformedPath init) : base(cx, init) { }
public override void Populate(TextWriter trapFile)
{
// Ensure that the name of the root directory is consistent
// with the XmlTrapWriter.
// Linux/Windows: java.io.File.getName() returns ""
// On Linux: System.IO.DirectoryInfo.Name returns "/"
// On Windows: System.IO.DirectoryInfo.Name returns "L:\"
string shortName = symbol.Parent == null ? "" : symbol.Name;
trapFile.folders(this, File.PathAsDatabaseString(Path), shortName);
if (symbol.Parent != null)
{
trapFile.containerparent(Create(Context, symbol.Parent), this);
}
trapFile.folders(this, symbol.Value, symbol.NameWithoutExtension);
if (symbol.ParentDirectory is PathTransformer.ITransformedPath parent)
trapFile.containerparent(Create(Context, parent), this);
}
public override bool NeedsPopulation => true;
public override void WriteId(System.IO.TextWriter trapFile)
{
trapFile.Write(DatabasePath);
trapFile.Write(symbol.DatabaseId);
trapFile.Write(";folder");
}
public static Folder Create(Context cx, DirectoryInfo folder) =>
public static Folder Create(Context cx, PathTransformer.ITransformedPath folder) =>
FolderFactory.Instance.CreateEntity2(cx, folder);
public override Microsoft.CodeAnalysis.Location? ReportingLocation => null;
class FolderFactory : ICachedEntityFactory<DirectoryInfo, Folder>
class FolderFactory : ICachedEntityFactory<PathTransformer.ITransformedPath, Folder>
{
public static readonly FolderFactory Instance = new FolderFactory();
public Folder Create(Context cx, DirectoryInfo init) => new Folder(cx, init);
public Folder Create(Context cx, PathTransformer.ITransformedPath init) => new Folder(cx, init);
}
public override TrapStackBehaviour TrapStackBehaviour => TrapStackBehaviour.NoLabel;
public override int GetHashCode() => Path.GetHashCode();
public override int GetHashCode() => symbol.GetHashCode();
public override bool Equals(object? obj)
{
return obj is Folder folder && folder.Path == Path;
return obj is Folder folder && Equals(folder.symbol, symbol);
}
}
}

View File

@@ -81,6 +81,11 @@ namespace Semmle.Extraction
/// </summary>
ILogger Logger { get; }
/// <summary>
/// The path transformer to apply.
/// </summary>
PathTransformer PathTransformer { get; }
/// <summary>
/// Creates a new context.
/// </summary>
@@ -111,11 +116,14 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="standalone">If the extraction is standalone.</param>
/// <param name="outputPath">The name of the output DLL/EXE, or null if not specified (standalone extraction).</param>
public Extractor(bool standalone, string outputPath, ILogger logger)
/// <param name="logger">The object used for logging.</param>
/// <param name="pathTransformer">The object used for path transformations.</param>
public Extractor(bool standalone, string outputPath, ILogger logger, PathTransformer pathTransformer)
{
Standalone = standalone;
OutputPath = outputPath;
Logger = logger;
PathTransformer = pathTransformer;
}
// Limit the number of error messages in the log file
@@ -205,5 +213,7 @@ namespace Semmle.Extraction
public ILogger Logger { get; private set; }
public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})";
public PathTransformer PathTransformer { get; }
}
}

View File

@@ -0,0 +1,131 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Diagnostics.CodeAnalysis;
using Semmle.Util;
namespace Semmle.Extraction
{
/// <summary>
/// The exception raised when a file pattern is rejected while being
/// translated into a regular expression.
/// </summary>
public sealed class InvalidFilePatternException : Exception
{
    /// <summary>
    /// Creates the exception with a message that quotes the offending pattern.
    /// </summary>
    /// <param name="pattern">The pattern that was rejected.</param>
    /// <param name="message">The reason the pattern was rejected.</param>
    public InvalidFilePatternException(string pattern, string message)
        : base($"Invalid file pattern '{pattern}': {message}")
    {
    }
}
/// <summary>
/// A file pattern, as used in either an extractor layout file or
/// a path transformer file.
/// </summary>
public sealed class FilePattern
{
    /// <summary>
    /// Whether this is an inclusion pattern. A pattern written with a
    /// leading `-` is an exclusion pattern.
    /// </summary>
    public bool Include { get; }

    /// <summary>
    /// The regex built from <see cref="RegexPattern"/>. It is constructed once
    /// here so that `Matches` does not have to re-parse the pattern on every call.
    /// </summary>
    readonly Regex regex;

    /// <summary>
    /// Parses `pattern`. A leading `-` marks an exclusion pattern; the remainder
    /// is trimmed, normalised with `FileUtils.ConvertToUnix`, stripped of leading
    /// `/` characters and translated into a regex.
    /// Throws `InvalidFilePatternException` for invalid patterns.
    /// </summary>
    /// <param name="pattern">The pattern text, for example one line of a layout file.</param>
    public FilePattern(string pattern)
    {
        Include = true;
        // Ordinal comparison: the marker is a literal character, not linguistic text.
        if (pattern.StartsWith("-", StringComparison.Ordinal))
        {
            pattern = pattern.Substring(1);
            Include = false;
        }
        pattern = FileUtils.ConvertToUnix(pattern.Trim()).TrimStart('/');
        RegexPattern = BuildRegex(pattern).ToString();
        // BuildRegex only emits escaped literals and fixed fragments, so this
        // construction cannot fail on a pattern that BuildRegex accepted.
        regex = new Regex(RegexPattern);
    }

    /// <summary>
    /// Constructs a regex string from a file pattern. Throws
    /// `InvalidFilePatternException` for invalid patterns.
    /// </summary>
    /// <remarks>
    /// Translation rules, applied left to right:
    /// `//` becomes a named group `doubleslash` matching a single `/` (allowed at most once);
    /// `/` matches itself;
    /// `**` becomes `.*` and must be delimited by `/` (or the pattern boundary) on both sides;
    /// `*` becomes `[^/]*`, i.e. it does not cross a `/`;
    /// any other character is regex-escaped.
    /// The result is anchored at the start with `^` and ends with `.*`, so the
    /// pattern matches any path that it is a prefix of.
    /// </remarks>
    /// <param name="pattern">The normalised pattern text.</param>
    /// <returns>The builder holding the regex string.</returns>
    static StringBuilder BuildRegex(string pattern)
    {
        // True iff position `i` is inside the pattern and its character satisfies `p`.
        bool HasCharAt(int i, Predicate<char> p) =>
            i >= 0 && i < pattern.Length && p(pattern[i]);

        var sb = new StringBuilder();
        var i = 0;
        var seenDoubleSlash = false;
        sb.Append('^');
        while (i < pattern.Length)
        {
            if (pattern[i] == '/')
            {
                if (HasCharAt(i + 1, c => c == '/'))
                {
                    if (seenDoubleSlash)
                        throw new InvalidFilePatternException(pattern, "'//' is allowed at most once.");
                    // The group records where the transformer suffix starts.
                    sb.Append("(?<doubleslash>/)");
                    i += 2;
                    seenDoubleSlash = true;
                }
                else
                {
                    sb.Append('/');
                    i++;
                }
            }
            else if (pattern[i] == '*')
            {
                if (HasCharAt(i + 1, c => c == '*'))
                {
                    if (HasCharAt(i - 1, c => c != '/'))
                        throw new InvalidFilePatternException(pattern, "'**' preceeded by non-`/` character.");
                    if (HasCharAt(i + 2, c => c != '/'))
                        throw new InvalidFilePatternException(pattern, "'**' succeeded by non-`/` character");
                    sb.Append(".*");
                    i += 2;
                }
                else
                {
                    sb.Append("[^/]*");
                    i++;
                }
            }
            else
                sb.Append(Regex.Escape(pattern[i++].ToString()));
        }
        return sb.Append(".*");
    }

    /// <summary>
    /// The regex pattern compiled from this file pattern.
    /// </summary>
    public string RegexPattern { get; }

    /// <summary>
    /// Returns `true` if the set of file patterns `patterns` match the path `path`.
    /// If so, `transformerSuffix` will contain the part of `path` that needs to be
    /// suffixed when using path transformers.
    /// </summary>
    /// <remarks>
    /// Patterns are tried from last to first and the first one that matches
    /// decides the outcome: an inclusion pattern yields `true`, an exclusion
    /// pattern yields `false`. If no pattern matches the result is `false`.
    /// </remarks>
    /// <param name="patterns">The patterns, in the order they were specified.</param>
    /// <param name="path">The path to test; it is normalised with `FileUtils.ConvertToUnix`
    /// and stripped of leading `/` characters before matching.</param>
    /// <param name="transformerSuffix">On success, the normalised path starting at the
    /// position matched by `//` in the pattern, or the whole normalised path if the
    /// pattern has no `//`. `null` on failure.</param>
    public static bool Matches(IEnumerable<FilePattern> patterns, string path, [NotNullWhen(true)] out string? transformerSuffix)
    {
        path = FileUtils.ConvertToUnix(path).TrimStart('/');

        foreach (var pattern in patterns.Reverse())
        {
            var m = pattern.regex.Match(path);
            if (m.Success)
            {
                if (pattern.Include)
                {
                    transformerSuffix = m.Groups.TryGetValue("doubleslash", out var group)
                        ? path.Substring(group.Index)
                        : path;
                    return true;
                }

                transformerSuffix = null;
                return false;
            }
        }

        transformerSuffix = null;
        return false;
    }
}
}

View File

@@ -54,14 +54,15 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>The full filepath of the trap file.</returns>
public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
public string GetTrapPath(ILogger logger, PathTransformer.ITransformedPath srcFile, TrapWriter.CompressionMode trapCompression) =>
TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression);
/// <summary>
/// Creates a trap writer for a given source/assembly file.
/// </summary>
/// <param name="srcFile">The source file.</param>
/// <returns>A newly created TrapWriter.</returns>
public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
public TrapWriter CreateTrapWriter(ILogger logger, PathTransformer.ITransformedPath srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) =>
new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression);
}
@@ -73,7 +74,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="sourceFile">The file to look up.</param>
/// <returns>The relevant subproject, or null if not found.</returns>
public SubProject? LookupProjectOrNull(string sourceFile)
public SubProject? LookupProjectOrNull(PathTransformer.ITransformedPath sourceFile)
{
if (!useLayoutFile) return DefaultProject;
@@ -89,7 +90,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="sourceFile">The file to look up.</param>
/// <returns>The relevant subproject, or DefaultProject if not found.</returns>
public SubProject LookupProjectOrDefault(string sourceFile)
public SubProject LookupProjectOrDefault(PathTransformer.ITransformedPath sourceFile)
{
return LookupProjectOrNull(sourceFile) ?? DefaultProject;
}
@@ -134,7 +135,7 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="path">The absolute path of the file to query.</param>
/// <returns>True iff there is no layout file or the layout file specifies the file.</returns>
public bool FileInLayout(string path) => LookupProjectOrNull(path) != null;
public bool FileInLayout(PathTransformer.ITransformedPath path) => LookupProjectOrNull(path) != null;
void ReadLayoutFile(string layout)
{
@@ -167,33 +168,7 @@ namespace Semmle.Extraction
sealed class LayoutBlock
{
struct Condition
{
private readonly bool include;
private readonly string prefix;
public bool Include => include;
public string Prefix => prefix;
public Condition(string line)
{
include = false;
if (line.StartsWith("-"))
line = line.Substring(1);
else
include = true;
prefix = Normalise(line.Trim());
}
static public string Normalise(string path)
{
path = Path.GetFullPath(path);
return path.Replace('\\', '/');
}
}
private readonly List<Condition> conditions = new List<Condition>();
private readonly List<FilePattern> filePatterns = new List<FilePattern>();
public readonly Layout.SubProject Directories;
@@ -219,22 +194,10 @@ namespace Semmle.Extraction
ReadVariable("ODASA_BUILD_ERROR_DIR", lines[i++]);
while (i < lines.Length && !lines[i].StartsWith("#"))
{
conditions.Add(new Condition(lines[i++]));
filePatterns.Add(new FilePattern(lines[i++]));
}
}
public bool Matches(string path)
{
bool matches = false;
path = Condition.Normalise(path);
foreach (Condition condition in conditions)
{
if (condition.Include)
matches |= path.StartsWith(condition.Prefix);
else
matches &= !path.StartsWith(condition.Prefix);
}
return matches;
}
public bool Matches(PathTransformer.ITransformedPath path) => FilePattern.Matches(filePatterns, path.Value, out var _);
}
}

View File

@@ -0,0 +1,177 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Diagnostics.CodeAnalysis;
using Semmle.Util;
namespace Semmle.Extraction
{
/// <summary>
/// A class for interpreting path transformers specified using the environment
/// variable `CODEQL_PATH_TRANSFORMER`.
/// </summary>
public sealed class PathTransformer
{
    /// <summary>
    /// Thrown when a path transformer specification cannot be parsed.
    /// </summary>
    public class InvalidPathTransformerException : Exception
    {
        public InvalidPathTransformerException(string message) :
            base($"Invalid path transformer specification: {message}")
        { }
    }

    /// <summary>
    /// A transformed path.
    /// </summary>
    public interface ITransformedPath
    {
        /// <summary>The transformed path as a string.</summary>
        string Value { get; }

        /// <summary>The extension without the leading `.`, or the empty string if there is none.</summary>
        string Extension { get; }

        /// <summary>The last path component without its extension.</summary>
        string NameWithoutExtension { get; }

        /// <summary>The containing directory, or `null` if there is none.</summary>
        ITransformedPath? ParentDirectory { get; }

        /// <summary>Returns this path with `suffix` appended to its string value.</summary>
        ITransformedPath WithSuffix(string suffix);

        /// <summary>The string used to identify this path in the database.</summary>
        string DatabaseId { get; }
    }

    struct TransformedPath : ITransformedPath
    {
        public TransformedPath(string value) { this.value = value; }
        readonly string value;

        public string Value => value;

        public string Extension
        {
            get
            {
                // Path.GetExtension returns "" (not null) for a path without an
                // extension, so the leading '.' may only be stripped when present.
                var extension = Path.GetExtension(value);
                return string.IsNullOrEmpty(extension) ? "" : extension.Substring(1);
            }
        }

        public string NameWithoutExtension => Path.GetFileNameWithoutExtension(value);

        public ITransformedPath? ParentDirectory
        {
            get
            {
                var dir = Path.GetDirectoryName(value);
                if (dir is null)
                    return null;
                // A bare drive letter such as "C:" is treated as having no parent folder entity.
                var isWindowsDriveLetter = dir.Length == 2 && char.IsLetter(dir[0]) && dir[1] == ':';
                if (isWindowsDriveLetter)
                    return null;
                return new TransformedPath(FileUtils.ConvertToUnix(dir));
            }
        }

        public ITransformedPath WithSuffix(string suffix) => new TransformedPath(value + suffix);

        public string DatabaseId
        {
            get
            {
                var ret = value;
                // Upper-case a lower-case drive letter and replace its ':' with '_'.
                if (ret.Length >= 2 && ret[1] == ':' && Char.IsLower(ret[0]))
                    ret = Char.ToUpper(ret[0]) + "_" + ret.Substring(2);
                return ret.Replace('\\', '/').Replace(":", "_");
            }
        }

        public override int GetHashCode() => 11 * value.GetHashCode();

        public override bool Equals(object? obj) => obj is TransformedPath tp && tp.value == value;

        public override string ToString() => value;
    }

    /// <summary>
    /// The function mapping an input path to its transformed string; chosen
    /// in the constructor depending on whether a specification was supplied.
    /// </summary>
    readonly Func<string, string> transform;

    /// <summary>
    /// Returns the path obtained by transforming `path`.
    /// </summary>
    public ITransformedPath Transform(string path) => new TransformedPath(transform(path));

    /// <summary>
    /// Default constructor reads parameters from the environment: if
    /// `CODEQL_PATH_TRANSFORMER` is set, the file it names is read as the
    /// specification; otherwise no specification is used.
    /// </summary>
    /// <param name="pathCache">The cache used to canonicalise paths before transforming them.</param>
    public PathTransformer(IPathCache pathCache) :
        this(pathCache, Environment.GetEnvironmentVariable("CODEQL_PATH_TRANSFORMER") is string file ? File.ReadAllLines(file) : null)
    {
    }

    /// <summary>
    /// Creates a path transformer based on the specification in `lines`.
    /// Throws `InvalidPathTransformerException` for invalid specifications.
    /// </summary>
    /// <param name="pathCache">The cache used to canonicalise paths before transforming them.</param>
    /// <param name="lines">The lines of the specification, or `null` to only
    /// canonicalise paths and normalise them with `FileUtils.ConvertToUnix`.</param>
    public PathTransformer(IPathCache pathCache, string[]? lines)
    {
        if (lines is null)
        {
            transform = path => FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path));
            return;
        }

        var sections = ParsePathTransformerSpec(lines);
        transform = path =>
        {
            path = FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path));
            // Sections are tried in specification order; the first match wins.
            foreach (var section in sections)
            {
                if (section.Matches(path, out var transformed))
                    return transformed;
            }
            // No section applies: keep the canonical path unchanged.
            return path;
        };
    }

    /// <summary>
    /// Parses a specification into its sections. Lines before the first line
    /// starting with `#` are ignored; every `#` line starts a new section.
    /// Throws `InvalidPathTransformerException` if there are no sections or
    /// if a section contains an invalid file pattern.
    /// </summary>
    static IEnumerable<TransformerSection> ParsePathTransformerSpec(string[] lines)
    {
        var sections = new List<TransformerSection>();
        try
        {
            int i = 0;
            // Skip everything up to the first section header.
            while (i < lines.Length && !lines[i].StartsWith("#", StringComparison.Ordinal))
                i++;
            while (i < lines.Length)
            {
                // The TransformerSection constructor advances `i` past the section it reads.
                var section = new TransformerSection(lines, ref i);
                sections.Add(section);
            }

            if (sections.Count == 0)
                throw new InvalidPathTransformerException("contains no sections.");
        }
        catch (InvalidFilePatternException ex)
        {
            throw new InvalidPathTransformerException(ex.Message);
        }
        return sections;
    }
}
/// <summary>
/// One section of a path transformer specification: a header line whose
/// text after the leading `#` is the replacement prefix, followed by the
/// file patterns that select the paths the section applies to.
/// </summary>
sealed class TransformerSection
{
    readonly string name;
    readonly List<FilePattern> filePatterns = new List<FilePattern>();

    /// <summary>
    /// Reads a section starting at `lines[i]`, which must be the header line.
    /// On return `i` is the index of the next header line, or `lines.Length`.
    /// Blank lines inside the section are ignored.
    /// </summary>
    public TransformerSection(string[] lines, ref int i)
    {
        var header = lines[i];
        i++;
        name = header.Substring(1); // skip the '#'

        while (i < lines.Length && !lines[i].StartsWith("#"))
        {
            var candidate = lines[i];
            if (!string.IsNullOrWhiteSpace(candidate))
                filePatterns.Add(new FilePattern(candidate));
            i++;
        }
    }

    /// <summary>
    /// Returns `true` if this section's patterns match `path`; `transformed` is
    /// then the section name (normalised with `FileUtils.ConvertToUnix`)
    /// followed by the suffix reported by `FilePattern.Matches`.
    /// </summary>
    public bool Matches(string path, [NotNullWhen(true)] out string? transformed)
    {
        if (!FilePattern.Matches(filePatterns, path, out var suffix))
        {
            transformed = null;
            return false;
        }

        transformed = FileUtils.ConvertToUnix(name) + suffix;
        return true;
    }
}
}

View File

@@ -14,12 +14,6 @@ namespace Semmle.Extraction
public sealed class TrapWriter : IDisposable
{
public enum InnerPathComputation
{
ABSOLUTE,
RELATIVE
}
public enum CompressionMode
{
None,
@@ -45,7 +39,7 @@ namespace Semmle.Extraction
readonly CompressionMode TrapCompression;
public TrapWriter(ILogger logger, string outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression)
public TrapWriter(ILogger logger, PathTransformer.ITransformedPath outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression)
{
Logger = logger;
TrapCompression = trapCompression;
@@ -107,16 +101,17 @@ namespace Semmle.Extraction
/// Adds the specified input file to the source archive. It may end up in either the normal or long path area
/// of the source archive, depending on the length of its full path.
/// </summary>
/// <param name="inputPath">The path to the input file.</param>
/// <param name="originalPath">The path to the input file.</param>
/// <param name="transformedPath">The transformed path to the input file.</param>
/// <param name="inputEncoding">The encoding used by the input file.</param>
public void Archive(string inputPath, Encoding inputEncoding)
public void Archive(string originalPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
{
if (string.IsNullOrEmpty(archive)) return;
// Calling GetFullPath makes this use the canonical capitalisation, if the file exists.
string fullInputPath = Path.GetFullPath(inputPath);
string fullInputPath = Path.GetFullPath(originalPath);
ArchivePath(fullInputPath, inputEncoding);
ArchivePath(fullInputPath, transformedPath, inputEncoding);
}
/// <summary>
@@ -124,14 +119,11 @@ namespace Semmle.Extraction
/// </summary>
/// <param name="inputPath">The path of the file.</param>
/// <param name="contents">The contents of the file.</param>
public void Archive(string inputPath, string contents)
public void Archive(PathTransformer.ITransformedPath inputPath, string contents)
{
if (string.IsNullOrEmpty(archive)) return;
// Calling GetFullPath makes this use the canonical capitalisation, if the file exists.
string fullInputPath = Path.GetFullPath(inputPath);
ArchiveContents(fullInputPath, contents);
ArchiveContents(inputPath, contents);
}
/// <summary>
@@ -210,18 +202,19 @@ namespace Semmle.Extraction
/// source archive less than the system path limit of 260 characters.
/// </summary>
/// <param name="fullInputPath">The full path to the input file.</param>
/// <param name="transformedPath">The transformed path to the input file.</param>
/// <param name="inputEncoding">The encoding used by the input file.</param>
/// <exception cref="PathTooLongException">If the output path in the source archive would
/// exceed the system path limit of 260 characters.</exception>
private void ArchivePath(string fullInputPath, Encoding inputEncoding)
private void ArchivePath(string fullInputPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
{
string contents = File.ReadAllText(fullInputPath, inputEncoding);
ArchiveContents(fullInputPath, contents);
ArchiveContents(transformedPath, contents);
}
private void ArchiveContents(string fullInputPath, string contents)
private void ArchiveContents(PathTransformer.ITransformedPath transformedPath, string contents)
{
string dest = NestPaths(Logger, archive, fullInputPath, InnerPathComputation.ABSOLUTE);
string dest = NestPaths(Logger, archive, transformedPath.Value);
string tmpSrcFile = Path.GetTempFileName();
File.WriteAllText(tmpSrcFile, contents, UTF8);
try
@@ -236,14 +229,11 @@ namespace Semmle.Extraction
}
}
public static string NestPaths(ILogger logger, string? outerpath, string innerpath, InnerPathComputation innerPathComputation)
public static string NestPaths(ILogger logger, string? outerpath, string innerpath)
{
string nested = innerpath;
if (!string.IsNullOrEmpty(outerpath))
{
if (!Path.IsPathRooted(innerpath) && innerPathComputation == InnerPathComputation.ABSOLUTE)
innerpath = Path.GetFullPath(innerpath);
// Remove all leading path separators / or \
// For example, UNC paths have two leading \\
innerpath = innerpath.TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
@@ -276,13 +266,13 @@ namespace Semmle.Extraction
}
}
public static string TrapPath(ILogger logger, string? folder, string filename, TrapWriter.CompressionMode trapCompression)
public static string TrapPath(ILogger logger, string? folder, PathTransformer.ITransformedPath path, TrapWriter.CompressionMode trapCompression)
{
filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}";
var filename = $"{path.Value}.trap{TrapExtension(trapCompression)}";
if (string.IsNullOrEmpty(folder))
folder = Directory.GetCurrentDirectory();
return NestPaths(logger, folder, filename, InnerPathComputation.ABSOLUTE); ;
return NestPaths(logger, folder, filename);
}
}
}

View File

@@ -222,6 +222,29 @@ namespace Semmle.Util
this.pathStrategy = pathStrategy;
}
/// <summary>
/// Create a CanonicalPathCache, taking the symlink policy from the environment.
/// </summary>
///
/// <remarks>
/// Symlinks are preserved if either `CODEQL_PRESERVE_SYMLINKS` or
/// `SEMMLE_PRESERVE_SYMLINKS` is set to exactly `true`; otherwise they are
/// followed. The cache itself is created by the overload that takes an
/// explicit symlink policy.
/// </remarks>
///
/// <param name="logger">The logger passed on to the overload that creates the cache.</param>
/// <param name="maxCapacity">Size of the cache.</param>
/// <returns>A new CanonicalPathCache.</returns>
public static CanonicalPathCache Create(ILogger logger, int maxCapacity)
{
    bool IsSetToTrue(string variable) =>
        Environment.GetEnvironmentVariable(variable) == "true";

    var symlinkPolicy = IsSetToTrue("CODEQL_PRESERVE_SYMLINKS") || IsSetToTrue("SEMMLE_PRESERVE_SYMLINKS")
        ? CanonicalPathCache.Symlinks.Preserve
        : CanonicalPathCache.Symlinks.Follow;

    return Create(logger, maxCapacity, symlinkPolicy);
}
/// <summary>
/// Create a CanonicalPathCache.
/// </summary>