diff --git a/change-notes/1.25/analysis-java.md b/change-notes/1.25/analysis-java.md index 7cdd9e491a2..ab11e5aaaf1 100644 --- a/change-notes/1.25/analysis-java.md +++ b/change-notes/1.25/analysis-java.md @@ -4,20 +4,26 @@ The following changes in version 1.25 affect Java analysis in all applications. ## General improvements -## New queries - -| **Query** | **Tags** | **Purpose** | -|-----------------------------|-----------|--------------------------------------------------------------------| - +The Java autobuilder has been improved to detect more Gradle Java versions. ## Changes to existing queries | **Query** | **Expected impact** | **Change** | |------------------------------|------------------------|-----------------------------------| - +| Hard-coded credential in API call (`java/hardcoded-credential-api-call`) | More results | The query now recognizes the `BasicAWSCredentials` class of the Amazon client SDK library with hardcoded access key/secret key. | +| Deserialization of user-controlled data (`java/unsafe-deserialization`) | Fewer false positive results | The query no longer reports results using `org.apache.commons.io.serialization.ValidatingObjectInputStream`. | +| Use of a broken or risky cryptographic algorithm (`java/weak-cryptographic-algorithm`) | More results | The query now recognizes the `MessageDigest.getInstance` method. | +| Use of a potentially broken or risky cryptographic algorithm (`java/potentially-weak-cryptographic-algorithm`) | More results | The query now recognizes the `MessageDigest.getInstance` method. | +| Reading from a world writable file (`java/world-writable-file-read`) | More results | The query now recognizes more JDK file operations. | ## Changes to libraries +* The data-flow library has been improved with more taint flow modeling for the + Collections framework and other classes of the JDK. This affects all security + queries using data flow and can yield additional results. 
+* The data-flow library has been improved with more taint flow modeling for the + Spring framework. This affects all security queries using data flow and can + yield additional results on projects that rely on the Spring framework. * The data-flow library has been improved, which affects most security queries by potentially adding more results. Flow through methods now takes nested field reads/writes into account. For example, the library is able to track flow from `"taint"` to `sink()` via the method @@ -39,3 +45,5 @@ The following changes in version 1.25 affect Java analysis in all applications. } } ``` +* The library has been extended with more support for Java 14 features + (`switch` expressions and pattern-matching for `instanceof`). diff --git a/change-notes/1.25/analysis-python.md b/change-notes/1.25/analysis-python.md index 5d0fc69ec80..ed3496bc734 100644 --- a/change-notes/1.25/analysis-python.md +++ b/change-notes/1.25/analysis-python.md @@ -1,22 +1,9 @@ # Improvements to Python analysis -The following changes in version 1.25 affect Python analysis in all applications. - -## General improvements - - -## New queries - -| **Query** | **Tags** | **Purpose** | -|-----------------------------|-----------|--------------------------------------------------------------------| - - -## Changes to existing queries - -| **Query** | **Expected impact** | **Change** | -|----------------------------|------------------------|------------------------------------------------------------------| - - -## Changes to libraries - * Importing `semmle.python.web.HttpRequest` will no longer import `UntrustedStringKind` transitively. `UntrustedStringKind` is the most commonly used non-abstract subclass of `ExternalStringKind`. If not imported (by one mean or another), taint-tracking queries that concern `ExternalStringKind` will not produce any results. Please ensure such queries contain an explicit import (`import semmle.python.security.strings.Untrusted`). 
+* Added model of taint sources for HTTP servers using `http.server`. +* Added taint modeling of routed parameters in Flask. +* Improved modeling of built-in methods on strings for taint tracking. +* Improved classification of test files. +* New class `BoundMethodValue` represents a bound method during runtime. +* The query `py/command-line-injection` now recognizes command execution with the `fabric` and `invoke` Python libraries. diff --git a/csharp/extractor/Semmle.Extraction.CIL/Context.cs b/csharp/extractor/Semmle.Extraction.CIL/Context.cs index 618293553f0..e3fee31c57e 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Context.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Context.cs @@ -43,7 +43,7 @@ namespace Semmle.Extraction.CIL namespaceFactory = new CachedFunction(n => CreateNamespace(mdReader.GetString(n))); namespaceDefinitionFactory = new CachedFunction(CreateNamespace); sourceFiles = new CachedFunction(path => new Entities.PdbSourceFile(this, path)); - folders = new CachedFunction(path => new Entities.Folder(this, path)); + folders = new CachedFunction(path => new Entities.Folder(this, path)); sourceLocations = new CachedFunction(location => new Entities.PdbSourceLocation(this, location)); defaultGenericContext = new EmptyContext(this); diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs index f633af96db7..a38575d2f61 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs @@ -5,6 +5,7 @@ using Semmle.Util.Logging; using System; using Semmle.Extraction.Entities; using System.IO; +using Semmle.Util; namespace Semmle.Extraction.CIL.Entities { @@ -134,9 +135,12 @@ namespace Semmle.Extraction.CIL.Entities extracted = false; try { - var extractor = new Extractor(false, assemblyPath, logger); - var project = layout.LookupProjectOrDefault(assemblyPath); - using (var trapWriter = 
project.CreateTrapWriter(logger, assemblyPath + ".cil", true, trapCompression)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + var extractor = new Extractor(false, assemblyPath, logger, pathTransformer); + var transformedAssemblyPath = pathTransformer.Transform(assemblyPath); + var project = layout.LookupProjectOrDefault(transformedAssemblyPath); + using (var trapWriter = project.CreateTrapWriter(logger, transformedAssemblyPath.WithSuffix(".cil"), true, trapCompression)) { trapFile = trapWriter.TrapFile; if (nocache || !System.IO.File.Exists(trapFile)) diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs index 2506ea6ee00..175af111615 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs @@ -1,4 +1,4 @@ -using System.Collections.Generic; +using System.Collections.Generic; using System.IO; namespace Semmle.Extraction.CIL.Entities @@ -13,37 +13,38 @@ namespace Semmle.Extraction.CIL.Entities public class File : LabelledEntity, IFile { - protected readonly string path; + protected readonly string OriginalPath; + protected readonly PathTransformer.ITransformedPath TransformedPath; public File(Context cx, string path) : base(cx) { - this.path = Semmle.Extraction.Entities.File.PathAsDatabaseString(path); + this.OriginalPath = path; + TransformedPath = cx.cx.Extractor.PathTransformer.Transform(OriginalPath); } public override void WriteId(TextWriter trapFile) { - trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path)); + trapFile.Write(TransformedPath.DatabaseId); } public override bool Equals(object? 
obj) { - return GetType() == obj?.GetType() && path == ((File)obj).path; + return GetType() == obj?.GetType() && OriginalPath == ((File)obj).OriginalPath; } - public override int GetHashCode() => 11 * path.GetHashCode(); + public override int GetHashCode() => 11 * OriginalPath.GetHashCode(); public override IEnumerable Contents { get { - var directoryName = System.IO.Path.GetDirectoryName(path); - if (directoryName is null) - throw new InternalError($"Directory name for path '{path}' is null."); - - var parent = cx.CreateFolder(directoryName); - yield return parent; - yield return Tuples.containerparent(parent, this); - yield return Tuples.files(this, path, System.IO.Path.GetFileNameWithoutExtension(path), System.IO.Path.GetExtension(path).Substring(1)); + if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir) + { + var parent = cx.CreateFolder(dir); + yield return parent; + yield return Tuples.containerparent(parent, this); + } + yield return Tuples.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension); } } @@ -69,9 +70,9 @@ namespace Semmle.Extraction.CIL.Entities var text = file.Contents; if (text == null) - cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", path)); + cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", OriginalPath)); else - cx.cx.TrapWriter.Archive(path, text); + cx.cx.TrapWriter.Archive(TransformedPath, text); yield return Tuples.file_extraction_mode(this, 2); } diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs index b6597a3eba9..0769ac48106 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs @@ -9,16 +9,16 @@ namespace Semmle.Extraction.CIL.Entities public sealed class Folder : 
LabelledEntity, IFolder { - readonly string path; + readonly PathTransformer.ITransformedPath TransformedPath; - public Folder(Context cx, string path) : base(cx) + public Folder(Context cx, PathTransformer.ITransformedPath path) : base(cx) { - this.path = path; + this.TransformedPath = path; } public override void WriteId(TextWriter trapFile) { - trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path)); + trapFile.Write(TransformedPath.DatabaseId); } public override string IdSuffix => ";folder"; @@ -27,25 +27,21 @@ namespace Semmle.Extraction.CIL.Entities { get { - // On Posix, we could get a Windows directory of the form "C:" - bool windowsDriveLetter = path.Length == 2 && char.IsLetter(path[0]) && path[1] == ':'; - - var parent = Path.GetDirectoryName(path); - if (parent != null && !windowsDriveLetter) + if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath parent) { var parentFolder = cx.CreateFolder(parent); yield return parentFolder; yield return Tuples.containerparent(parentFolder, this); } - yield return Tuples.folders(this, Semmle.Extraction.Entities.File.PathAsDatabaseString(path), Path.GetFileName(path)); + yield return Tuples.folders(this, TransformedPath.Value, TransformedPath.NameWithoutExtension); } } public override bool Equals(object? 
obj) { - return obj is Folder folder && path == folder.path; + return obj is Folder folder && TransformedPath == folder.TransformedPath; } - public override int GetHashCode() => path.GetHashCode(); + public override int GetHashCode() => TransformedPath.GetHashCode(); } } diff --git a/csharp/extractor/Semmle.Extraction.CIL/Factories.cs b/csharp/extractor/Semmle.Extraction.CIL/Factories.cs index f2f98b64d17..d43eaf780a9 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Factories.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Factories.cs @@ -201,7 +201,7 @@ namespace Semmle.Extraction.CIL #region Locations readonly CachedFunction sourceFiles; - readonly CachedFunction folders; + readonly CachedFunction folders; readonly CachedFunction sourceLocations; /// @@ -216,7 +216,7 @@ namespace Semmle.Extraction.CIL /// /// The path of the folder. /// A folder entity. - public Folder CreateFolder(string path) => folders[path]; + public Folder CreateFolder(PathTransformer.ITransformedPath path) => folders[path]; /// /// Creates a source location. 
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs b/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs index 34dece8e160..cc888326dfe 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs @@ -27,13 +27,16 @@ namespace Semmle.Extraction.CSharp public readonly bool AddAssemblyTrapPrefix; - public Analyser(IProgressMonitor pm, ILogger logger, bool addAssemblyTrapPrefix) + public readonly PathTransformer PathTransformer; + + public Analyser(IProgressMonitor pm, ILogger logger, bool addAssemblyTrapPrefix, PathTransformer pathTransformer) { Logger = logger; AddAssemblyTrapPrefix = addAssemblyTrapPrefix; Logger.Log(Severity.Info, "EXTRACTION STARTING at {0}", DateTime.Now); stopWatch.Start(); progressMonitor = pm; + PathTransformer = pathTransformer; } CSharpCompilation compilation; @@ -67,7 +70,7 @@ namespace Semmle.Extraction.CSharp layout = new Layout(); this.options = options; this.compilation = compilation; - extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger); + extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger, PathTransformer); LogDiagnostics(); SetReferencePaths(); @@ -117,7 +120,7 @@ namespace Semmle.Extraction.CSharp { compilation = compilationIn; layout = new Layout(); - extractor = new Extraction.Extractor(true, null, Logger); + extractor = new Extraction.Extractor(true, null, Logger, PathTransformer); this.options = options; LogExtractorInfo(Extraction.Extractor.Version); SetReferencePaths(); @@ -230,9 +233,10 @@ namespace Semmle.Extraction.CSharp try { var assemblyPath = extractor.OutputPath; + var transformedAssemblyPath = PathTransformer.Transform(assemblyPath); var assembly = compilation.Assembly; - var projectLayout = layout.LookupProjectOrDefault(assemblyPath); - var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, 
options.TrapCompression); + var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath); + var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression); compilationTrapFile = trapWriter; // Dispose later var cx = extractor.CreateContext(compilation.Clone(), trapWriter, new AssemblyScope(assembly, assemblyPath, true), AddAssemblyTrapPrefix); @@ -260,8 +264,9 @@ namespace Semmle.Extraction.CSharp stopwatch.Start(); var assemblyPath = r.FilePath; - var projectLayout = layout.LookupProjectOrDefault(assemblyPath); - using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression)) + var transformedAssemblyPath = PathTransformer.Transform(assemblyPath); + var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath); + using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression)) { var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile); @@ -360,16 +365,17 @@ namespace Semmle.Extraction.CSharp var stopwatch = new Stopwatch(); stopwatch.Start(); var sourcePath = tree.FilePath; + var transformedSourcePath = PathTransformer.Transform(sourcePath); - var projectLayout = layout.LookupProjectOrNull(sourcePath); + var projectLayout = layout.LookupProjectOrNull(transformedSourcePath); bool excluded = projectLayout == null; - string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, sourcePath, options.TrapCompression); + string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, transformedSourcePath, options.TrapCompression); bool upToDate = false; if (!excluded) { // compilation.Clone() is used to allow symbols to be garbage collected. 
- using (var trapWriter = projectLayout.CreateTrapWriter(Logger, sourcePath, false, options.TrapCompression)) + using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedSourcePath, false, options.TrapCompression)) { upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile); diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs index 95c568047c5..9f96b03f9f3 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; +using Semmle.Util; namespace Semmle.Extraction.CSharp.Entities { @@ -22,7 +23,7 @@ namespace Semmle.Extraction.CSharp.Entities { Extraction.Entities.Assembly.CreateOutputAssembly(cx); - trapFile.compilations(this, Extraction.Entities.File.PathAsDatabaseString(cwd)); + trapFile.compilations(this, FileUtils.ConvertToUnix(cwd)); // Arguments int index = 0; diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs index 1b5bf15d187..261ce7f9e43 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs @@ -76,16 +76,16 @@ namespace Semmle.Extraction.CSharp return ExitCode.Ok; } - using (var analyser = new Analyser(new LogProgressMonitor(logger), logger, commandLineArguments.AssemblySensitiveTrap)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + + using (var analyser = new Analyser(new LogProgressMonitor(logger), logger, commandLineArguments.AssemblySensitiveTrap, pathTransformer)) using (var references = new BlockingCollection()) { try { var compilerVersion = new CompilerVersion(commandLineArguments); - bool preserveSymlinks 
= Environment.GetEnvironmentVariable("SEMMLE_PRESERVE_SYMLINKS") == "true"; - var canonicalPathCache = CanonicalPathCache.Create(logger, 1000, preserveSymlinks ? CanonicalPathCache.Symlinks.Preserve : CanonicalPathCache.Symlinks.Follow); - if (compilerVersion.SkipExtraction) { logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason); @@ -318,7 +318,10 @@ namespace Semmle.Extraction.CSharp ILogger logger, CommonOptions options) { - using (var analyser = new Analyser(pm, logger, false)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + + using (var analyser = new Analyser(pm, logger, false, pathTransformer)) using (var references = new BlockingCollection()) { try diff --git a/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs b/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs new file mode 100644 index 00000000000..dfff75ea18b --- /dev/null +++ b/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs @@ -0,0 +1,48 @@ +using Xunit; + +namespace Semmle.Extraction.Tests +{ + public class FilePatternTests + { + [Fact] + public void TestRegexCompilation() + { + var fp = new FilePattern("/hadoop*"); + Assert.Equal("^hadoop[^/]*.*", fp.RegexPattern); + fp = new FilePattern("**/org/apache/hadoop"); + Assert.Equal("^.*/org/apache/hadoop.*", fp.RegexPattern); + fp = new FilePattern("hadoop-common/**/test// "); + Assert.Equal("^hadoop-common/.*/test(?/).*", fp.RegexPattern); + fp = new FilePattern(@"-C:\agent\root\asdf//"); + Assert.Equal("^C:/agent/root/asdf(?/).*", fp.RegexPattern); + fp = new FilePattern(@"-C:\agent+\[root]\asdf//"); + Assert.Equal(@"^C:/agent\+/\[root]/asdf(?/).*", fp.RegexPattern); + } + + [Fact] + public void TestMatching() + { + var fp1 = new FilePattern(@"C:\agent\root\abc//"); + var fp2 = new FilePattern(@"C:\agent\root\def//ghi"); + var patterns = new[] { fp1, fp2 }; + + var 
success = FilePattern.Matches(patterns, @"C:\agent\root\abc\file.cs", out var s); + Assert.True(success); + Assert.Equal("/file.cs", s); + + success = FilePattern.Matches(patterns, @"C:\agent\root\def\ghi\file.cs", out s); + Assert.True(success); + Assert.Equal("/ghi/file.cs", s); + + success = FilePattern.Matches(patterns, @"C:\agent\root\def\file.cs", out s); + Assert.False(success); + } + + [Fact] + public void TestInvalidPatterns() + { + Assert.Throws(() => new FilePattern("/abc//def//ghi")); + Assert.Throws(() => new FilePattern("/abc**def")); + } + } +} diff --git a/csharp/extractor/Semmle.Extraction.Tests/Layout.cs b/csharp/extractor/Semmle.Extraction.Tests/Layout.cs index c66b804f3a7..a5f36f0c19d 100644 --- a/csharp/extractor/Semmle.Extraction.Tests/Layout.cs +++ b/csharp/extractor/Semmle.Extraction.Tests/Layout.cs @@ -1,10 +1,30 @@ -using System.IO; +using System.IO; using Xunit; using Semmle.Util.Logging; using System.Runtime.InteropServices; namespace Semmle.Extraction.Tests { + struct TransformedPathStub : PathTransformer.ITransformedPath + { + readonly string value; + public TransformedPathStub(string value) => this.value = value; + public string Value => value; + + public string Extension => throw new System.NotImplementedException(); + + public string NameWithoutExtension => throw new System.NotImplementedException(); + + public PathTransformer.ITransformedPath ParentDirectory => throw new System.NotImplementedException(); + + public string DatabaseId => throw new System.NotImplementedException(); + + public PathTransformer.ITransformedPath WithSuffix(string suffix) + { + throw new System.NotImplementedException(); + } + } + public class Layout { readonly ILogger Logger = new LoggerMock(); @@ -13,12 +33,12 @@ namespace Semmle.Extraction.Tests public void TestDefaultLayout() { var layout = new Semmle.Extraction.Layout(null, null, null); - var project = layout.LookupProjectOrNull("foo.cs"); + var project = layout.LookupProjectOrNull(new 
TransformedPathStub("foo.cs")); Assert.NotNull(project); // All files are mapped when there's no layout file. - Assert.True(layout.FileInLayout("foo.cs")); + Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs"))); // Test trap filename var tmpDir = Path.GetTempPath(); @@ -30,13 +50,13 @@ namespace Semmle.Extraction.Tests Assert.NotEqual(Directory.GetCurrentDirectory(), tmpDir); return; } - var f1 = project!.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + var f1 = project!.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz"); Assert.Equal(f1, g1); // Test trap file generation - var trapwriterFilename = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - using (var trapwriter = project.CreateTrapWriter(Logger, "foo.cs", false, TrapWriter.CompressionMode.Gzip)) + var trapwriterFilename = project.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + using (var trapwriter = project.CreateTrapWriter(Logger, new TransformedPathStub("foo.cs"), false, TrapWriter.CompressionMode.Gzip)) { trapwriter.Emit("1=*"); Assert.False(File.Exists(trapwriterFilename)); @@ -65,25 +85,24 @@ namespace Semmle.Extraction.Tests var layout = new Semmle.Extraction.Layout(null, null, "layout.txt"); // Test general pattern matching - Assert.True(layout.FileInLayout("bar.cs")); - Assert.False(layout.FileInLayout("foo.cs")); - Assert.False(layout.FileInLayout("goo.cs")); - Assert.False(layout.FileInLayout("excluded/bar.cs")); - Assert.True(layout.FileInLayout("excluded/foo.cs")); - Assert.True(layout.FileInLayout("included/foo.cs")); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + Assert.False(layout.FileInLayout(new TransformedPathStub("foo.cs"))); + 
Assert.False(layout.FileInLayout(new TransformedPathStub("goo.cs"))); + Assert.False(layout.FileInLayout(new TransformedPathStub("excluded/bar.cs"))); + Assert.True(layout.FileInLayout(new TransformedPathStub("excluded/foo.cs"))); + Assert.True(layout.FileInLayout(new TransformedPathStub("included/foo.cs"))); // Test the trap file - var project = layout.LookupProjectOrNull("bar.cs"); + var project = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs")); Assert.NotNull(project); - - var trapwriterFilename = project!.GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip); - Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE), + var trapwriterFilename = project!.GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip); + Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz"), trapwriterFilename); // Test the source archive - var trapWriter = project.CreateTrapWriter(Logger, "bar.cs", false, TrapWriter.CompressionMode.Gzip); - trapWriter.Archive("layout.txt", System.Text.Encoding.ASCII); - var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt", TrapWriter.InnerPathComputation.ABSOLUTE); + var trapWriter = project.CreateTrapWriter(Logger, new TransformedPathStub("bar.cs"), false, TrapWriter.CompressionMode.Gzip); + trapWriter.Archive("layout.txt", new TransformedPathStub("layout.txt"), System.Text.Encoding.ASCII); + var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt"); Assert.True(File.Exists(writtenFile)); File.Delete("layout.txt"); } @@ -93,11 +112,11 @@ namespace Semmle.Extraction.Tests { // When you specify both a trap file and a layout, use the trap file. 
var layout = new Semmle.Extraction.Layout(Path.GetFullPath("snapshot\\trap"), null, "something.txt"); - Assert.True(layout.FileInLayout("bar.cs")); - var subProject = layout.LookupProjectOrNull("foo.cs"); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + var subProject = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs")); Assert.NotNull(subProject); - var f1 = subProject!.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + var f1 = subProject!.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz"); Assert.Equal(f1, g1); } @@ -123,30 +142,30 @@ namespace Semmle.Extraction.Tests var layout = new Semmle.Extraction.Layout(null, null, "layout.txt"); // Use Section 2 - Assert.True(layout.FileInLayout("bar.cs")); - var subProject = layout.LookupProjectOrNull("bar.cs"); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + var subProject = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs")); Assert.NotNull(subProject); - var f1 = subProject!.GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + var f1 = subProject!.GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz"); Assert.Equal(f1, g1); // Use Section 1 - Assert.True(layout.FileInLayout("foo.cs")); - subProject = layout.LookupProjectOrNull("foo.cs"); + Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs"))); + subProject = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs")); 
Assert.NotNull(subProject); - var f2 = subProject!.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + var f2 = subProject!.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz"); Assert.Equal(f2, g2); // boo.dll is not in the layout, so use layout from first section. - Assert.False(layout.FileInLayout("boo.dll")); - var f3 = layout.LookupProjectOrDefault("boo.dll").GetTrapPath(Logger, "boo.dll", TrapWriter.CompressionMode.Gzip); - var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + Assert.False(layout.FileInLayout(new TransformedPathStub("boo.dll"))); + var f3 = layout.LookupProjectOrDefault(new TransformedPathStub("boo.dll")).GetTrapPath(Logger, new TransformedPathStub("boo.dll"), TrapWriter.CompressionMode.Gzip); + var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz"); Assert.Equal(f3, g3); // boo.cs is not in the layout, so return null - Assert.False(layout.FileInLayout("boo.cs")); - Assert.Null(layout.LookupProjectOrNull("boo.cs")); + Assert.False(layout.FileInLayout(new TransformedPathStub("boo.cs"))); + Assert.Null(layout.LookupProjectOrNull(new TransformedPathStub("boo.cs"))); } [Fact] diff --git a/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs new file mode 100644 index 00000000000..b0f0ba8c51f --- /dev/null +++ b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs @@ -0,0 +1,45 @@ +using Semmle.Util; +using Xunit; + +namespace Semmle.Extraction.Tests +{ + class PathCacheStub : IPathCache + { + public string GetCanonicalPath(string path) => path; + } + + public class 
PathTransformerTests + { + [Fact] + public void TestTransformerFile() + { + var spec = new string[] + { + @"#D:\src", + @"C:\agent*\src//", + @"-C:\agent*\src\external", + @"", + @"#empty", + @"", + @"#src2", + @"/agent*//src", + @"", + @"#optsrc", + @"opt/src//" + }; + + var pathTransformer = new PathTransformer(new PathCacheStub(), spec); + + // Windows-style matching + Assert.Equal(@"C:/bar.cs", pathTransformer.Transform(@"C:\bar.cs").Value); + Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent42\src\file.cs").Value); + Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent43\src\file.cs").Value); + Assert.Equal(@"C:/agent43/src/external/file.cs", pathTransformer.Transform(@"C:\agent43\src\external\file.cs").Value); + + // Linux-style matching + Assert.Equal(@"src2/src/file.cs", pathTransformer.Transform(@"/agent/src/file.cs").Value); + Assert.Equal(@"src2/src/file.cs", pathTransformer.Transform(@"/agent42/src/file.cs").Value); + Assert.Equal(@"optsrc/file.cs", pathTransformer.Transform(@"/opt/src/file.cs").Value); + } + } +} diff --git a/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs b/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs index fd7f77f427b..54da865689b 100644 --- a/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs +++ b/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs @@ -14,7 +14,7 @@ namespace Semmle.Extraction.Tests string tempDir = System.IO.Path.GetTempPath(); string root1, root2, root3; - if(Win32.IsWindows()) + if (Win32.IsWindows()) { root1 = "E:"; root2 = "e:"; @@ -27,32 +27,21 @@ namespace Semmle.Extraction.Tests root3 = "/"; } - string formattedTempDir = tempDir.Replace('/', '\\').Replace(':', '_').Trim('\\'); - var logger = new LoggerMock(); - System.IO.Directory.SetCurrentDirectory(tempDir); - if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) - { - // `Directory.SetCurrentDirectory()` doesn't seem to work on macOS, - // so disable this test on macOS, for now - 
Assert.NotEqual(Directory.GetCurrentDirectory(), tempDir); - return; - } + Assert.Equal($@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\')); - Assert.Equal($@"C:\Temp\source_archive\{formattedTempDir}\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/','\\')); + Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\')); - 
Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); - - Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs").Replace('/', '\\')); } class LoggerMock : ILogger diff --git a/csharp/extractor/Semmle.Extraction/Entities/File.cs b/csharp/extractor/Semmle.Extraction/Entities/File.cs index eba6fab83b7..5b3f79acdd3 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/File.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/File.cs @@ -10,93 +10,55 @@ namespace Semmle.Extraction.Entities File(Context cx, string path) : base(cx, path) { - Path = path; + OriginalPath = path; + TransformedPathLazy = new Lazy(() => Context.Extractor.PathTransformer.Transform(OriginalPath)); } - public string Path - { - get; - private set; - } + readonly string OriginalPath; + readonly Lazy TransformedPathLazy; + PathTransformer.ITransformedPath TransformedPath => TransformedPathLazy.Value; - public string DatabasePath => PathAsDatabaseId(Path); - - public override bool NeedsPopulation => Context.DefinesFile(Path) || Path == Context.Extractor.OutputPath; + public override bool NeedsPopulation => Context.DefinesFile(OriginalPath) || OriginalPath == Context.Extractor.OutputPath; public override void Populate(TextWriter trapFile) { - if (Path == null) + trapFile.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension); + + if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir) + 
trapFile.containerparent(Folder.Create(Context, dir), this); + + var fromSource = TransformedPath.Extension.ToLowerInvariant().Equals("cs"); + if (fromSource) { - trapFile.files(this, "", "", ""); - } - else - { - var fi = new FileInfo(Path); - - string extension = fi.Extension ?? ""; - string name = fi.Name; - name = name.Substring(0, name.Length - extension.Length); - int fromSource = extension.ToLowerInvariant().Equals(".cs") ? 1 : 2; - - // remove the dot from the extension - if (extension.Length > 0) - extension = extension.Substring(1); - trapFile.files(this, PathAsDatabaseString(Path), name, extension); - - trapFile.containerparent(Folder.Create(Context, fi.Directory), this); - if (fromSource == 1) + foreach (var text in Context.Compilation.SyntaxTrees. + Where(t => t.FilePath == OriginalPath). + Select(tree => tree.GetText())) { - foreach (var text in Context.Compilation.SyntaxTrees. - Where(t => t.FilePath == Path). - Select(tree => tree.GetText())) - { - var rawText = text.ToString() ?? ""; - var lineCounts = LineCounter.ComputeLineCounts(rawText); - if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++; + var rawText = text.ToString() ?? ""; + var lineCounts = LineCounter.ComputeLineCounts(rawText); + if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++; - trapFile.numlines(this, lineCounts); - Context.TrapWriter.Archive(fi.FullName, text.Encoding ?? System.Text.Encoding.Default); - } + trapFile.numlines(this, lineCounts); + Context.TrapWriter.Archive(OriginalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default); } - - trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0); } + + trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 
1 : 0); } public override void WriteId(System.IO.TextWriter trapFile) { - if (Path is null) - trapFile.Write("GENERATED;sourcefile"); - else - { - trapFile.Write(DatabasePath); - trapFile.Write(";sourcefile"); - } + trapFile.Write(TransformedPath.DatabaseId); + trapFile.Write(";sourcefile"); } - /// - /// Converts a path string into a string to use as an ID - /// in the QL database. - /// - /// An absolute path. - /// The database ID. - public static string PathAsDatabaseId(string path) - { - if (path.Length >= 2 && path[1] == ':' && Char.IsLower(path[0])) - path = Char.ToUpper(path[0]) + "_" + path.Substring(2); - return path.Replace('\\', '/').Replace(":", "_"); - } - - public static string PathAsDatabaseString(string path) => path.Replace('\\', '/'); - public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, (typeof(File), path), path); public static File CreateGenerated(Context cx) => GeneratedFile.Create(cx); class GeneratedFile : File { - GeneratedFile(Context cx) - : base(cx, "") { } + GeneratedFile(Context cx) : base(cx, "") { } public override bool NeedsPopulation => true; diff --git a/csharp/extractor/Semmle.Extraction/Entities/Folder.cs b/csharp/extractor/Semmle.Extraction/Entities/Folder.cs index 86653e19244..05d552f0352 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/Folder.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/Folder.cs @@ -2,65 +2,44 @@ using System.IO; namespace Semmle.Extraction.Entities { - sealed class Folder : CachedEntity + sealed class Folder : CachedEntity { - Folder(Context cx, DirectoryInfo init) - : base(cx, init) - { - Path = init.FullName; - } - - public string Path - { - get; - private set; - } - - public string DatabasePath => File.PathAsDatabaseId(Path); + Folder(Context cx, PathTransformer.ITransformedPath init) : base(cx, init) { } public override void Populate(TextWriter trapFile) { - // Ensure that the name of the root directory is consistent - // with the XmlTrapWriter. 
- // Linux/Windows: java.io.File.getName() returns "" - // On Linux: System.IO.DirectoryInfo.Name returns "/" - // On Windows: System.IO.DirectoryInfo.Name returns "L:\" - string shortName = symbol.Parent == null ? "" : symbol.Name; - - trapFile.folders(this, File.PathAsDatabaseString(Path), shortName); - if (symbol.Parent != null) - { - trapFile.containerparent(Create(Context, symbol.Parent), this); - } + trapFile.folders(this, symbol.Value, symbol.NameWithoutExtension); + if (symbol.ParentDirectory is PathTransformer.ITransformedPath parent) + trapFile.containerparent(Create(Context, parent), this); } public override bool NeedsPopulation => true; public override void WriteId(System.IO.TextWriter trapFile) { - trapFile.Write(DatabasePath); + trapFile.Write(symbol.DatabaseId); trapFile.Write(";folder"); } - public static Folder Create(Context cx, DirectoryInfo folder) => + public static Folder Create(Context cx, PathTransformer.ITransformedPath folder) => FolderFactory.Instance.CreateEntity(cx, folder, folder); public override Microsoft.CodeAnalysis.Location? ReportingLocation => null; - class FolderFactory : ICachedEntityFactory + class FolderFactory : ICachedEntityFactory { public static readonly FolderFactory Instance = new FolderFactory(); - public Folder Create(Context cx, DirectoryInfo init) => new Folder(cx, init); + public Folder Create(Context cx, PathTransformer.ITransformedPath init) => new Folder(cx, init); } public override TrapStackBehaviour TrapStackBehaviour => TrapStackBehaviour.NoLabel; - public override int GetHashCode() => Path.GetHashCode(); + public override int GetHashCode() => symbol.GetHashCode(); public override bool Equals(object? 
obj) { - return obj is Folder folder && folder.Path == Path; + return obj is Folder folder && Equals(folder.symbol, symbol); } } } diff --git a/csharp/extractor/Semmle.Extraction/Extractor.cs b/csharp/extractor/Semmle.Extraction/Extractor.cs index c08dd726abf..02dfc3bb11a 100644 --- a/csharp/extractor/Semmle.Extraction/Extractor.cs +++ b/csharp/extractor/Semmle.Extraction/Extractor.cs @@ -81,6 +81,11 @@ namespace Semmle.Extraction /// ILogger Logger { get; } + /// + /// The path transformer to apply. + /// + PathTransformer PathTransformer { get; } + /// /// Creates a new context. /// @@ -112,11 +117,14 @@ namespace Semmle.Extraction /// /// If the extraction is standalone. /// The name of the output DLL/EXE, or null if not specified (standalone extraction). - public Extractor(bool standalone, string outputPath, ILogger logger) + /// The object used for logging. + /// The object used for path transformations. + public Extractor(bool standalone, string outputPath, ILogger logger, PathTransformer pathTransformer) { Standalone = standalone; OutputPath = outputPath; Logger = logger; + PathTransformer = pathTransformer; } // Limit the number of error messages in the log file @@ -206,5 +214,7 @@ namespace Semmle.Extraction public ILogger Logger { get; private set; } public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})"; + + public PathTransformer PathTransformer { get; } } } diff --git a/csharp/extractor/Semmle.Extraction/FilePattern.cs b/csharp/extractor/Semmle.Extraction/FilePattern.cs new file mode 100644 index 00000000000..2d61badd2f2 --- /dev/null +++ b/csharp/extractor/Semmle.Extraction/FilePattern.cs @@ -0,0 +1,131 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Diagnostics.CodeAnalysis; +using Semmle.Util; + +namespace Semmle.Extraction +{ + public sealed class InvalidFilePatternException : Exception + { + public 
InvalidFilePatternException(string pattern, string message) : + base($"Invalid file pattern '{pattern}': {message}") + { } + } + + /// + /// A file pattern, as used in either an extractor layout file or + /// a path transformer file. + /// + public sealed class FilePattern + { + /// + /// Whether this is an inclusion pattern. + /// + public bool Include { get; } + + public FilePattern(string pattern) + { + Include = true; + if (pattern.StartsWith("-")) + { + pattern = pattern.Substring(1); + Include = false; + } + pattern = FileUtils.ConvertToUnix(pattern.Trim()).TrimStart('/'); + RegexPattern = BuildRegex(pattern).ToString(); + } + + /// + /// Constructs a regex string from a file pattern. Throws + /// `InvalidFilePatternException` for invalid patterns. + /// + static StringBuilder BuildRegex(string pattern) + { + bool HasCharAt(int i, Predicate p) => + i >= 0 && i < pattern.Length && p(pattern[i]); + var sb = new StringBuilder(); + var i = 0; + var seenDoubleSlash = false; + sb.Append('^'); + while (i < pattern.Length) + { + if (pattern[i] == '/') + { + if (HasCharAt(i + 1, c => c == '/')) + { + if (seenDoubleSlash) + throw new InvalidFilePatternException(pattern, "'//' is allowed at most once."); + sb.Append("(?/)"); + i += 2; + seenDoubleSlash = true; + } + else + { + sb.Append('/'); + i++; + } + } + else if (pattern[i] == '*') + { + if (HasCharAt(i + 1, c => c == '*')) + { + if (HasCharAt(i - 1, c => c != '/')) + throw new InvalidFilePatternException(pattern, "'**' preceeded by non-`/` character."); + if (HasCharAt(i + 2, c => c != '/')) + throw new InvalidFilePatternException(pattern, "'**' succeeded by non-`/` character"); + sb.Append(".*"); + i += 2; + } + else + { + sb.Append("[^/]*"); + i++; + } + } + else + sb.Append(Regex.Escape(pattern[i++].ToString())); + } + return sb.Append(".*"); + } + + + /// + /// The regex pattern compiled from this file pattern. 
+ /// + public string RegexPattern { get; } + + /// + /// Returns `true` if the set of file patterns `patterns` match the path `path`. + /// If so, `transformerSuffix` will contain the part of `path` that needs to be + /// suffixed when using path transformers. + /// + public static bool Matches(IEnumerable patterns, string path, [NotNullWhen(true)] out string? transformerSuffix) + { + path = FileUtils.ConvertToUnix(path).TrimStart('/'); + + foreach (var pattern in patterns.Reverse()) + { + var m = new Regex(pattern.RegexPattern).Match(path); + if (m.Success) + { + if (pattern.Include) + { + transformerSuffix = m.Groups.TryGetValue("doubleslash", out var group) + ? path.Substring(group.Index) + : path; + return true; + } + + transformerSuffix = null; + return false; + } + } + + transformerSuffix = null; + return false; + } + } +} \ No newline at end of file diff --git a/csharp/extractor/Semmle.Extraction/Layout.cs b/csharp/extractor/Semmle.Extraction/Layout.cs index 82b266318c1..d740d2c05b9 100644 --- a/csharp/extractor/Semmle.Extraction/Layout.cs +++ b/csharp/extractor/Semmle.Extraction/Layout.cs @@ -54,14 +54,15 @@ namespace Semmle.Extraction /// /// The source file. /// The full filepath of the trap file. - public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression); + public string GetTrapPath(ILogger logger, PathTransformer.ITransformedPath srcFile, TrapWriter.CompressionMode trapCompression) => + TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression); /// /// Creates a trap writer for a given source/assembly file. /// /// The source file. /// A newly created TrapWriter. 
- public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => + public TrapWriter CreateTrapWriter(ILogger logger, PathTransformer.ITransformedPath srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression); } @@ -73,7 +74,7 @@ namespace Semmle.Extraction /// /// The file to look up. /// The relevant subproject, or null if not found. - public SubProject? LookupProjectOrNull(string sourceFile) + public SubProject? LookupProjectOrNull(PathTransformer.ITransformedPath sourceFile) { if (!useLayoutFile) return DefaultProject; @@ -89,7 +90,7 @@ namespace Semmle.Extraction /// /// The file to look up. /// The relevant subproject, or DefaultProject if not found. - public SubProject LookupProjectOrDefault(string sourceFile) + public SubProject LookupProjectOrDefault(PathTransformer.ITransformedPath sourceFile) { return LookupProjectOrNull(sourceFile) ?? DefaultProject; } @@ -134,7 +135,7 @@ namespace Semmle.Extraction /// /// The absolute path of the file to query. /// True iff there is no layout file or the layout file specifies the file. 
- public bool FileInLayout(string path) => LookupProjectOrNull(path) != null; + public bool FileInLayout(PathTransformer.ITransformedPath path) => LookupProjectOrNull(path) != null; void ReadLayoutFile(string layout) { @@ -167,33 +168,7 @@ namespace Semmle.Extraction sealed class LayoutBlock { - struct Condition - { - private readonly bool include; - private readonly string prefix; - - public bool Include => include; - - public string Prefix => prefix; - - public Condition(string line) - { - include = false; - if (line.StartsWith("-")) - line = line.Substring(1); - else - include = true; - prefix = Normalise(line.Trim()); - } - - static public string Normalise(string path) - { - path = Path.GetFullPath(path); - return path.Replace('\\', '/'); - } - } - - private readonly List conditions = new List(); + private readonly List filePatterns = new List(); public readonly Layout.SubProject Directories; @@ -219,22 +194,10 @@ namespace Semmle.Extraction ReadVariable("ODASA_BUILD_ERROR_DIR", lines[i++]); while (i < lines.Length && !lines[i].StartsWith("#")) { - conditions.Add(new Condition(lines[i++])); + filePatterns.Add(new FilePattern(lines[i++])); } } - public bool Matches(string path) - { - bool matches = false; - path = Condition.Normalise(path); - foreach (Condition condition in conditions) - { - if (condition.Include) - matches |= path.StartsWith(condition.Prefix); - else - matches &= !path.StartsWith(condition.Prefix); - } - return matches; - } + public bool Matches(PathTransformer.ITransformedPath path) => FilePattern.Matches(filePatterns, path.Value, out var _); } } diff --git a/csharp/extractor/Semmle.Extraction/PathTransformer.cs b/csharp/extractor/Semmle.Extraction/PathTransformer.cs new file mode 100644 index 00000000000..2c9770e790e --- /dev/null +++ b/csharp/extractor/Semmle.Extraction/PathTransformer.cs @@ -0,0 +1,177 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Diagnostics.CodeAnalysis; +using Semmle.Util; + 
+namespace Semmle.Extraction +{ + /// + /// A class for interpreting path transformers specified using the environment + /// variable `CODEQL_PATH_TRANSFORMER`. + /// + public sealed class PathTransformer + { + public class InvalidPathTransformerException : Exception + { + public InvalidPathTransformerException(string message) : + base($"Invalid path transformer specification: {message}") + { } + } + + /// + /// A transformed path. + /// + public interface ITransformedPath + { + string Value { get; } + + string Extension { get; } + + string NameWithoutExtension { get; } + + ITransformedPath? ParentDirectory { get; } + + ITransformedPath WithSuffix(string suffix); + + string DatabaseId { get; } + } + + struct TransformedPath : ITransformedPath + { + public TransformedPath(string value) { this.value = value; } + readonly string value; + + public string Value => value; + + public string Extension => Path.GetExtension(value)?.Substring(1) ?? ""; + + public string NameWithoutExtension => Path.GetFileNameWithoutExtension(value); + + public ITransformedPath? ParentDirectory + { + get + { + var dir = Path.GetDirectoryName(value); + if (dir is null) + return null; + var isWindowsDriveLetter = dir.Length == 2 && char.IsLetter(dir[0]) && dir[1] == ':'; + if (isWindowsDriveLetter) + return null; + return new TransformedPath(FileUtils.ConvertToUnix(dir)); + } + } + + public ITransformedPath WithSuffix(string suffix) => new TransformedPath(value + suffix); + + public string DatabaseId + { + get + { + var ret = value; + if (ret.Length >= 2 && ret[1] == ':' && Char.IsLower(ret[0])) + ret = Char.ToUpper(ret[0]) + "_" + ret.Substring(2); + return ret.Replace('\\', '/').Replace(":", "_"); + } + } + + public override int GetHashCode() => 11 * value.GetHashCode(); + + public override bool Equals(object? 
obj) => obj is TransformedPath tp && tp.value == value; + + public override string ToString() => value; + } + + readonly Func transform; + + /// + /// Returns the path obtained by transforming `path`. + /// + public ITransformedPath Transform(string path) => new TransformedPath(transform(path)); + + /// + /// Default constructor reads parameters from the environment. + /// + public PathTransformer(IPathCache pathCache) : + this(pathCache, Environment.GetEnvironmentVariable("CODEQL_PATH_TRANSFORMER") is string file ? File.ReadAllLines(file) : null) + { + } + + /// + /// Creates a path transformer based on the specification in `lines`. + /// Throws `InvalidPathTransformerException` for invalid specifications. + /// + public PathTransformer(IPathCache pathCache, string[]? lines) + { + if (lines is null) + { + transform = path => FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path)); + return; + } + + var sections = ParsePathTransformerSpec(lines); + transform = path => + { + path = FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path)); + foreach (var section in sections) + { + if (section.Matches(path, out var transformed)) + return transformed; + } + return path; + }; + } + + static IEnumerable ParsePathTransformerSpec(string[] lines) + { + var sections = new List(); + try + { + int i = 0; + while (i < lines.Length && !lines[i].StartsWith("#")) + i++; + while (i < lines.Length) + { + var section = new TransformerSection(lines, ref i); + sections.Add(section); + } + + if (sections.Count == 0) + throw new InvalidPathTransformerException("contains no sections."); + } + catch (InvalidFilePatternException ex) + { + throw new InvalidPathTransformerException(ex.Message); + } + return sections; + } + } + + sealed class TransformerSection + { + readonly string name; + readonly List filePatterns = new List(); + + public TransformerSection(string[] lines, ref int i) + { + name = lines[i++].Substring(1); // skip the '#' + for (; i < lines.Length && 
!lines[i].StartsWith("#"); i++) + { + var line = lines[i]; + if (!string.IsNullOrWhiteSpace(line)) + filePatterns.Add(new FilePattern(line)); + } + } + + public bool Matches(string path, [NotNullWhen(true)] out string? transformed) + { + if (FilePattern.Matches(filePatterns, path, out var suffix)) + { + transformed = FileUtils.ConvertToUnix(name) + suffix; + return true; + } + transformed = null; + return false; + } + } +} diff --git a/csharp/extractor/Semmle.Extraction/TrapWriter.cs b/csharp/extractor/Semmle.Extraction/TrapWriter.cs index 7ea08eafc1c..8082567c825 100644 --- a/csharp/extractor/Semmle.Extraction/TrapWriter.cs +++ b/csharp/extractor/Semmle.Extraction/TrapWriter.cs @@ -14,12 +14,6 @@ namespace Semmle.Extraction public sealed class TrapWriter : IDisposable { - public enum InnerPathComputation - { - ABSOLUTE, - RELATIVE - } - public enum CompressionMode { None, @@ -45,7 +39,7 @@ namespace Semmle.Extraction readonly CompressionMode TrapCompression; - public TrapWriter(ILogger logger, string outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression) + public TrapWriter(ILogger logger, PathTransformer.ITransformedPath outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression) { Logger = logger; TrapCompression = trapCompression; @@ -107,16 +101,17 @@ namespace Semmle.Extraction /// Adds the specified input file to the source archive. It may end up in either the normal or long path area /// of the source archive, depending on the length of its full path. /// - /// The path to the input file. + /// The path to the input file. + /// The transformed path to the input file. /// The encoding used by the input file. 
- public void Archive(string inputPath, Encoding inputEncoding) + public void Archive(string originalPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding) { if (string.IsNullOrEmpty(archive)) return; // Calling GetFullPath makes this use the canonical capitalisation, if the file exists. - string fullInputPath = Path.GetFullPath(inputPath); + string fullInputPath = Path.GetFullPath(originalPath); - ArchivePath(fullInputPath, inputEncoding); + ArchivePath(fullInputPath, transformedPath, inputEncoding); } /// @@ -124,14 +119,11 @@ namespace Semmle.Extraction /// /// The path of the file. /// The contents of the file. - public void Archive(string inputPath, string contents) + public void Archive(PathTransformer.ITransformedPath inputPath, string contents) { if (string.IsNullOrEmpty(archive)) return; - // Calling GetFullPath makes this use the canonical capitalisation, if the file exists. - string fullInputPath = Path.GetFullPath(inputPath); - - ArchiveContents(fullInputPath, contents); + ArchiveContents(inputPath, contents); } /// @@ -210,18 +202,19 @@ namespace Semmle.Extraction /// source archive less than the system path limit of 260 characters. /// /// The full path to the input file. + /// The transformed path to the input file. /// The encoding used by the input file. /// If the output path in the source archive would /// exceed the system path limit of 260 characters. 
- private void ArchivePath(string fullInputPath, Encoding inputEncoding) + private void ArchivePath(string fullInputPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding) { string contents = File.ReadAllText(fullInputPath, inputEncoding); - ArchiveContents(fullInputPath, contents); + ArchiveContents(transformedPath, contents); } - private void ArchiveContents(string fullInputPath, string contents) + private void ArchiveContents(PathTransformer.ITransformedPath transformedPath, string contents) { - string dest = NestPaths(Logger, archive, fullInputPath, InnerPathComputation.ABSOLUTE); + string dest = NestPaths(Logger, archive, transformedPath.Value); string tmpSrcFile = Path.GetTempFileName(); File.WriteAllText(tmpSrcFile, contents, UTF8); try @@ -236,14 +229,11 @@ namespace Semmle.Extraction } } - public static string NestPaths(ILogger logger, string? outerpath, string innerpath, InnerPathComputation innerPathComputation) + public static string NestPaths(ILogger logger, string? outerpath, string innerpath) { string nested = innerpath; if (!string.IsNullOrEmpty(outerpath)) { - if (!Path.IsPathRooted(innerpath) && innerPathComputation == InnerPathComputation.ABSOLUTE) - innerpath = Path.GetFullPath(innerpath); - // Remove all leading path separators / or \ // For example, UNC paths have two leading \\ innerpath = innerpath.TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); @@ -276,13 +266,13 @@ namespace Semmle.Extraction } } - public static string TrapPath(ILogger logger, string? folder, string filename, TrapWriter.CompressionMode trapCompression) + public static string TrapPath(ILogger logger, string? 
folder, PathTransformer.ITransformedPath path, TrapWriter.CompressionMode trapCompression) { - filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}"; + var filename = $"{path.Value}.trap{TrapExtension(trapCompression)}"; if (string.IsNullOrEmpty(folder)) folder = Directory.GetCurrentDirectory(); - return NestPaths(logger, folder, filename, InnerPathComputation.ABSOLUTE); ; + return NestPaths(logger, folder, filename); } } } diff --git a/csharp/extractor/Semmle.Util/CanonicalPathCache.cs b/csharp/extractor/Semmle.Util/CanonicalPathCache.cs index bbc8ab995b4..339641ecb35 100644 --- a/csharp/extractor/Semmle.Util/CanonicalPathCache.cs +++ b/csharp/extractor/Semmle.Util/CanonicalPathCache.cs @@ -222,6 +222,29 @@ namespace Semmle.Util this.pathStrategy = pathStrategy; } + + /// + /// Create a CanonicalPathCache. + /// + /// + /// + /// Creates the appropriate PathStrategy object which encapsulates + /// the correct algorithm. Falls back to different implementations + /// depending on platform. + /// + /// + /// Size of the cache. + /// Policy for following symlinks. + /// A new CanonicalPathCache. + public static CanonicalPathCache Create(ILogger logger, int maxCapacity) + { + var preserveSymlinks = + Environment.GetEnvironmentVariable("CODEQL_PRESERVE_SYMLINKS") == "true" || + Environment.GetEnvironmentVariable("SEMMLE_PRESERVE_SYMLINKS") == "true"; + return Create(logger, maxCapacity, preserveSymlinks ? CanonicalPathCache.Symlinks.Preserve : CanonicalPathCache.Symlinks.Follow); + + } + /// /// Create a CanonicalPathCache. ///