From 26639a113e3991e755464ce4089e67a6e34076dd Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 2 Sep 2020 19:41:22 +0200 Subject: [PATCH 01/10] C#: Rename `Layout.Condition` to `FilePattern` and move to separate file --- .../Semmle.Extraction/FilePattern.cs | 34 +++++++++++++++ csharp/extractor/Semmle.Extraction/Layout.cs | 42 ++++--------------- 2 files changed, 42 insertions(+), 34 deletions(-) create mode 100644 csharp/extractor/Semmle.Extraction/FilePattern.cs diff --git a/csharp/extractor/Semmle.Extraction/FilePattern.cs b/csharp/extractor/Semmle.Extraction/FilePattern.cs new file mode 100644 index 00000000000..b3aab69b181 --- /dev/null +++ b/csharp/extractor/Semmle.Extraction/FilePattern.cs @@ -0,0 +1,34 @@ +using System.IO; + +namespace Semmle.Extraction +{ + /// + /// An file pattern, as used in either an extractor layout file or + /// a path transformer file. + /// + class FilePattern + { + private readonly bool include; + private readonly string prefix; + + public bool Include => include; + + public string Prefix => prefix; + + public FilePattern(string line) + { + include = false; + if (line.StartsWith("-")) + line = line.Substring(1); + else + include = true; + prefix = Normalize(line.Trim()); + } + + static public string Normalize(string path) + { + path = Path.GetFullPath(path); + return path.Replace('\\', '/'); + } + } +} \ No newline at end of file diff --git a/csharp/extractor/Semmle.Extraction/Layout.cs b/csharp/extractor/Semmle.Extraction/Layout.cs index 9ab7ed5738a..1f772635df5 100644 --- a/csharp/extractor/Semmle.Extraction/Layout.cs +++ b/csharp/extractor/Semmle.Extraction/Layout.cs @@ -61,7 +61,7 @@ namespace Semmle.Extraction /// /// The source file. /// A newly created TrapWriter. 
- public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => + public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression); } @@ -167,33 +167,7 @@ namespace Semmle.Extraction sealed class LayoutBlock { - struct Condition - { - private readonly bool include; - private readonly string prefix; - - public bool Include => include; - - public string Prefix => prefix; - - public Condition(string line) - { - include = false; - if (line.StartsWith("-")) - line = line.Substring(1); - else - include = true; - prefix = Normalise(line.Trim()); - } - - static public string Normalise(string path) - { - path = Path.GetFullPath(path); - return path.Replace('\\', '/'); - } - } - - private readonly List conditions = new List(); + private readonly List filePatterns = new List(); public readonly Layout.SubProject Directories; @@ -219,20 +193,20 @@ namespace Semmle.Extraction ReadVariable("ODASA_BUILD_ERROR_DIR", lines[i++]); while (i < lines.Length && !lines[i].StartsWith("#")) { - conditions.Add(new Condition(lines[i++])); + filePatterns.Add(new FilePattern(lines[i++])); } } public bool Matches(string path) { bool matches = false; - path = Condition.Normalise(path); - foreach (Condition condition in conditions) + path = FilePattern.Normalize(path); + foreach (var filePattern in filePatterns) { - if (condition.Include) - matches |= path.StartsWith(condition.Prefix); + if (filePattern.Include) + matches |= path.StartsWith(filePattern.Prefix); else - matches &= !path.StartsWith(condition.Prefix); + matches &= !path.StartsWith(filePattern.Prefix); } return matches; } From 14567f531408c47f9e7b58ef9b0a34bb2c5e4c65 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Mon, 7 Sep 2020 08:57:12 +0200 Subject: [PATCH 02/10] C#: Support wild-cards in 
file patterns Implements the specification at https://wiki.semmle.com/display/SDmaster/project-layout+format by compiling file path specifications to regular expressions. --- .../Semmle.Extraction.Tests/FilePattern.cs | 48 +++++++ .../Semmle.Extraction/FilePattern.cs | 125 +++++++++++++++--- csharp/extractor/Semmle.Extraction/Layout.cs | 14 +- 3 files changed, 157 insertions(+), 30 deletions(-) create mode 100644 csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs diff --git a/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs b/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs new file mode 100644 index 00000000000..dfff75ea18b --- /dev/null +++ b/csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs @@ -0,0 +1,48 @@ +using Xunit; + +namespace Semmle.Extraction.Tests +{ + public class FilePatternTests + { + [Fact] + public void TestRegexCompilation() + { + var fp = new FilePattern("/hadoop*"); + Assert.Equal("^hadoop[^/]*.*", fp.RegexPattern); + fp = new FilePattern("**/org/apache/hadoop"); + Assert.Equal("^.*/org/apache/hadoop.*", fp.RegexPattern); + fp = new FilePattern("hadoop-common/**/test// "); + Assert.Equal("^hadoop-common/.*/test(?/).*", fp.RegexPattern); + fp = new FilePattern(@"-C:\agent\root\asdf//"); + Assert.Equal("^C:/agent/root/asdf(?/).*", fp.RegexPattern); + fp = new FilePattern(@"-C:\agent+\[root]\asdf//"); + Assert.Equal(@"^C:/agent\+/\[root]/asdf(?/).*", fp.RegexPattern); + } + + [Fact] + public void TestMatching() + { + var fp1 = new FilePattern(@"C:\agent\root\abc//"); + var fp2 = new FilePattern(@"C:\agent\root\def//ghi"); + var patterns = new[] { fp1, fp2 }; + + var success = FilePattern.Matches(patterns, @"C:\agent\root\abc\file.cs", out var s); + Assert.True(success); + Assert.Equal("/file.cs", s); + + success = FilePattern.Matches(patterns, @"C:\agent\root\def\ghi\file.cs", out s); + Assert.True(success); + Assert.Equal("/ghi/file.cs", s); + + success = FilePattern.Matches(patterns, @"C:\agent\root\def\file.cs", out s); 
+ Assert.False(success); + } + + [Fact] + public void TestInvalidPatterns() + { + Assert.Throws(() => new FilePattern("/abc//def//ghi")); + Assert.Throws(() => new FilePattern("/abc**def")); + } + } +} diff --git a/csharp/extractor/Semmle.Extraction/FilePattern.cs b/csharp/extractor/Semmle.Extraction/FilePattern.cs index b3aab69b181..c648faadbe1 100644 --- a/csharp/extractor/Semmle.Extraction/FilePattern.cs +++ b/csharp/extractor/Semmle.Extraction/FilePattern.cs @@ -1,34 +1,125 @@ -using System.IO; +using System; +using System.Collections.Generic; +using System.Text; +using System.Text.RegularExpressions; +using System.Diagnostics.CodeAnalysis; +using Semmle.Util; namespace Semmle.Extraction { + public sealed class InvalidFilePatternException : Exception + { + public InvalidFilePatternException(string pattern, string message) : + base($"Invalid file pattern '{pattern}': {message}") + { } + } + /// /// An file pattern, as used in either an extractor layout file or /// a path transformer file. /// - class FilePattern + public sealed class FilePattern { - private readonly bool include; - private readonly string prefix; + /// + /// Whether this is an inclusion pattern. + /// + public bool Include { get; } - public bool Include => include; - - public string Prefix => prefix; - - public FilePattern(string line) + public FilePattern(string pattern) { - include = false; - if (line.StartsWith("-")) - line = line.Substring(1); + Include = false; + if (pattern.StartsWith("-")) + pattern = pattern.Substring(1); else - include = true; - prefix = Normalize(line.Trim()); + Include = true; + pattern = FileUtils.ConvertToUnix(pattern.Trim()).TrimStart('/'); + RegexPattern = BuildRegex(pattern).ToString(); } - static public string Normalize(string path) + /// + /// Constructs a regex string from a file pattern. Throws + /// `InvalidFilePatternException` for invalid patterns. 
+ /// + static StringBuilder BuildRegex(string pattern) { - path = Path.GetFullPath(path); - return path.Replace('\\', '/'); + bool HasCharAt(int i, Predicate p) => + i >= 0 && i < pattern.Length && p(pattern[i]); + var sb = new StringBuilder(); + var i = 0; + var seenDoubleSlash = false; + sb.Append('^'); + while (i < pattern.Length) + { + if (pattern[i] == '/') + { + if (HasCharAt(i + 1, c => c == '/')) + { + if (seenDoubleSlash) + throw new InvalidFilePatternException(pattern, "'//' is allowed at most once."); + sb.Append("(?/)"); + i += 2; + seenDoubleSlash = true; + } + else + { + sb.Append('/'); + i++; + } + } + else if (pattern[i] == '*') + { + if (HasCharAt(i + 1, c => c == '*')) + { + if (HasCharAt(i - 1, c => c != '/')) + throw new InvalidFilePatternException(pattern, "'**' preceeded by non-`/` character."); + if (HasCharAt(i + 2, c => c != '/')) + throw new InvalidFilePatternException(pattern, "'**' succeeded by non-`/` character"); + sb.Append(".*"); + i += 2; + } + else + { + sb.Append("[^/]*"); + i++; + } + } + else + sb.Append(Regex.Escape(pattern[i++].ToString())); + } + return sb.Append(".*"); + } + + + /// + /// The regex pattern compiled from this file pattern. + /// + public string RegexPattern { get; } + + /// + /// Returns `true` if the set of file patterns `patterns` match the path `path`. + /// If so, `transformerSuffix` will contain the part of `path` that needs to be + /// suffixed when using path transformers. + /// + public static bool Matches(IEnumerable patterns, string path, [NotNullWhen(true)] out string? transformerSuffix) + { + path = FileUtils.ConvertToUnix(path).TrimStart('/'); + Match? lastMatch = null; + foreach (var pattern in patterns) + { + var m = new Regex(pattern.RegexPattern).Match(path); + if (m.Success) + lastMatch = pattern.Include ? m : null; + } + if (lastMatch is Match) + { + transformerSuffix = lastMatch.Groups.TryGetValue("doubleslash", out var group) + ? 
path.Substring(group.Index) + : path; + return true; + } + + transformerSuffix = null; + return false; } } } \ No newline at end of file diff --git a/csharp/extractor/Semmle.Extraction/Layout.cs b/csharp/extractor/Semmle.Extraction/Layout.cs index 1f772635df5..1e44c3142e4 100644 --- a/csharp/extractor/Semmle.Extraction/Layout.cs +++ b/csharp/extractor/Semmle.Extraction/Layout.cs @@ -197,18 +197,6 @@ namespace Semmle.Extraction } } - public bool Matches(string path) - { - bool matches = false; - path = FilePattern.Normalize(path); - foreach (var filePattern in filePatterns) - { - if (filePattern.Include) - matches |= path.StartsWith(filePattern.Prefix); - else - matches &= !path.StartsWith(filePattern.Prefix); - } - return matches; - } + public bool Matches(string path) => FilePattern.Matches(filePatterns, path, out var _); } } From 37f1ce312256e36553c3951fba3c5c0c8f40ad9c Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Mon, 7 Sep 2020 08:57:49 +0200 Subject: [PATCH 03/10] C#: Implement support for path transformers --- .../Semmle.Extraction.CIL/Context.cs | 2 +- .../Entities/Assembly.cs | 10 +- .../Semmle.Extraction.CIL/Entities/File.cs | 27 +-- .../Semmle.Extraction.CIL/Entities/Folder.cs | 20 +- .../Semmle.Extraction.CIL/Factories.cs | 4 +- .../Semmle.Extraction.CSharp/Analyser.cs | 26 ++- .../Entities/Compilation.cs | 13 +- .../Semmle.Extraction.CSharp/Extractor.cs | 13 +- .../Semmle.Extraction.Tests/Layout.cs | 84 +++++---- .../PathTransformer.cs | 45 +++++ .../Semmle.Extraction.Tests/TrapWriter.cs | 27 +-- .../Semmle.Extraction/Entities/File.cs | 89 +++------ .../Semmle.Extraction/Entities/Folder.cs | 43 ++--- .../extractor/Semmle.Extraction/Extractor.cs | 12 +- csharp/extractor/Semmle.Extraction/Layout.cs | 13 +- .../Semmle.Extraction/PathTransformer.cs | 178 ++++++++++++++++++ .../extractor/Semmle.Extraction/TrapWriter.cs | 44 ++--- .../Semmle.Util/CanonicalPathCache.cs | 23 +++ 18 files changed, 442 insertions(+), 231 deletions(-) create mode 100644 
csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs create mode 100644 csharp/extractor/Semmle.Extraction/PathTransformer.cs diff --git a/csharp/extractor/Semmle.Extraction.CIL/Context.cs b/csharp/extractor/Semmle.Extraction.CIL/Context.cs index 79fb8b7a8d4..6b8b01b109a 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Context.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Context.cs @@ -37,7 +37,7 @@ namespace Semmle.Extraction.CIL namespaceFactory = new CachedFunction(n => CreateNamespace(mdReader.GetString(n))); namespaceDefinitionFactory = new CachedFunction(CreateNamespace); sourceFiles = new CachedFunction(path => new Entities.PdbSourceFile(this, path)); - folders = new CachedFunction(path => new Entities.Folder(this, path)); + folders = new CachedFunction(path => new Entities.Folder(this, path)); sourceLocations = new CachedFunction(location => new Entities.PdbSourceLocation(this, location)); defaultGenericContext = new EmptyContext(this); diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs index 255d37699f2..2504d7e7954 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/Assembly.cs @@ -5,6 +5,7 @@ using Semmle.Util.Logging; using System; using Semmle.Extraction.Entities; using System.IO; +using Semmle.Util; namespace Semmle.Extraction.CIL.Entities { @@ -134,9 +135,12 @@ namespace Semmle.Extraction.CIL.Entities extracted = false; try { - var extractor = new Extractor(false, assemblyPath, logger); - var project = layout.LookupProjectOrDefault(assemblyPath); - using (var trapWriter = project.CreateTrapWriter(logger, assemblyPath + ".cil", true, trapCompression)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + var extractor = new Extractor(false, assemblyPath, logger, pathTransformer); + var 
transformedAssemblyPath = pathTransformer.Transform(assemblyPath); + var project = layout.LookupProjectOrDefault(transformedAssemblyPath); + using (var trapWriter = project.CreateTrapWriter(logger, transformedAssemblyPath.WithSuffix(".cil"), true, trapCompression)) { trapFile = trapWriter.TrapFile; if (nocache || !System.IO.File.Exists(trapFile)) diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs index bc8c4c8c76d..b164fad1acd 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/File.cs @@ -13,33 +13,38 @@ namespace Semmle.Extraction.CIL.Entities public class File : LabelledEntity, IFile { - protected readonly string path; + protected readonly string OriginalPath; + protected readonly PathTransformer.ITransformedPath TransformedPath; public File(Context cx, string path) : base(cx) { - this.path = Semmle.Extraction.Entities.File.PathAsDatabaseString(path); + this.OriginalPath = path; + TransformedPath = cx.cx.Extractor.PathTransformer.Transform(OriginalPath); } public override void WriteId(TextWriter trapFile) { - trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path)); + trapFile.Write(TransformedPath.DatabaseId); } public override bool Equals(object obj) { - return GetType() == obj.GetType() && path == ((File)obj).path; + return GetType() == obj.GetType() && OriginalPath == ((File)obj).OriginalPath; } - public override int GetHashCode() => 11 * path.GetHashCode(); + public override int GetHashCode() => 11 * OriginalPath.GetHashCode(); public override IEnumerable Contents { get { - var parent = cx.CreateFolder(System.IO.Path.GetDirectoryName(path)); - yield return parent; - yield return Tuples.containerparent(parent, this); - yield return Tuples.files(this, path, System.IO.Path.GetFileNameWithoutExtension(path), System.IO.Path.GetExtension(path).Substring(1)); + if (TransformedPath.ParentDirectory is 
PathTransformer.ITransformedPath dir) + { + var parent = cx.CreateFolder(dir); + yield return parent; + yield return Tuples.containerparent(parent, this); + } + yield return Tuples.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension); } } @@ -65,9 +70,9 @@ namespace Semmle.Extraction.CIL.Entities var text = file.Contents; if (text == null) - cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", path)); + cx.cx.Extractor.Logger.Log(Util.Logging.Severity.Warning, string.Format("PDB source file {0} could not be found", OriginalPath)); else - cx.cx.TrapWriter.Archive(path, text); + cx.cx.TrapWriter.Archive(TransformedPath, text); yield return Tuples.file_extraction_mode(this, 2); } diff --git a/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs b/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs index 48ebe6a19d1..f95492bb0e4 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Entities/Folder.cs @@ -9,16 +9,16 @@ namespace Semmle.Extraction.CIL.Entities public sealed class Folder : LabelledEntity, IFolder { - readonly string path; + readonly PathTransformer.ITransformedPath TransformedPath; - public Folder(Context cx, string path) : base(cx) + public Folder(Context cx, PathTransformer.ITransformedPath path) : base(cx) { - this.path = path; + this.TransformedPath = path; } public override void WriteId(TextWriter trapFile) { - trapFile.Write(Semmle.Extraction.Entities.File.PathAsDatabaseId(path)); + trapFile.Write(TransformedPath.DatabaseId); } public override string IdSuffix => ";folder"; @@ -27,25 +27,21 @@ namespace Semmle.Extraction.CIL.Entities { get { - // On Posix, we could get a Windows directory of the form "C:" - bool windowsDriveLetter = path.Length == 2 && char.IsLetter(path[0]) && path[1] == ':'; - - var parent = Path.GetDirectoryName(path); - if (parent != null && 
!windowsDriveLetter) + if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath parent) { var parentFolder = cx.CreateFolder(parent); yield return parentFolder; yield return Tuples.containerparent(parentFolder, this); } - yield return Tuples.folders(this, Semmle.Extraction.Entities.File.PathAsDatabaseString(path), Path.GetFileName(path)); + yield return Tuples.folders(this, TransformedPath.Value, TransformedPath.NameWithoutExtension); } } public override bool Equals(object obj) { - return obj is Folder folder && path == folder.path; + return obj is Folder folder && TransformedPath == folder.TransformedPath; } - public override int GetHashCode() => path.GetHashCode(); + public override int GetHashCode() => TransformedPath.GetHashCode(); } } diff --git a/csharp/extractor/Semmle.Extraction.CIL/Factories.cs b/csharp/extractor/Semmle.Extraction.CIL/Factories.cs index f522521a845..5c37228a41e 100644 --- a/csharp/extractor/Semmle.Extraction.CIL/Factories.cs +++ b/csharp/extractor/Semmle.Extraction.CIL/Factories.cs @@ -201,7 +201,7 @@ namespace Semmle.Extraction.CIL #region Locations readonly CachedFunction sourceFiles; - readonly CachedFunction folders; + readonly CachedFunction folders; readonly CachedFunction sourceLocations; /// @@ -216,7 +216,7 @@ namespace Semmle.Extraction.CIL /// /// The path of the folder. /// A folder entity. - public Folder CreateFolder(string path) => folders[path]; + public Folder CreateFolder(PathTransformer.ITransformedPath path) => folders[path]; /// /// Creates a source location. 
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs b/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs index 544a1819117..a2db3e4a2db 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Analyser.cs @@ -25,12 +25,15 @@ namespace Semmle.Extraction.CSharp public readonly ILogger Logger; - public Analyser(IProgressMonitor pm, ILogger logger) + public readonly PathTransformer PathTransformer; + + public Analyser(IProgressMonitor pm, ILogger logger, PathTransformer pathTransformer) { Logger = logger; Logger.Log(Severity.Info, "EXTRACTION STARTING at {0}", DateTime.Now); stopWatch.Start(); progressMonitor = pm; + PathTransformer = pathTransformer; } CSharpCompilation compilation; @@ -64,7 +67,7 @@ namespace Semmle.Extraction.CSharp layout = new Layout(); this.options = options; this.compilation = compilation; - extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger); + extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger, PathTransformer); LogDiagnostics(); SetReferencePaths(); @@ -114,7 +117,7 @@ namespace Semmle.Extraction.CSharp { compilation = compilationIn; layout = new Layout(); - extractor = new Extraction.Extractor(true, null, Logger); + extractor = new Extraction.Extractor(true, null, Logger, PathTransformer); this.options = options; LogExtractorInfo(Extraction.Extractor.Version); SetReferencePaths(); @@ -227,9 +230,10 @@ namespace Semmle.Extraction.CSharp try { var assemblyPath = extractor.OutputPath; + var transformedAssemblyPath = PathTransformer.Transform(assemblyPath); var assembly = compilation.Assembly; - var projectLayout = layout.LookupProjectOrDefault(assemblyPath); - var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression); + var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath); + var trapWriter = 
projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression); compilationTrapFile = trapWriter; // Dispose later var cx = extractor.CreateContext(compilation.Clone(), trapWriter, new AssemblyScope(assembly, assemblyPath, true)); @@ -257,8 +261,9 @@ namespace Semmle.Extraction.CSharp stopwatch.Start(); var assemblyPath = r.FilePath; - var projectLayout = layout.LookupProjectOrDefault(assemblyPath); - using (var trapWriter = projectLayout.CreateTrapWriter(Logger, assemblyPath, true, options.TrapCompression)) + var transformedAssemblyPath = PathTransformer.Transform(assemblyPath); + var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath); + using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression)) { var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile); @@ -357,16 +362,17 @@ namespace Semmle.Extraction.CSharp var stopwatch = new Stopwatch(); stopwatch.Start(); var sourcePath = tree.FilePath; + var transformedSourcePath = PathTransformer.Transform(sourcePath); - var projectLayout = layout.LookupProjectOrNull(sourcePath); + var projectLayout = layout.LookupProjectOrNull(transformedSourcePath); bool excluded = projectLayout == null; - string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, sourcePath, options.TrapCompression); + string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, transformedSourcePath, options.TrapCompression); bool upToDate = false; if (!excluded) { // compilation.Clone() is used to allow symbols to be garbage collected. 
- using (var trapWriter = projectLayout.CreateTrapWriter(Logger, sourcePath, false, options.TrapCompression)) + using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedSourcePath, false, options.TrapCompression)) { upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile); diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs index b6ff91f5988..9f96b03f9f3 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/Compilation.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; +using Semmle.Util; namespace Semmle.Extraction.CSharp.Entities { @@ -22,32 +23,32 @@ namespace Semmle.Extraction.CSharp.Entities { Extraction.Entities.Assembly.CreateOutputAssembly(cx); - trapFile.compilations(this, Extraction.Entities.File.PathAsDatabaseString(cwd)); + trapFile.compilations(this, FileUtils.ConvertToUnix(cwd)); // Arguments int index = 0; - foreach(var arg in args) + foreach (var arg in args) { trapFile.compilation_args(this, index++, arg); } // Files index = 0; - foreach(var file in cx.Compilation.SyntaxTrees.Select(tree => Extraction.Entities.File.Create(cx, tree.FilePath))) + foreach (var file in cx.Compilation.SyntaxTrees.Select(tree => Extraction.Entities.File.Create(cx, tree.FilePath))) { trapFile.compilation_compiling_files(this, index++, file); } // References index = 0; - foreach(var file in cx.Compilation.References.OfType().Select(r => Extraction.Entities.File.Create(cx, r.FilePath))) + foreach (var file in cx.Compilation.References.OfType().Select(r => Extraction.Entities.File.Create(cx, r.FilePath))) { trapFile.compilation_referencing_files(this, index++, file); } // Diagnostics index = 0; - foreach(var diag in cx.Compilation.GetDiagnostics().Select(d => new Diagnostic(cx, d))) + foreach (var diag in 
cx.Compilation.GetDiagnostics().Select(d => new Diagnostic(cx, d))) { trapFile.diagnostic_for(diag, this, 0, index++); } @@ -57,7 +58,7 @@ namespace Semmle.Extraction.CSharp.Entities { var trapFile = cx.TrapWriter.Writer; int index = 0; - foreach(float metric in p.Metrics) + foreach (float metric in p.Metrics) { trapFile.compilation_time(this, -1, index++, metric); } diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs index 7ca12e00f26..e775e422e8c 100644 --- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs +++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor.cs @@ -76,16 +76,16 @@ namespace Semmle.Extraction.CSharp return ExitCode.Ok; } - using (var analyser = new Analyser(new LogProgressMonitor(logger), logger)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + + using (var analyser = new Analyser(new LogProgressMonitor(logger), logger, pathTransformer)) using (var references = new BlockingCollection()) { try { var compilerVersion = new CompilerVersion(commandLineArguments); - bool preserveSymlinks = Environment.GetEnvironmentVariable("SEMMLE_PRESERVE_SYMLINKS") == "true"; - var canonicalPathCache = CanonicalPathCache.Create(logger, 1000, preserveSymlinks ? 
CanonicalPathCache.Symlinks.Preserve : CanonicalPathCache.Symlinks.Follow); - if (compilerVersion.SkipExtraction) { logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason); @@ -317,7 +317,10 @@ namespace Semmle.Extraction.CSharp ILogger logger, CommonOptions options) { - using (var analyser = new Analyser(pm, logger)) + var canonicalPathCache = CanonicalPathCache.Create(logger, 1000); + var pathTransformer = new PathTransformer(canonicalPathCache); + + using (var analyser = new Analyser(pm, logger, pathTransformer)) using (var references = new BlockingCollection()) { try diff --git a/csharp/extractor/Semmle.Extraction.Tests/Layout.cs b/csharp/extractor/Semmle.Extraction.Tests/Layout.cs index 49da0fd45c5..301b2d8a881 100644 --- a/csharp/extractor/Semmle.Extraction.Tests/Layout.cs +++ b/csharp/extractor/Semmle.Extraction.Tests/Layout.cs @@ -5,6 +5,26 @@ using System.Runtime.InteropServices; namespace Semmle.Extraction.Tests { + struct TransformedPathStub : PathTransformer.ITransformedPath + { + readonly string value; + public TransformedPathStub(string value) => this.value = value; + public string Value => value; + + public string Extension => throw new System.NotImplementedException(); + + public string NameWithoutExtension => throw new System.NotImplementedException(); + + public PathTransformer.ITransformedPath ParentDirectory => throw new System.NotImplementedException(); + + public string DatabaseId => throw new System.NotImplementedException(); + + public PathTransformer.ITransformedPath WithSuffix(string suffix) + { + throw new System.NotImplementedException(); + } + } + public class Layout { readonly ILogger Logger = new LoggerMock(); @@ -13,10 +33,10 @@ namespace Semmle.Extraction.Tests public void TestDefaultLayout() { var layout = new Semmle.Extraction.Layout(null, null, null); - var project = layout.LookupProjectOrNull("foo.cs"); + var project = layout.LookupProjectOrNull(new 
TransformedPathStub("foo.cs")); // All files are mapped when there's no layout file. - Assert.True(layout.FileInLayout("foo.cs")); + Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs"))); // Test trap filename var tmpDir = Path.GetTempPath(); @@ -28,13 +48,13 @@ namespace Semmle.Extraction.Tests Assert.NotEqual(Directory.GetCurrentDirectory(), tmpDir); return; } - var f1 = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + var f1 = project.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, tmpDir, "foo.cs.trap.gz"); Assert.Equal(f1, g1); // Test trap file generation - var trapwriterFilename = project.GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - using (var trapwriter = project.CreateTrapWriter(Logger, "foo.cs", false, TrapWriter.CompressionMode.Gzip)) + var trapwriterFilename = project.GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + using (var trapwriter = project.CreateTrapWriter(Logger, new TransformedPathStub("foo.cs"), false, TrapWriter.CompressionMode.Gzip)) { trapwriter.Emit("1=*"); Assert.False(File.Exists(trapwriterFilename)); @@ -63,23 +83,23 @@ namespace Semmle.Extraction.Tests var layout = new Semmle.Extraction.Layout(null, null, "layout.txt"); // Test general pattern matching - Assert.True(layout.FileInLayout("bar.cs")); - Assert.False(layout.FileInLayout("foo.cs")); - Assert.False(layout.FileInLayout("goo.cs")); - Assert.False(layout.FileInLayout("excluded/bar.cs")); - Assert.True(layout.FileInLayout("excluded/foo.cs")); - Assert.True(layout.FileInLayout("included/foo.cs")); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + Assert.False(layout.FileInLayout(new TransformedPathStub("foo.cs"))); + Assert.False(layout.FileInLayout(new 
TransformedPathStub("goo.cs"))); + Assert.False(layout.FileInLayout(new TransformedPathStub("excluded/bar.cs"))); + Assert.True(layout.FileInLayout(new TransformedPathStub("excluded/foo.cs"))); + Assert.True(layout.FileInLayout(new TransformedPathStub("included/foo.cs"))); // Test the trap file - var project = layout.LookupProjectOrNull("bar.cs"); - var trapwriterFilename = project.GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip); - Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE), + var project = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs")); + var trapwriterFilename = project.GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip); + Assert.Equal(TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "bar.cs.trap.gz"), trapwriterFilename); // Test the source archive - var trapWriter = project.CreateTrapWriter(Logger, "bar.cs", false, TrapWriter.CompressionMode.Gzip); - trapWriter.Archive("layout.txt", System.Text.Encoding.ASCII); - var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt", TrapWriter.InnerPathComputation.ABSOLUTE); + var trapWriter = project.CreateTrapWriter(Logger, new TransformedPathStub("bar.cs"), false, TrapWriter.CompressionMode.Gzip); + trapWriter.Archive("layout.txt", new TransformedPathStub("layout.txt"), System.Text.Encoding.ASCII); + var writtenFile = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\archive"), "layout.txt"); Assert.True(File.Exists(writtenFile)); File.Delete("layout.txt"); } @@ -89,9 +109,9 @@ namespace Semmle.Extraction.Tests { // When you specify both a trap file and a layout, use the trap file. 
var layout = new Semmle.Extraction.Layout(Path.GetFullPath("snapshot\\trap"), null, "something.txt"); - Assert.True(layout.FileInLayout("bar.cs")); - var f1 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + var f1 = layout.LookupProjectOrNull(new TransformedPathStub("foo.cs")).GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap"), "foo.cs.trap.gz"); Assert.Equal(f1, g1); } @@ -117,26 +137,26 @@ namespace Semmle.Extraction.Tests var layout = new Semmle.Extraction.Layout(null, null, "layout.txt"); // Use Section 2 - Assert.True(layout.FileInLayout("bar.cs")); - var f1 = layout.LookupProjectOrNull("bar.cs").GetTrapPath(Logger, "bar.cs", TrapWriter.CompressionMode.Gzip); - var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + Assert.True(layout.FileInLayout(new TransformedPathStub("bar.cs"))); + var f1 = layout.LookupProjectOrNull(new TransformedPathStub("bar.cs")).GetTrapPath(Logger, new TransformedPathStub("bar.cs"), TrapWriter.CompressionMode.Gzip); + var g1 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap2"), "bar.cs.trap.gz"); Assert.Equal(f1, g1); // Use Section 1 - Assert.True(layout.FileInLayout("foo.cs")); - var f2 = layout.LookupProjectOrNull("foo.cs").GetTrapPath(Logger, "foo.cs", TrapWriter.CompressionMode.Gzip); - var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + Assert.True(layout.FileInLayout(new TransformedPathStub("foo.cs"))); + var f2 = layout.LookupProjectOrNull(new 
TransformedPathStub("foo.cs")).GetTrapPath(Logger, new TransformedPathStub("foo.cs"), TrapWriter.CompressionMode.Gzip); + var g2 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "foo.cs.trap.gz"); Assert.Equal(f2, g2); // boo.dll is not in the layout, so use layout from first section. - Assert.False(layout.FileInLayout("boo.dll")); - var f3 = layout.LookupProjectOrDefault("boo.dll").GetTrapPath(Logger, "boo.dll", TrapWriter.CompressionMode.Gzip); - var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz", TrapWriter.InnerPathComputation.ABSOLUTE); + Assert.False(layout.FileInLayout(new TransformedPathStub("boo.dll"))); + var f3 = layout.LookupProjectOrDefault(new TransformedPathStub("boo.dll")).GetTrapPath(Logger, new TransformedPathStub("boo.dll"), TrapWriter.CompressionMode.Gzip); + var g3 = TrapWriter.NestPaths(Logger, Path.GetFullPath("snapshot\\trap1"), "boo.dll.trap.gz"); Assert.Equal(f3, g3); // boo.cs is not in the layout, so return null - Assert.False(layout.FileInLayout("boo.cs")); - Assert.Null(layout.LookupProjectOrNull("boo.cs")); + Assert.False(layout.FileInLayout(new TransformedPathStub("boo.cs"))); + Assert.Null(layout.LookupProjectOrNull(new TransformedPathStub("boo.cs"))); } [Fact] diff --git a/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs new file mode 100644 index 00000000000..04865ceb8c1 --- /dev/null +++ b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs @@ -0,0 +1,45 @@ +using Semmle.Util; +using Xunit; + +namespace Semmle.Extraction.Tests +{ + class PathCacheStub : IPathCache + { + public string GetCanonicalPath(string path) => path; + } + + public class PathTransformerTests + { + [Fact] + public void TestTransformerFile() + { + var spec = new string[] + { + @"#D:\src", + @"C:\agent*\src//", + @"-C:\agent*\src\external", + @"", + @"#empty", + @"", + @"#src2", + @"/agent*//src", + @"", + @"#optsrc", + 
@"opt/src//" + }; + + var pathTransformer = new PathTransformer(new PathCacheStub(), spec); + + // Windows-style matching + Assert.Equal(@"C:\bar.cs", pathTransformer.Transform(@"C:\bar.cs").Value); + Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent42\src\file.cs").Value); + Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent43\src\file.cs").Value); + Assert.Equal(@"C:\agent43\src\external\file.cs", pathTransformer.Transform(@"C:\agent43\src\external\file.cs").Value); + + // Linux-style matching + Assert.Equal(@"src2/src/file.cs", pathTransformer.Transform(@"/agent/src/file.cs").Value); + Assert.Equal(@"src2/src/file.cs", pathTransformer.Transform(@"/agent42/src/file.cs").Value); + Assert.Equal(@"optsrc/file.cs", pathTransformer.Transform(@"/opt/src/file.cs").Value); + } + } +} diff --git a/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs b/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs index fd7f77f427b..54da865689b 100644 --- a/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs +++ b/csharp/extractor/Semmle.Extraction.Tests/TrapWriter.cs @@ -14,7 +14,7 @@ namespace Semmle.Extraction.Tests string tempDir = System.IO.Path.GetTempPath(); string root1, root2, root3; - if(Win32.IsWindows()) + if (Win32.IsWindows()) { root1 = "E:"; root2 = "e:"; @@ -27,32 +27,21 @@ namespace Semmle.Extraction.Tests root3 = "/"; } - string formattedTempDir = tempDir.Replace('/', '\\').Replace(':', '_').Trim('\\'); - var logger = new LoggerMock(); - System.IO.Directory.SetCurrentDirectory(tempDir); - if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) - { - // `Directory.SetCurrentDirectory()` doesn't seem to work on macOS, - // so disable this test on macOS, for now - Assert.NotEqual(Directory.GetCurrentDirectory(), tempDir); - return; - } + Assert.Equal($@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\')); - 
Assert.Equal($@"C:\Temp\source_archive\{formattedTempDir}\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/','\\')); + Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", "def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\E_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root1}\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\e_\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root2}\source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs").Replace('/', '\\')); - Assert.Equal(@"C:\Temp\source_archive\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}source\def.cs", TrapWriter.InnerPathComputation.RELATIVE).Replace('/', '\\')); - - 
Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs", TrapWriter.InnerPathComputation.ABSOLUTE).Replace('/', '\\')); + Assert.Equal(@"C:\Temp\source_archive\diskstation\share\source\def.cs", TrapWriter.NestPaths(logger, @"C:\Temp\source_archive", $@"{root3}{root3}diskstation\share\source\def.cs").Replace('/', '\\')); } class LoggerMock : ILogger diff --git a/csharp/extractor/Semmle.Extraction/Entities/File.cs b/csharp/extractor/Semmle.Extraction/Entities/File.cs index cbdf1535fbb..a0e9ea6d725 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/File.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/File.cs @@ -10,93 +10,54 @@ namespace Semmle.Extraction.Entities File(Context cx, string path) : base(cx, path) { - Path = path; + OriginalPath = path; + TransformedPath = Context.Extractor.PathTransformer.Transform(OriginalPath); } - public string Path - { - get; - private set; - } + readonly string OriginalPath; + readonly PathTransformer.ITransformedPath TransformedPath; - public string DatabasePath => PathAsDatabaseId(Path); - - public override bool NeedsPopulation => Context.DefinesFile(Path) || Path == Context.Extractor.OutputPath; + public override bool NeedsPopulation => Context.DefinesFile(OriginalPath) || OriginalPath == Context.Extractor.OutputPath; public override void Populate(TextWriter trapFile) { - if (Path == null) + trapFile.files(this, TransformedPath.Value, TransformedPath.NameWithoutExtension, TransformedPath.Extension); + + if (TransformedPath.ParentDirectory is PathTransformer.ITransformedPath dir) + trapFile.containerparent(Folder.Create(Context, dir), this); + + var fromSource = TransformedPath.Extension.ToLowerInvariant().Equals("cs"); + if (fromSource) { - trapFile.files(this, "", "", ""); - } - else - { - var fi = new FileInfo(Path); - - string extension = fi.Extension ?? 
""; - string name = fi.Name; - name = name.Substring(0, name.Length - extension.Length); - int fromSource = extension.ToLowerInvariant().Equals(".cs") ? 1 : 2; - - // remove the dot from the extension - if (extension.Length > 0) - extension = extension.Substring(1); - trapFile.files(this, PathAsDatabaseString(Path), name, extension); - - trapFile.containerparent(Folder.Create(Context, fi.Directory), this); - if (fromSource == 1) + foreach (var text in Context.Compilation.SyntaxTrees. + Where(t => t.FilePath == OriginalPath). + Select(tree => tree.GetText())) { - foreach (var text in Context.Compilation.SyntaxTrees. - Where(t => t.FilePath == Path). - Select(tree => tree.GetText())) - { - var rawText = text.ToString() ?? ""; - var lineCounts = LineCounter.ComputeLineCounts(rawText); - if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++; + var rawText = text.ToString() ?? ""; + var lineCounts = LineCounter.ComputeLineCounts(rawText); + if (rawText.Length > 0 && rawText[rawText.Length - 1] != '\n') lineCounts.Total++; - trapFile.numlines(this, lineCounts); - Context.TrapWriter.Archive(fi.FullName, text.Encoding ?? System.Text.Encoding.Default); - } + trapFile.numlines(this, lineCounts); + Context.TrapWriter.Archive(OriginalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default); } - - trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0); } + + trapFile.file_extraction_mode(this, Context.Extractor.Standalone ? 1 : 0); } public override void WriteId(System.IO.TextWriter trapFile) { - if (Path is null) - trapFile.Write("GENERATED;sourcefile"); - else - { - trapFile.Write(DatabasePath); - trapFile.Write(";sourcefile"); - } + trapFile.Write(TransformedPath.DatabaseId); + trapFile.Write(";sourcefile"); } - /// - /// Converts a path string into a string to use as an ID - /// in the QL database. - /// - /// An absolute path. - /// The database ID. 
- public static string PathAsDatabaseId(string path) - { - if (path.Length >= 2 && path[1] == ':' && Char.IsLower(path[0])) - path = Char.ToUpper(path[0]) + "_" + path.Substring(2); - return path.Replace('\\', '/').Replace(":", "_"); - } - - public static string PathAsDatabaseString(string path) => path.Replace('\\', '/'); - public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, path); public static File CreateGenerated(Context cx) => GeneratedFile.Create(cx); class GeneratedFile : File { - GeneratedFile(Context cx) - : base(cx, "") { } + GeneratedFile(Context cx) : base(cx, "") { } public override bool NeedsPopulation => true; diff --git a/csharp/extractor/Semmle.Extraction/Entities/Folder.cs b/csharp/extractor/Semmle.Extraction/Entities/Folder.cs index 3c29ee38bd9..0e275e37d8d 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/Folder.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/Folder.cs @@ -2,65 +2,44 @@ using System.IO; namespace Semmle.Extraction.Entities { - sealed class Folder : CachedEntity + sealed class Folder : CachedEntity { - Folder(Context cx, DirectoryInfo init) - : base(cx, init) - { - Path = init.FullName; - } - - public string Path - { - get; - private set; - } - - public string DatabasePath => File.PathAsDatabaseId(Path); + Folder(Context cx, PathTransformer.ITransformedPath init) : base(cx, init) { } public override void Populate(TextWriter trapFile) { - // Ensure that the name of the root directory is consistent - // with the XmlTrapWriter. - // Linux/Windows: java.io.File.getName() returns "" - // On Linux: System.IO.DirectoryInfo.Name returns "/" - // On Windows: System.IO.DirectoryInfo.Name returns "L:\" - string shortName = symbol.Parent == null ? 
"" : symbol.Name; - - trapFile.folders(this, File.PathAsDatabaseString(Path), shortName); - if (symbol.Parent != null) - { - trapFile.containerparent(Create(Context, symbol.Parent), this); - } + trapFile.folders(this, symbol.Value, symbol.NameWithoutExtension); + if (symbol.ParentDirectory is PathTransformer.ITransformedPath parent) + trapFile.containerparent(Create(Context, parent), this); } public override bool NeedsPopulation => true; public override void WriteId(System.IO.TextWriter trapFile) { - trapFile.Write(DatabasePath); + trapFile.Write(symbol.DatabaseId); trapFile.Write(";folder"); } - public static Folder Create(Context cx, DirectoryInfo folder) => + public static Folder Create(Context cx, PathTransformer.ITransformedPath folder) => FolderFactory.Instance.CreateEntity2(cx, folder); public override Microsoft.CodeAnalysis.Location? ReportingLocation => null; - class FolderFactory : ICachedEntityFactory + class FolderFactory : ICachedEntityFactory { public static readonly FolderFactory Instance = new FolderFactory(); - public Folder Create(Context cx, DirectoryInfo init) => new Folder(cx, init); + public Folder Create(Context cx, PathTransformer.ITransformedPath init) => new Folder(cx, init); } public override TrapStackBehaviour TrapStackBehaviour => TrapStackBehaviour.NoLabel; - public override int GetHashCode() => Path.GetHashCode(); + public override int GetHashCode() => symbol.GetHashCode(); public override bool Equals(object? obj) { - return obj is Folder folder && folder.Path == Path; + return obj is Folder folder && Equals(folder.symbol, symbol); } } } diff --git a/csharp/extractor/Semmle.Extraction/Extractor.cs b/csharp/extractor/Semmle.Extraction/Extractor.cs index 13750c1aa5c..7b176f20c9f 100644 --- a/csharp/extractor/Semmle.Extraction/Extractor.cs +++ b/csharp/extractor/Semmle.Extraction/Extractor.cs @@ -81,6 +81,11 @@ namespace Semmle.Extraction /// ILogger Logger { get; } + /// + /// The path transformer to apply. 
+ /// + PathTransformer PathTransformer { get; } + /// /// Creates a new context. /// @@ -111,11 +116,14 @@ namespace Semmle.Extraction /// /// If the extraction is standalone. /// The name of the output DLL/EXE, or null if not specified (standalone extraction). - public Extractor(bool standalone, string outputPath, ILogger logger) + /// The object used for logging. + /// The object used for path transformations. + public Extractor(bool standalone, string outputPath, ILogger logger, PathTransformer pathTransformer) { Standalone = standalone; OutputPath = outputPath; Logger = logger; + PathTransformer = pathTransformer; } // Limit the number of error messages in the log file @@ -205,5 +213,7 @@ namespace Semmle.Extraction public ILogger Logger { get; private set; } public static string Version => $"{ThisAssembly.Git.BaseTag} ({ThisAssembly.Git.Sha})"; + + public PathTransformer PathTransformer { get; } } } diff --git a/csharp/extractor/Semmle.Extraction/Layout.cs b/csharp/extractor/Semmle.Extraction/Layout.cs index 1e44c3142e4..d740d2c05b9 100644 --- a/csharp/extractor/Semmle.Extraction/Layout.cs +++ b/csharp/extractor/Semmle.Extraction/Layout.cs @@ -54,14 +54,15 @@ namespace Semmle.Extraction /// /// The source file. /// The full filepath of the trap file. - public string GetTrapPath(ILogger logger, string srcFile, TrapWriter.CompressionMode trapCompression) => TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression); + public string GetTrapPath(ILogger logger, PathTransformer.ITransformedPath srcFile, TrapWriter.CompressionMode trapCompression) => + TrapWriter.TrapPath(logger, TRAP_FOLDER, srcFile, trapCompression); /// /// Creates a trap writer for a given source/assembly file. /// /// The source file. /// A newly created TrapWriter. 
- public TrapWriter CreateTrapWriter(ILogger logger, string srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => + public TrapWriter CreateTrapWriter(ILogger logger, PathTransformer.ITransformedPath srcFile, bool discardDuplicates, TrapWriter.CompressionMode trapCompression) => new TrapWriter(logger, srcFile, TRAP_FOLDER, SOURCE_ARCHIVE, discardDuplicates, trapCompression); } @@ -73,7 +74,7 @@ namespace Semmle.Extraction /// /// The file to look up. /// The relevant subproject, or null if not found. - public SubProject? LookupProjectOrNull(string sourceFile) + public SubProject? LookupProjectOrNull(PathTransformer.ITransformedPath sourceFile) { if (!useLayoutFile) return DefaultProject; @@ -89,7 +90,7 @@ namespace Semmle.Extraction /// /// The file to look up. /// The relevant subproject, or DefaultProject if not found. - public SubProject LookupProjectOrDefault(string sourceFile) + public SubProject LookupProjectOrDefault(PathTransformer.ITransformedPath sourceFile) { return LookupProjectOrNull(sourceFile) ?? DefaultProject; } @@ -134,7 +135,7 @@ namespace Semmle.Extraction /// /// The absolute path of the file to query. /// True iff there is no layout file or the layout file specifies the file. 
- public bool FileInLayout(string path) => LookupProjectOrNull(path) != null; + public bool FileInLayout(PathTransformer.ITransformedPath path) => LookupProjectOrNull(path) != null; void ReadLayoutFile(string layout) { @@ -197,6 +198,6 @@ namespace Semmle.Extraction } } - public bool Matches(string path) => FilePattern.Matches(filePatterns, path, out var _); + public bool Matches(PathTransformer.ITransformedPath path) => FilePattern.Matches(filePatterns, path.Value, out var _); } } diff --git a/csharp/extractor/Semmle.Extraction/PathTransformer.cs b/csharp/extractor/Semmle.Extraction/PathTransformer.cs new file mode 100644 index 00000000000..3c972f48114 --- /dev/null +++ b/csharp/extractor/Semmle.Extraction/PathTransformer.cs @@ -0,0 +1,178 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Diagnostics.CodeAnalysis; +using Semmle.Util; + +namespace Semmle.Extraction +{ + /// + /// A class for interpreting path transformers specified using the environment + /// variable `CODEQL_PATH_TRANSFORMER`. + /// + public sealed class PathTransformer + { + public class InvalidPathTransformerException : Exception + { + public InvalidPathTransformerException(string message) : + base($"Invalid path transformer specification: {message}") + { } + } + + /// + /// A transformed path. + /// + public interface ITransformedPath + { + string Value { get; } + + string Extension { get; } + + string NameWithoutExtension { get; } + + ITransformedPath? ParentDirectory { get; } + + ITransformedPath WithSuffix(string suffix); + + string DatabaseId { get; } + } + + struct TransformedPath : ITransformedPath + { + public TransformedPath(string value) { this.value = value; } + readonly string value; + + public string Value => value; + + public string Extension => Path.GetExtension(value)?.Substring(1) ?? ""; + + public string NameWithoutExtension => Path.GetFileNameWithoutExtension(value); + + public ITransformedPath? 
ParentDirectory + { + get + { + var dir = Path.GetDirectoryName(value); + if (dir is null) + return null; + var isWindowsDriveLetter = dir.Length == 2 && char.IsLetter(dir[0]) && dir[1] == ':'; + if (isWindowsDriveLetter) + return null; + return new TransformedPath(FileUtils.ConvertToUnix(dir)); + } + } + + public ITransformedPath WithSuffix(string suffix) => new TransformedPath(value + suffix); + + public string DatabaseId + { + get + { + var ret = value; + if (ret.Length >= 2 && ret[1] == ':' && Char.IsLower(ret[0])) + ret = Char.ToUpper(ret[0]) + "_" + ret.Substring(2); + return ret.Replace('\\', '/').Replace(":", "_"); + } + } + + public override int GetHashCode() => 11 * value.GetHashCode(); + + public override bool Equals(object? obj) => obj is TransformedPath tp && tp.value == value; + + public override string ToString() => value; + } + + readonly Func transform; + + /// + /// Returns the path obtained by transforming `path`. + /// + public ITransformedPath Transform(string path) => new TransformedPath(transform(path)); + + /// + /// Default constructor reads parameters from the environment. + /// + public PathTransformer(IPathCache pathCache) : + this(pathCache, Environment.GetEnvironmentVariable("CODEQL_PATH_TRANSFORMER") is string file ? File.ReadAllLines(file) : null) + { + } + + /// + /// Creates a path transformer based on the specification in `lines`. + /// Throws `InvalidPathTransformerException` for invalid specifications. + /// + public PathTransformer(IPathCache pathCache, string[]? 
lines) + { + if (lines is null) + { + transform = path => FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path)); + return; + } + + var sections = ParsePathTransformerSpec(lines); + transform = path => + { + path = FileUtils.ConvertToUnix(pathCache.GetCanonicalPath(path)); + foreach (var section in sections) + { + if (section.Matches(path, out var transformed)) + return transformed; + } + return path; + }; + } + + static IEnumerable ParsePathTransformerSpec(string[] lines) + { + var sections = new List(); + try + { + int i = 0; + while (i < lines.Length && !lines[i].StartsWith("#")) + i++; + while (i < lines.Length) + { + var section = new TransformerSection(lines, ref i); + sections.Add(section); + } + + if (sections.Count == 0) + throw new InvalidPathTransformerException("contains no sections."); + } + catch (InvalidFilePatternException ex) + { + throw new InvalidPathTransformerException(ex.Message); + } + return sections; + } + } + + sealed class TransformerSection + { + readonly string name; + readonly List filePatterns = new List(); + + public TransformerSection(string[] lines, ref int i) + { + name = lines[i++].Substring(1); + while (i < lines.Length && !lines[i].StartsWith("#")) + { + if (string.IsNullOrEmpty(lines[i])) + i++; + else + filePatterns.Add(new FilePattern(lines[i++])); + } + } + + public bool Matches(string path, [NotNullWhen(true)] out string? 
transformed) + { + if (FilePattern.Matches(filePatterns, path, out var suffix)) + { + transformed = FileUtils.ConvertToUnix(name) + suffix; + return true; + } + transformed = null; + return false; + } + } +} diff --git a/csharp/extractor/Semmle.Extraction/TrapWriter.cs b/csharp/extractor/Semmle.Extraction/TrapWriter.cs index 7ea08eafc1c..8082567c825 100644 --- a/csharp/extractor/Semmle.Extraction/TrapWriter.cs +++ b/csharp/extractor/Semmle.Extraction/TrapWriter.cs @@ -14,12 +14,6 @@ namespace Semmle.Extraction public sealed class TrapWriter : IDisposable { - public enum InnerPathComputation - { - ABSOLUTE, - RELATIVE - } - public enum CompressionMode { None, @@ -45,7 +39,7 @@ namespace Semmle.Extraction readonly CompressionMode TrapCompression; - public TrapWriter(ILogger logger, string outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression) + public TrapWriter(ILogger logger, PathTransformer.ITransformedPath outputfile, string? trap, string? archive, bool discardDuplicates, CompressionMode trapCompression) { Logger = logger; TrapCompression = trapCompression; @@ -107,16 +101,17 @@ namespace Semmle.Extraction /// Adds the specified input file to the source archive. It may end up in either the normal or long path area /// of the source archive, depending on the length of its full path. /// - /// The path to the input file. + /// The path to the input file. + /// The transformed path to the input file. /// The encoding used by the input file. - public void Archive(string inputPath, Encoding inputEncoding) + public void Archive(string originalPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding) { if (string.IsNullOrEmpty(archive)) return; // Calling GetFullPath makes this use the canonical capitalisation, if the file exists. 
- string fullInputPath = Path.GetFullPath(inputPath); + string fullInputPath = Path.GetFullPath(originalPath); - ArchivePath(fullInputPath, inputEncoding); + ArchivePath(fullInputPath, transformedPath, inputEncoding); } /// @@ -124,14 +119,11 @@ namespace Semmle.Extraction /// /// The path of the file. /// The contents of the file. - public void Archive(string inputPath, string contents) + public void Archive(PathTransformer.ITransformedPath inputPath, string contents) { if (string.IsNullOrEmpty(archive)) return; - // Calling GetFullPath makes this use the canonical capitalisation, if the file exists. - string fullInputPath = Path.GetFullPath(inputPath); - - ArchiveContents(fullInputPath, contents); + ArchiveContents(inputPath, contents); } /// @@ -210,18 +202,19 @@ namespace Semmle.Extraction /// source archive less than the system path limit of 260 characters. /// /// The full path to the input file. + /// The transformed path to the input file. /// The encoding used by the input file. /// If the output path in the source archive would /// exceed the system path limit of 260 characters. - private void ArchivePath(string fullInputPath, Encoding inputEncoding) + private void ArchivePath(string fullInputPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding) { string contents = File.ReadAllText(fullInputPath, inputEncoding); - ArchiveContents(fullInputPath, contents); + ArchiveContents(transformedPath, contents); } - private void ArchiveContents(string fullInputPath, string contents) + private void ArchiveContents(PathTransformer.ITransformedPath transformedPath, string contents) { - string dest = NestPaths(Logger, archive, fullInputPath, InnerPathComputation.ABSOLUTE); + string dest = NestPaths(Logger, archive, transformedPath.Value); string tmpSrcFile = Path.GetTempFileName(); File.WriteAllText(tmpSrcFile, contents, UTF8); try @@ -236,14 +229,11 @@ namespace Semmle.Extraction } } - public static string NestPaths(ILogger logger, string? 
outerpath, string innerpath, InnerPathComputation innerPathComputation) + public static string NestPaths(ILogger logger, string? outerpath, string innerpath) { string nested = innerpath; if (!string.IsNullOrEmpty(outerpath)) { - if (!Path.IsPathRooted(innerpath) && innerPathComputation == InnerPathComputation.ABSOLUTE) - innerpath = Path.GetFullPath(innerpath); - // Remove all leading path separators / or \ // For example, UNC paths have two leading \\ innerpath = innerpath.TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); @@ -276,13 +266,13 @@ namespace Semmle.Extraction } } - public static string TrapPath(ILogger logger, string? folder, string filename, TrapWriter.CompressionMode trapCompression) + public static string TrapPath(ILogger logger, string? folder, PathTransformer.ITransformedPath path, TrapWriter.CompressionMode trapCompression) { - filename = $"{Path.GetFullPath(filename)}.trap{TrapExtension(trapCompression)}"; + var filename = $"{path.Value}.trap{TrapExtension(trapCompression)}"; if (string.IsNullOrEmpty(folder)) folder = Directory.GetCurrentDirectory(); - return NestPaths(logger, folder, filename, InnerPathComputation.ABSOLUTE); ; + return NestPaths(logger, folder, filename); } } } diff --git a/csharp/extractor/Semmle.Util/CanonicalPathCache.cs b/csharp/extractor/Semmle.Util/CanonicalPathCache.cs index bbc8ab995b4..339641ecb35 100644 --- a/csharp/extractor/Semmle.Util/CanonicalPathCache.cs +++ b/csharp/extractor/Semmle.Util/CanonicalPathCache.cs @@ -222,6 +222,29 @@ namespace Semmle.Util this.pathStrategy = pathStrategy; } + + /// + /// Create a CanonicalPathCache. + /// + /// + /// + /// Creates the appropriate PathStrategy object which encapsulates + /// the correct algorithm. Falls back to different implementations + /// depending on platform. + /// + /// + /// Size of the cache. + /// Policy for following symlinks. + /// A new CanonicalPathCache. 
+ public static CanonicalPathCache Create(ILogger logger, int maxCapacity) + { + var preserveSymlinks = + Environment.GetEnvironmentVariable("CODEQL_PRESERVE_SYMLINKS") == "true" || + Environment.GetEnvironmentVariable("SEMMLE_PRESERVE_SYMLINKS") == "true"; + return Create(logger, maxCapacity, preserveSymlinks ? CanonicalPathCache.Symlinks.Preserve : CanonicalPathCache.Symlinks.Follow); + + } + /// /// Create a CanonicalPathCache. /// From 4d0a1ee8578a061f3286b6e4b7a626e4d9211921 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 8 Sep 2020 09:29:35 +0200 Subject: [PATCH 04/10] Address review comments --- .../PathTransformer.cs | 4 +-- .../Semmle.Extraction/FilePattern.cs | 34 +++++++++++-------- .../Semmle.Extraction/PathTransformer.cs | 11 +++--- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs index 04865ceb8c1..b0f0ba8c51f 100644 --- a/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs +++ b/csharp/extractor/Semmle.Extraction.Tests/PathTransformer.cs @@ -31,10 +31,10 @@ namespace Semmle.Extraction.Tests var pathTransformer = new PathTransformer(new PathCacheStub(), spec); // Windows-style matching - Assert.Equal(@"C:\bar.cs", pathTransformer.Transform(@"C:\bar.cs").Value); + Assert.Equal(@"C:/bar.cs", pathTransformer.Transform(@"C:\bar.cs").Value); Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent42\src\file.cs").Value); Assert.Equal("D:/src/file.cs", pathTransformer.Transform(@"C:\agent43\src\file.cs").Value); - Assert.Equal(@"C:\agent43\src\external\file.cs", pathTransformer.Transform(@"C:\agent43\src\external\file.cs").Value); + Assert.Equal(@"C:/agent43/src/external/file.cs", pathTransformer.Transform(@"C:\agent43\src\external\file.cs").Value); // Linux-style matching Assert.Equal(@"src2/src/file.cs", pathTransformer.Transform(@"/agent/src/file.cs").Value); diff --git 
a/csharp/extractor/Semmle.Extraction/FilePattern.cs b/csharp/extractor/Semmle.Extraction/FilePattern.cs index c648faadbe1..2d61badd2f2 100644 --- a/csharp/extractor/Semmle.Extraction/FilePattern.cs +++ b/csharp/extractor/Semmle.Extraction/FilePattern.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Diagnostics.CodeAnalysis; @@ -15,7 +16,7 @@ namespace Semmle.Extraction } /// - /// An file pattern, as used in either an extractor layout file or + /// A file pattern, as used in either an extractor layout file or /// a path transformer file. /// public sealed class FilePattern @@ -27,11 +28,12 @@ namespace Semmle.Extraction public FilePattern(string pattern) { - Include = false; + Include = true; if (pattern.StartsWith("-")) + { pattern = pattern.Substring(1); - else - Include = true; + Include = false; + } pattern = FileUtils.ConvertToUnix(pattern.Trim()).TrimStart('/'); RegexPattern = BuildRegex(pattern).ToString(); } @@ -103,19 +105,23 @@ namespace Semmle.Extraction public static bool Matches(IEnumerable patterns, string path, [NotNullWhen(true)] out string? transformerSuffix) { path = FileUtils.ConvertToUnix(path).TrimStart('/'); - Match? lastMatch = null; - foreach (var pattern in patterns) + + foreach (var pattern in patterns.Reverse()) { var m = new Regex(pattern.RegexPattern).Match(path); if (m.Success) - lastMatch = pattern.Include ? m : null; - } - if (lastMatch is Match) - { - transformerSuffix = lastMatch.Groups.TryGetValue("doubleslash", out var group) - ? path.Substring(group.Index) - : path; - return true; + { + if (pattern.Include) + { + transformerSuffix = m.Groups.TryGetValue("doubleslash", out var group) + ? 
path.Substring(group.Index) + : path; + return true; + } + + transformerSuffix = null; + return false; + } } transformerSuffix = null; diff --git a/csharp/extractor/Semmle.Extraction/PathTransformer.cs b/csharp/extractor/Semmle.Extraction/PathTransformer.cs index 3c972f48114..2c9770e790e 100644 --- a/csharp/extractor/Semmle.Extraction/PathTransformer.cs +++ b/csharp/extractor/Semmle.Extraction/PathTransformer.cs @@ -154,13 +154,12 @@ namespace Semmle.Extraction public TransformerSection(string[] lines, ref int i) { - name = lines[i++].Substring(1); - while (i < lines.Length && !lines[i].StartsWith("#")) + name = lines[i++].Substring(1); // skip the '#' + for (; i < lines.Length && !lines[i].StartsWith("#"); i++) { - if (string.IsNullOrEmpty(lines[i])) - i++; - else - filePatterns.Add(new FilePattern(lines[i++])); + var line = lines[i]; + if (!string.IsNullOrWhiteSpace(line)) + filePatterns.Add(new FilePattern(line)); } } From b1e6e3a6f23c9f6c03c4f8b752aa5f6a38112da1 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 8 Sep 2020 14:18:20 +0200 Subject: [PATCH 05/10] Java: Add 1.25 change notes. --- change-notes/1.25/analysis-java.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/change-notes/1.25/analysis-java.md b/change-notes/1.25/analysis-java.md index 7cdd9e491a2..5adb02e4d47 100644 --- a/change-notes/1.25/analysis-java.md +++ b/change-notes/1.25/analysis-java.md @@ -4,6 +4,8 @@ The following changes in version 1.25 affect Java analysis in all applications. ## General improvements +The Java autobuilder has been improved to detect more Gradle Java versions. + ## New queries | **Query** | **Tags** | **Purpose** | @@ -14,10 +16,20 @@ The following changes in version 1.25 affect Java analysis in all applications. 
| **Query** | **Expected impact** | **Change** | |------------------------------|------------------------|-----------------------------------| - +| Hard-coded credential in API call (`java/hardcoded-credential-api-call`) | More results | The query now recognizes the `BasicAWSCredentials` class of the Amazon client SDK library with hardcoded access key/secret key. | +| Deserialization of user-controlled data (`java/unsafe-deserialization`) | Fewer false positive results | The query no longer reports results using `org.apache.commons.io.serialization.ValidatingObjectInputStream`. | +| Use of a broken or risky cryptographic algorithm (`java/weak-cryptographic-algorithm`) | More results | The query now recognizes the `MessageDigest.getInstance` method. | +| Use of a potentially broken or risky cryptographic algorithm (`java/potentially-weak-cryptographic-algorithm`) | More results | The query now recognizes the `MessageDigest.getInstance` method. | +| Reading from a world writable file (`java/world-writable-file-read`) | More results | The query now recognizes more JDK file operations. | ## Changes to libraries +* The data-flow library has been improved with more taint flow modeling for the + Collections framework and other classes of the JDK. This affects all security + queries using data flow and can yield additional results. +* The data-flow library has been improved with more taint flow modeling for the + Spring framework. This affects all security queries using data flow and can + yield additional results on projects that rely on the Spring framework. * The data-flow library has been improved, which affects most security queries by potentially adding more results. Flow through methods now takes nested field reads/writes into account. For example, the library is able to track flow from `"taint"` to `sink()` via the method @@ -39,3 +51,5 @@ The following changes in version 1.25 affect Java analysis in all applications.
} } ``` +* The library has been extended with more support for Java 14 features + (`switch` expressions and pattern-matching for `instanceof`). From 2979f9813ee0ba08779623a72106f00ab35e30d8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 8 Sep 2020 14:27:12 +0200 Subject: [PATCH 06/10] Python: Add missing change notes I looked through PRs between rc/1.24 and rc/1.25 and added missing change notes for: - https://github.com/github/codeql/pull/3314 - https://github.com/github/codeql/pull/3302 - https://github.com/github/codeql/pull/3212 - https://github.com/github/codeql/pull/3453 - https://github.com/github/codeql/pull/3407 - https://github.com/github/codeql/pull/3563 ``` git log --grep="Merge pull request" --format=oneline rc/1.24..rc/1.25 -- python/ ``` --- change-notes/1.25/analysis-python.md | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/change-notes/1.25/analysis-python.md b/change-notes/1.25/analysis-python.md index 5d0fc69ec80..345cce5a227 100644 --- a/change-notes/1.25/analysis-python.md +++ b/change-notes/1.25/analysis-python.md @@ -1,22 +1,9 @@ # Improvements to Python analysis -The following changes in version 1.25 affect Python analysis in all applications. - -## General improvements - - -## New queries - -| **Query** | **Tags** | **Purpose** | -|-----------------------------|-----------|--------------------------------------------------------------------| - - -## Changes to existing queries - -| **Query** | **Expected impact** | **Change** | -|----------------------------|------------------------|------------------------------------------------------------------| - - -## Changes to libraries - * Importing `semmle.python.web.HttpRequest` will no longer import `UntrustedStringKind` transitively. `UntrustedStringKind` is the most commonly used non-abstract subclass of `ExternalStringKind`. 
If not imported (by one mean or another), taint-tracking queries that concern `ExternalStringKind` will not produce any results. Please ensure such queries contain an explicit import (`import semmle.python.security.strings.Untrusted`). +* Added model of taint sources for HTTP servers using `http.server`. +* Added taint modeling of routed parameters in flask. +* Improved modeling of builtin methods on strings for taint tracking. +* Improved classification of test files. +* New class `BoundMethodValue` exposing information about a bound method. +* The query `py/command-line-injection` now recognizes command execution with the `fabric` and `invoke` Python libraries. From 02da80aa25fc7690bef9a4c77c057571b2c27406 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 8 Sep 2020 14:40:33 +0200 Subject: [PATCH 07/10] Java: Remove "New Queries" section. --- change-notes/1.25/analysis-java.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/change-notes/1.25/analysis-java.md b/change-notes/1.25/analysis-java.md index 5adb02e4d47..ab11e5aaaf1 100644 --- a/change-notes/1.25/analysis-java.md +++ b/change-notes/1.25/analysis-java.md @@ -6,12 +6,6 @@ The following changes in version 1.25 affect Java analysis in all applications. The Java autobuilder has been improved to detect more Gradle Java versions. 
-## New queries - -| **Query** | **Tags** | **Purpose** | -|-----------------------------|-----------|--------------------------------------------------------------------| - - ## Changes to existing queries | **Query** | **Expected impact** | **Change** | From 038688a55c5519a40c15950a323fdf0ea45d13ae Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 9 Sep 2020 10:34:40 +0200 Subject: [PATCH 08/10] Python: Minor updates to 1.25 change notes backporting fixes from `@sj` --- change-notes/1.25/analysis-python.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/change-notes/1.25/analysis-python.md b/change-notes/1.25/analysis-python.md index 345cce5a227..12586e8aec0 100644 --- a/change-notes/1.25/analysis-python.md +++ b/change-notes/1.25/analysis-python.md @@ -2,8 +2,8 @@ * Importing `semmle.python.web.HttpRequest` will no longer import `UntrustedStringKind` transitively. `UntrustedStringKind` is the most commonly used non-abstract subclass of `ExternalStringKind`. If not imported (by one mean or another), taint-tracking queries that concern `ExternalStringKind` will not produce any results. Please ensure such queries contain an explicit import (`import semmle.python.security.strings.Untrusted`). * Added model of taint sources for HTTP servers using `http.server`. -* Added taint modeling of routed parameters in flask. -* Improved modeling of builtin methods on strings for taint tracking. +* Added taint modeling of routed parameters in Flask. +* Improved modeling of built-in methods on strings for taint tracking. * Improved classification of test files. * New class `BoundMethodValue` exposing information about a bound method. * The query `py/command-line-injection` now recognizes command execution with the `fabric` and `invoke` Python libraries. 
From 3414063f2eedf3444c65cf3232193e4270476e2c Mon Sep 17 00:00:00 2001 From: Calum Grant <42069085+calumgrant@users.noreply.github.com> Date: Fri, 11 Sep 2020 13:16:26 +0100 Subject: [PATCH 09/10] Update change-notes/1.25/analysis-python.md Co-authored-by: Rasmus Wriedt Larsen --- change-notes/1.25/analysis-python.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change-notes/1.25/analysis-python.md b/change-notes/1.25/analysis-python.md index 12586e8aec0..ed3496bc734 100644 --- a/change-notes/1.25/analysis-python.md +++ b/change-notes/1.25/analysis-python.md @@ -5,5 +5,5 @@ * Added taint modeling of routed parameters in Flask. * Improved modeling of built-in methods on strings for taint tracking. * Improved classification of test files. -* New class `BoundMethodValue` exposing information about a bound method. +* New class `BoundMethodValue` represents a bound method during runtime. * The query `py/command-line-injection` now recognizes command execution with the `fabric` and `invoke` Python libraries. From e54937756129301eebb6428f97a3bd03eb0929b3 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Mon, 14 Sep 2020 11:00:59 +0200 Subject: [PATCH 10/10] C#: Construct `File::TransformedPathLazy` lazily This avoids calling the path transformer for `GeneratedFile`s. 
--- csharp/extractor/Semmle.Extraction/Entities/File.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csharp/extractor/Semmle.Extraction/Entities/File.cs b/csharp/extractor/Semmle.Extraction/Entities/File.cs index a0e9ea6d725..1a5da1cbe1b 100644 --- a/csharp/extractor/Semmle.Extraction/Entities/File.cs +++ b/csharp/extractor/Semmle.Extraction/Entities/File.cs @@ -11,11 +11,12 @@ namespace Semmle.Extraction.Entities : base(cx, path) { OriginalPath = path; - TransformedPath = Context.Extractor.PathTransformer.Transform(OriginalPath); + TransformedPathLazy = new Lazy(() => Context.Extractor.PathTransformer.Transform(OriginalPath)); } readonly string OriginalPath; - readonly PathTransformer.ITransformedPath TransformedPath; + readonly Lazy TransformedPathLazy; + PathTransformer.ITransformedPath TransformedPath => TransformedPathLazy.Value; public override bool NeedsPopulation => Context.DefinesFile(OriginalPath) || OriginalPath == Context.Extractor.OutputPath;