using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Semmle.Util;

namespace Semmle.Extraction
{
    /// <summary>
    /// Thrown when a file pattern cannot be translated into a regular expression.
    /// The exception message contains the offending pattern and the reason.
    /// </summary>
    public sealed class InvalidFilePatternException : Exception
    {
        /// <param name="pattern">The pattern that was rejected.</param>
        /// <param name="message">A description of why the pattern is invalid.</param>
        public InvalidFilePatternException(string pattern, string message) :
            base($"Invalid file pattern '{pattern}': {message}")
        { }
    }

    /// <summary>
    /// A file pattern, as used in either an extractor layout file or
    /// a path transformer file.
    /// </summary>
    public sealed class FilePattern
    {
        /// <summary>
        /// Name of the regex group that captures the (single) `//` marker of a pattern.
        /// Shared by `BuildRegex` (which emits the group) and `Matches` (which reads it).
        /// </summary>
        private const string DoubleSlashGroup = "doubleslash";

        /// <summary>
        /// The regex built from `RegexPattern`, constructed once so that `Matches`
        /// does not have to re-parse the pattern on every call.
        /// </summary>
        private readonly Regex regex;

        /// <summary>
        /// Whether this is an inclusion pattern.
        /// </summary>
        public bool Include { get; }

        /// <summary>
        /// Creates a file pattern from its textual form. A leading `-` marks the
        /// pattern as an exclusion pattern; otherwise it is an inclusion pattern.
        /// </summary>
        /// <param name="pattern">The textual pattern.</param>
        /// <exception cref="InvalidFilePatternException">The pattern is malformed.</exception>
        public FilePattern(string pattern)
        {
            Include = true;

            // Ordinal comparison: a culture-sensitive `StartsWith` may skip ignorable
            // characters, in which case `Substring(1)` would drop the wrong character.
            if (pattern.StartsWith("-", StringComparison.Ordinal))
            {
                pattern = pattern.Substring(1);
                Include = false;
            }

            // Patterns are matched against paths without leading slashes (see `Matches`).
            pattern = FileUtils.ConvertToUnix(pattern.Trim()).TrimStart('/');
            RegexPattern = BuildRegex(pattern).ToString();
            regex = new Regex(RegexPattern);
        }

        /// <summary>
        /// Constructs a regex string from a file pattern. Throws
        /// `InvalidFilePatternException` for invalid patterns.
        /// </summary>
        /// <remarks>
        /// Translation rules: `//` (allowed at most once) becomes a named group matching
        /// a single `/`; `**` becomes `.*` and must be delimited by `/` or the ends of the
        /// pattern; `*` becomes `[^/]*`; every other character is matched literally.
        /// The result is anchored at the start and matches any trailing text.
        /// </remarks>
        private static StringBuilder BuildRegex(string pattern)
        {
            // True iff `index` is inside the pattern and the character there satisfies `p`.
            bool HasCharAt(int index, Predicate<char> p) =>
                index >= 0 && index < pattern.Length && p(pattern[index]);

            var sb = new StringBuilder();
            var i = 0;
            var seenDoubleSlash = false;
            sb.Append('^');

            while (i < pattern.Length)
            {
                if (pattern[i] == '/')
                {
                    if (HasCharAt(i + 1, c => c == '/'))
                    {
                        if (seenDoubleSlash)
                        {
                            throw new InvalidFilePatternException(pattern, "'//' is allowed at most once.");
                        }

                        // The named group records where the transformer suffix starts.
                        sb.Append("(?<").Append(DoubleSlashGroup).Append(">/)");
                        i += 2;
                        seenDoubleSlash = true;
                    }
                    else
                    {
                        sb.Append('/');
                        i++;
                    }
                }
                else if (pattern[i] == '*')
                {
                    if (HasCharAt(i + 1, c => c == '*'))
                    {
                        if (HasCharAt(i - 1, c => c != '/'))
                        {
                            throw new InvalidFilePatternException(pattern, "'**' preceeded by non-`/` character.");
                        }

                        if (HasCharAt(i + 2, c => c != '/'))
                        {
                            throw new InvalidFilePatternException(pattern, "'**' succeeded by non-`/` character");
                        }

                        sb.Append(".*");
                        i += 2;
                    }
                    else
                    {
                        // A single `*` never crosses a directory separator.
                        sb.Append("[^/]*");
                        i++;
                    }
                }
                else
                {
                    sb.Append(Regex.Escape(pattern[i++].ToString()));
                }
            }

            // Patterns match prefixes: anything may follow the matched part.
            return sb.Append(".*");
        }

        /// <summary>
        /// The regex pattern compiled from this file pattern.
        /// </summary>
        public string RegexPattern { get; }

        /// <summary>
        /// Returns `true` if the set of file patterns `patterns` match the path `path`.
        /// If so, `transformerSuffix` will contain the part of `path` that needs to be
        /// suffixed when using path transformers.
        /// </summary>
        /// <remarks>
        /// Patterns are tried from last to first and the first one that matches decides
        /// the outcome: an inclusion pattern yields `true`, an exclusion pattern `false`.
        /// When the deciding pattern contains `//`, the suffix is the part of the
        /// normalized path starting at the slash matched by that marker; otherwise it is
        /// the whole normalized path.
        /// </remarks>
        public static bool Matches(IEnumerable<FilePattern> patterns, string path, [NotNullWhen(true)] out string? transformerSuffix)
        {
            path = FileUtils.ConvertToUnix(path).TrimStart('/');

            foreach (var pattern in patterns.Reverse())
            {
                var m = pattern.regex.Match(path);
                if (!m.Success)
                {
                    continue;
                }

                if (pattern.Include)
                {
                    transformerSuffix = m.Groups.TryGetValue(DoubleSlashGroup, out var group)
                        ? path.Substring(group.Index)
                        : path;
                    return true;
                }

                // The most recent matching pattern is an exclusion.
                transformerSuffix = null;
                return false;
            }

            transformerSuffix = null;
            return false;
        }
    }
}