using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.Text;
using Semmle.Util;
using Semmle.Util.Logging;

namespace Semmle.Extraction.CSharp
{
    /// <summary>
    /// Result of an extractor run, as returned by <see cref="Extractor.Run"/> and
    /// <see cref="Extractor.Analyse"/>.
    /// </summary>
    public enum ExitCode
    {
        Ok,         // Everything worked perfectly
        Errors,     // Trap was generated but there were processing errors
        Failed      // Trap could not be generated
    }

    public static class Extractor
    {
        /// <summary>
        /// Progress monitor that writes one log line per analysed item, except for
        /// items that were already up to date. All other notifications are ignored.
        /// </summary>
        private class LogProgressMonitor : IProgressMonitor
        {
            private readonly ILogger logger;

            public LogProgressMonitor(ILogger logger)
            {
                this.logger = logger;
            }

            public void Analysed(int item, int total, string source, string output, TimeSpan time, AnalysisAction action)
            {
                if (action == AnalysisAction.UpToDate)
                {
                    // Up-to-date items are deliberately not logged.
                    return;
                }

                var detail = action switch
                {
                    AnalysisAction.Extracted => time.ToString(),
                    AnalysisAction.Excluded => "excluded",
                    _ => "up to date"
                };

                logger.Log(Severity.Info, " {0} ({1})", source, detail);
            }

            public void Started(int item, int total, string source) { }

            public void MissingNamespace(string @namespace) { }

            public void MissingSummary(int types, int namespaces) { }

            public void MissingType(string type) { }
        }

        /// <summary>
        /// Set the application culture to the invariant culture.
        ///
        /// This is required among others to ensure that the invariant culture is used for value formatting during TRAP
        /// file writing.
        /// </summary>
        public static void SetInvariantCulture()
        {
            var culture = CultureInfo.InvariantCulture;

            // Defaults for threads created from now on ...
            CultureInfo.DefaultThreadCurrentCulture = culture;
            CultureInfo.DefaultThreadCurrentUICulture = culture;

            // ... and the thread we are currently running on.
            Thread.CurrentThread.CurrentCulture = culture;
            Thread.CurrentThread.CurrentUICulture = culture;
        }

        /// <summary>
        /// Creates the extractor logger. Output always goes to the file returned by
        /// <see cref="GetCSharpLogPath"/>; when <paramref name="includeConsole"/> is set
        /// it is additionally written to the console.
        /// </summary>
        /// <param name="verbosity">Verbosity passed to the created logger(s).</param>
        /// <param name="includeConsole">Whether to also log to the console.</param>
        /// <returns>The logger to use; the caller is responsible for disposing it.</returns>
        public static ILogger MakeLogger(Verbosity verbosity, bool includeConsole)
        {
            var fileLogger = new FileLogger(verbosity, GetCSharpLogPath(), logThreadId: true);
            return includeConsole
                ? new CombinedLogger(new ConsoleLogger(verbosity, logThreadId: true), fileLogger)
                : (ILogger)fileLogger;
        }

        /// <summary>
        /// Command-line driver for the extractor.
        /// </summary>
        ///
        /// <remarks>
        /// The extractor can be invoked in one of two ways: Either as an "analyser" passed in via the /a
        /// option to csc.exe, or as a stand-alone executable. In this case, we need to faithfully
        /// drive Roslyn in the way that csc.exe would.
        /// </remarks>
        ///
        /// <param name="args">Command line arguments as passed to csc.exe</param>
        /// <returns>
        /// <see cref="ExitCode.Ok"/> when extraction succeeded or was skipped,
        /// <see cref="ExitCode.Failed"/> when the command line could not be parsed, and
        /// <see cref="ExitCode.Errors"/> when an unhandled exception was caught; otherwise
        /// the result of the analysis itself.
        /// </returns>
        public static ExitCode Run(string[] args)
        {
            // Measures the total run time; handed on to the analysis for reporting.
            var stopwatch = new Stopwatch();
            stopwatch.Start();

            var options = Options.CreateWithEnvironment(args);
            var workingDirectory = Directory.GetCurrentDirectory();
            var compilerArgs = options.CompilerArguments.ToArray();

            using var logger = MakeLogger(options.Verbosity, options.Console);

            var canonicalPathCache = CanonicalPathCache.Create(logger, 1000);
            var pathTransformer = new PathTransformer(canonicalPathCache);

            using var analyser = new TracingAnalyser(new LogProgressMonitor(logger), logger, options.AssemblySensitiveTrap, pathTransformer);

            try
            {
                if (options.ProjectsToLoad.Any())
                {
                    AddSourceFilesFromProjects(options.ProjectsToLoad, options.CompilerArguments, logger);
                }

                var compilerVersion = new CompilerVersion(options);
                if (compilerVersion.SkipExtraction)
                {
                    logger.Log(Severity.Warning, " Unrecognized compiler '{0}' because {1}", compilerVersion.SpecifiedCompiler, compilerVersion.SkipReason);
                    return ExitCode.Ok;
                }

                var compilerArguments = CSharpCommandLineParser.Default.Parse(
                    compilerVersion.ArgsWithResponse,
                    workingDirectory,
                    compilerVersion.FrameworkPath,
                    compilerVersion.AdditionalReferenceDirectories
                );

                if (compilerArguments is null)
                {
                    var sb = new StringBuilder();
                    sb.Append(" Failed to parse command line: ").AppendList(" ", compilerArgs);
                    logger.Log(Severity.Error, sb.ToString());
                    ++analyser.CompilationErrors;
                    return ExitCode.Failed;
                }

                if (!analyser.BeginInitialize(compilerVersion.ArgsWithResponse))
                {
                    logger.Log(Severity.Info, "Skipping extraction since files have already been extracted");
                    return ExitCode.Ok;
                }

                return AnalyseTracing(workingDirectory, compilerArgs, analyser, compilerArguments, options, canonicalPathCache, stopwatch);
            }
            catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
            {
                logger.Log(Severity.Error, " Unhandled exception: {0}", ex);
                return ExitCode.Errors;
            }
        }

        /// <summary>
        /// Appends the source files of the given projects, and of all projects they
        /// (transitively) reference, to <paramref name="compilerArguments"/>.
        /// Sources whose path contains <c>/obj/</c> are skipped, and each project file
        /// is processed at most once.
        /// </summary>
        /// <param name="projectsToLoad">Paths of the project files to start from.</param>
        /// <param name="compilerArguments">Argument list that the source paths are added to.</param>
        /// <param name="logger">Logger for progress messages.</param>
        private static void AddSourceFilesFromProjects(IEnumerable<string> projectsToLoad, IList<string> compilerArguments, ILogger logger)
        {
            logger.Log(Severity.Info, " Loading referenced projects.");

            var projects = new Queue<string>(projectsToLoad);
            var processed = new HashSet<string>();

            while (projects.Count > 0)
            {
                var project = projects.Dequeue();
                var fi = new FileInfo(project);

                // Add returns false when the project was seen before (guards against
                // duplicate and cyclic project references).
                if (!processed.Add(fi.FullName))
                {
                    continue;
                }

                logger.Log(Severity.Info, " Processing referenced project: " + fi.FullName);

                var csProj = new CsProjFile(fi);

                foreach (var cs in csProj.Sources)
                {
                    if (cs.Contains("/obj/"))
                    {
                        continue;
                    }

                    compilerArguments.Add(cs);
                }

                foreach (var pr in csProj.ProjectReferences)
                {
                    projects.Enqueue(pr);
                }
            }
        }

        /// <summary>
        /// Gets the complete list of locations to locate references.
        /// </summary>
        /// <param name="args">Command line arguments.</param>
        /// <returns>List of directories.</returns>
        private static IEnumerable<string> FixedReferencePaths(Microsoft.CodeAnalysis.CommandLineArguments args)
        {
            // See https://msdn.microsoft.com/en-us/library/s5bac5fx.aspx
            // on how csc resolves references. Basically,
            // 1) Current working directory. This is the directory from which the compiler is invoked.
            // 2) The common language runtime system directory.
            // 3) Directories specified by /lib.
            // 4) Directories specified by the LIB environment variable.

            if (args.BaseDirectory is not null)
            {
                yield return args.BaseDirectory;
            }

            foreach (var r in args.ReferencePaths)
            {
                yield return r;
            }

            var lib = System.Environment.GetEnvironmentVariable("LIB");
            if (lib is not null)
            {
                // LIB holds a semicolon-delimited list of directories; yielding the raw
                // value would produce a single bogus path whenever it has several entries.
                foreach (var directory in lib.Split(';', StringSplitOptions.RemoveEmptyEntries))
                {
                    yield return directory;
                }
            }
        }

        /// <summary>
        /// Creates a metadata reference for the file at <paramref name="path"/>, carrying
        /// over the properties of the command-line <paramref name="reference"/>.
        /// </summary>
        private static MetadataReference MakeReference(CommandLineReference reference, string path)
        {
            return MetadataReference.CreateFromFile(path).WithProperties(reference.Properties);
        }

        /// <summary>
        /// Construct tasks for resolving references (possibly in parallel).
        ///
        /// The resolved references will be added (thread-safely) to the supplied
        /// list <paramref name="ret"/>.
        /// </summary>
        /// <param name="args">Parsed command line whose metadata references are resolved.</param>
        /// <param name="analyser">Analyser used for error logging and error counting.</param>
        /// <param name="canonicalPathCache">Cache used to canonicalise resolved paths.</param>
        /// <param name="ret">Collection receiving the resolved references.</param>
        /// <returns>One deferred action per metadata reference.</returns>
        private static IEnumerable<Action> ResolveReferences(Microsoft.CodeAnalysis.CommandLineArguments args, Analyser analyser, CanonicalPathCache canonicalPathCache, BlockingCollection<MetadataReference> ret)
        {
            // Computed at most once, and only if some reference is not rooted.
            var referencePaths = new Lazy<string[]>(() => FixedReferencePaths(args).ToArray());

            return args.MetadataReferences.Select<CommandLineReference, Action>(clref => () =>
            {
                if (Path.IsPathRooted(clref.Reference))
                {
                    if (File.Exists(clref.Reference))
                    {
                        var reference = MakeReference(clref, canonicalPathCache.GetCanonicalPath(clref.Reference));
                        ret.Add(reference);
                    }
                    else
                    {
                        lock (analyser)
                        {
                            analyser.Logger.Log(Severity.Error, " Reference '{0}' does not exist", clref.Reference);
                            ++analyser.CompilationErrors;
                        }
                    }
                }
                else
                {
                    // Probe the search directories in order; the first existing file wins.
                    var composed = referencePaths.Value
                        .Select(path => Path.Combine(path, clref.Reference))
                        .Where(path => File.Exists(path))
                        .Select(path => canonicalPathCache.GetCanonicalPath(path))
                        .FirstOrDefault();

                    if (composed is not null)
                    {
                        var reference = MakeReference(clref, composed);
                        ret.Add(reference);
                    }
                    else
                    {
                        lock (analyser)
                        {
                            analyser.Logger.Log(Severity.Error, " Unable to resolve reference '{0}'", clref.Reference);
                            ++analyser.CompilationErrors;
                        }
                    }
                }
            });
        }

        /// <summary>
        /// Construct tasks for reading source code files (possibly in parallel).
        ///
        /// The constructed syntax trees will be added (thread-safely) to the supplied
        /// list <paramref name="ret"/>.
        /// </summary>
        /// <param name="sources">Paths of the source files to parse.</param>
        /// <param name="analyser">Analyser used for logging and error counting.</param>
        /// <param name="parseOptions">Parse options handed to the C# parser.</param>
        /// <param name="encoding">Encoding used when reading the source text.</param>
        /// <param name="ret">List receiving the parsed syntax trees.</param>
        /// <returns>One deferred action per source file.</returns>
        public static IEnumerable<Action> ReadSyntaxTrees(IEnumerable<string> sources, Analyser analyser, CSharpParseOptions? parseOptions, Encoding? encoding, IList<SyntaxTree> ret)
        {
            return sources.Select<string, Action>(path => () =>
            {
                try
                {
                    using var file = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
                    analyser.Logger.Log(Severity.Trace, $"Parsing source file: '{path}'");
                    var tree = CSharpSyntaxTree.ParseText(SourceText.From(file, encoding), parseOptions, path);
                    analyser.Logger.Log(Severity.Trace, $"Source file parsed: '{path}'");

                    // The list is shared between the tasks, so guard the insertion.
                    lock (ret)
                    {
                        ret.Add(tree);
                    }
                }
                catch (IOException ex)
                {
                    lock (analyser)
                    {
                        analyser.Logger.Log(Severity.Error, " Unable to open source file {0}: {1}", path, ex.Message);
                        ++analyser.CompilationErrors;
                    }
                }
            });
        }

        /// <summary>
        /// Runs a complete analysis: resolves references and parses sources in parallel,
        /// builds the compilation, analyses it, performs the extraction and logs
        /// performance figures.
        /// </summary>
        /// <param name="stopwatch">Stopwatch whose elapsed time is reported as the total time.</param>
        /// <param name="analyser">The analyser driving the extraction.</param>
        /// <param name="options">Options; supplies the degree of parallelism.</param>
        /// <param name="getResolvedReferenceTasks">Produces the reference-resolution tasks, which fill the given collection.</param>
        /// <param name="getSyntaxTreeTasks">Produces the parsing tasks, which fill the given list.</param>
        /// <param name="getCompilation">Builds the compilation from the syntax trees and references.</param>
        /// <param name="initializeAnalyser">Invoked with the compilation before analysis starts.</param>
        /// <param name="postProcess">Invoked after extraction and performance logging.</param>
        /// <returns>
        /// <see cref="ExitCode.Failed"/> when no source files were found and
        /// <paramref name="analyser"/> is a <see cref="TracingAnalyser"/>; otherwise
        /// <see cref="ExitCode.Ok"/> if the analyser reports no errors, else <see cref="ExitCode.Errors"/>.
        /// </returns>
        public static ExitCode Analyse(Stopwatch stopwatch, Analyser analyser, CommonOptions options,
            Func<BlockingCollection<MetadataReference>, IEnumerable<Action>> getResolvedReferenceTasks,
            Func<Analyser, List<SyntaxTree>, IEnumerable<Action>> getSyntaxTreeTasks,
            Func<IEnumerable<SyntaxTree>, IEnumerable<MetadataReference>, CSharpCompilation> getCompilation,
            Action<CSharpCompilation, CommonOptions> initializeAnalyser,
            Action postProcess)
        {
            using var references = new BlockingCollection<MetadataReference>();
            var referenceTasks = getResolvedReferenceTasks(references);

            var syntaxTrees = new List<SyntaxTree>();
            var syntaxTreeTasks = getSyntaxTreeTasks(analyser, syntaxTrees);

            var sw = new Stopwatch();
            sw.Start();

            Parallel.Invoke(
                new ParallelOptions { MaxDegreeOfParallelism = options.Threads },
                referenceTasks.Interleave(syntaxTreeTasks).ToArray());

            if (syntaxTrees.Count == 0)
            {
                analyser.Logger.Log(Severity.Error, " No source files");
                ++analyser.CompilationErrors;
                if (analyser is TracingAnalyser)
                {
                    return ExitCode.Failed;
                }
            }

            var compilation = getCompilation(syntaxTrees, references);

            initializeAnalyser(compilation, options);
            analyser.AnalyseCompilation();
            analyser.AnalyseReferences();

            foreach (var tree in compilation.SyntaxTrees)
            {
                analyser.AnalyseTree(tree);
            }

            sw.Stop();
            analyser.Logger.Log(Severity.Info, " Models constructed in {0}", sw.Elapsed);
            var elapsed = sw.Elapsed;

            // Process is IDisposable; release it when the method exits.
            using var currentProcess = Process.GetCurrentProcess();
            var cpuTime1 = currentProcess.TotalProcessorTime;
            var userTime1 = currentProcess.UserProcessorTime;

            sw.Restart();
            analyser.PerformExtraction(options.Threads);
            sw.Stop();

            var cpuTime2 = currentProcess.TotalProcessorTime;
            var userTime2 = currentProcess.UserProcessorTime;

            var performance = new Entities.PerformanceMetrics()
            {
                // Everything up to (and including) model construction.
                Frontend = new Entities.Timings() { Elapsed = elapsed, Cpu = cpuTime1, User = userTime1 },
                // The extraction step only: difference between the two samples.
                Extractor = new Entities.Timings() { Elapsed = sw.Elapsed, Cpu = cpuTime2 - cpuTime1, User = userTime2 - userTime1 },
                Total = new Entities.Timings() { Elapsed = stopwatch.Elapsed, Cpu = cpuTime2, User = userTime2 },
                PeakWorkingSet = currentProcess.PeakWorkingSet64
            };

            analyser.LogPerformance(performance);
            analyser.Logger.Log(Severity.Info, " Extraction took {0}", sw.Elapsed);

            postProcess();

            return analyser.TotalErrors == 0 ? ExitCode.Ok : ExitCode.Errors;
        }

        /// <summary>
        /// Runs <see cref="Analyse"/> for a traced compiler invocation, wiring in reference
        /// resolution, source file reading (including any generated <c>*.cs</c> files) and
        /// compilation creation based on the parsed compiler command line.
        /// </summary>
        /// <param name="cwd">Working directory passed on to the analyser initialisation.</param>
        /// <param name="args">Raw compiler arguments passed on to the analyser initialisation.</param>
        /// <param name="analyser">The tracing analyser.</param>
        /// <param name="compilerArguments">The parsed compiler command line.</param>
        /// <param name="options">Extractor options.</param>
        /// <param name="canonicalPathCache">Cache used to canonicalise file paths.</param>
        /// <param name="stopwatch">Stopwatch measuring the total run time.</param>
        /// <returns>The exit code produced by <see cref="Analyse"/>.</returns>
        private static ExitCode AnalyseTracing(
            string cwd,
            string[] args,
            TracingAnalyser analyser,
            CSharpCommandLineArguments compilerArguments,
            Options options,
            CanonicalPathCache canonicalPathCache,
            Stopwatch stopwatch)
        {
            return Analyse(stopwatch, analyser, options,
                references => ResolveReferences(compilerArguments, analyser, canonicalPathCache, references),
                (analyser, syntaxTrees) =>
                {
                    var paths = compilerArguments.SourceFiles
                        .Select(src => src.Path)
                        .ToList();

                    if (compilerArguments.GeneratedFilesOutputDirectory is not null)
                    {
                        // Also pick up every *.cs file below the generated-files directory.
                        paths.AddRange(Directory.GetFiles(compilerArguments.GeneratedFilesOutputDirectory, "*.cs", new EnumerationOptions { RecurseSubdirectories = true, MatchCasing = MatchCasing.CaseInsensitive }));
                    }

                    return ReadSyntaxTrees(
                        paths.Select(canonicalPathCache.GetCanonicalPath),
                        analyser,
                        compilerArguments.ParseOptions,
                        compilerArguments.Encoding,
                        syntaxTrees);
                },
                (syntaxTrees, references) =>
                {
                    // csc.exe (CSharpCompiler.cs) also provides CompilationOptions
                    // .WithMetadataReferenceResolver(),
                    // .WithXmlReferenceResolver() and
                    // .WithSourceReferenceResolver().
                    // These would be needed if we hadn't explicitly provided the source/references
                    // already.
                    return CSharpCompilation.Create(
                        compilerArguments.CompilationName,
                        syntaxTrees,
                        references,
                        compilerArguments.CompilationOptions
                            .WithAssemblyIdentityComparer(DesktopAssemblyIdentityComparer.Default)
                            .WithStrongNameProvider(new DesktopStrongNameProvider(compilerArguments.KeyFileSearchPaths))
                            .WithMetadataImportOptions(MetadataImportOptions.All)
                    );
                },
                (compilation, options) => analyser.EndInitialize(compilerArguments, options, compilation, cwd, args),
                () => { });
        }

        /// <summary>
        /// Gets the path to the `csharp.log` file written to by the C# extractor.
        /// </summary>
        public static string GetCSharpLogPath() =>
            Path.Combine(GetCSharpLogDirectory(), "csharp.log");

        /// <summary>
        /// Gets the path to a `csharp.{hash}.txt` file written to by the C# extractor.
        /// </summary>
        public static string GetCSharpArgsLogPath(string hash) =>
            Path.Combine(GetCSharpLogDirectory(), $"csharp.{hash}.txt");

        /// <summary>
        /// Gets a list of all `csharp.{hash}.txt` files currently written to the log directory.
        /// </summary>
        public static IEnumerable<string> GetCSharpArgsLogs()
        {
            try
            {
                return Directory.EnumerateFiles(GetCSharpLogDirectory(), "csharp.*.txt");
            }
            catch (DirectoryNotFoundException)
            {
                // If the directory does not exist, there are no log files
                return Enumerable.Empty<string>();
            }
        }

        /// <summary>
        /// Gets the directory that extractor log files are written to: the value of the
        /// <c>CODEQL_EXTRACTOR_CSHARP_LOG_DIR</c> environment variable when it is set and
        /// non-empty, otherwise the current working directory.
        /// </summary>
        private static string GetCSharpLogDirectory()
        {
            var codeQlLogDir = Environment.GetEnvironmentVariable("CODEQL_EXTRACTOR_CSHARP_LOG_DIR");
            if (!string.IsNullOrEmpty(codeQlLogDir))
            {
                return codeQlLogDir;
            }

            return Directory.GetCurrentDirectory();
        }
    }
}