using System;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using System.IO;
using System.Linq;
using Semmle.Extraction.CSharp.Populators;
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Diagnostics;
using Semmle.Util.Logging;

namespace Semmle.Extraction.CSharp
{
    /// <summary>
    /// Encapsulates a C# analysis task.
    /// </summary>
    public class Analyser : IDisposable
    {
        protected Extraction.Extractor? extractor;
        protected CSharpCompilation? compilation;
        protected Layout? layout;
        protected CommonOptions? options;

        private readonly object progressMutex = new object();

        // The bulk of the extraction work, potentially executed in parallel.
        protected readonly List<Action> extractionTasks = new List<Action>();
        private int taskCount = 0;

        private readonly Stopwatch stopWatch = new Stopwatch();

        private readonly IProgressMonitor progressMonitor;

        public ILogger Logger { get; }

        protected readonly bool addAssemblyTrapPrefix;

        public PathTransformer PathTransformer { get; }

        protected Analyser(IProgressMonitor pm, ILogger logger, bool addAssemblyTrapPrefix, PathTransformer pathTransformer)
        {
            Logger = logger;
            this.addAssemblyTrapPrefix = addAssemblyTrapPrefix;
            Logger.Log(Severity.Info, "EXTRACTION STARTING at {0}", DateTime.Now);
            stopWatch.Start();
            progressMonitor = pm;
            PathTransformer = pathTransformer;
        }

        /// <summary>
        /// Perform an analysis on a source file/syntax tree.
        /// </summary>
        /// <param name="tree">Syntax tree to analyse.</param>
        public void AnalyseTree(SyntaxTree tree)
        {
            extractionTasks.Add(() => DoExtractTree(tree));
        }

#nullable disable warnings

        /// <summary>
        /// Enqueue all reference analysis tasks.
        /// </summary>
        public void AnalyseReferences()
        {
            foreach (var assembly in compilation.References.OfType<PortableExecutableReference>())
            {
                // CIL first - it takes longer.
                if (options.CIL)
                    extractionTasks.Add(() => DoExtractCIL(assembly));
                extractionTasks.Add(() => DoAnalyseReferenceAssembly(assembly));
            }
        }

        /// <summary>
        /// Constructs the map from assembly string to its filename.
        ///
        /// Roslyn doesn't record the relationship between a filename and its assembly
        /// information, so we need to retrieve this information manually.
        /// </summary>
        protected void SetReferencePaths()
        {
            foreach (var reference in compilation.References.OfType<PortableExecutableReference>())
            {
                try
                {
                    var refPath = reference.FilePath!;

                    /* This method is significantly faster and more lightweight than using
                     * System.Reflection.Assembly.ReflectionOnlyLoadFrom. It also allows
                     * loading the same assembly from different locations.
                     */
                    using var pereader = new System.Reflection.PortableExecutable.PEReader(new FileStream(refPath, FileMode.Open, FileAccess.Read, FileShare.Read));

                    var metadata = pereader.GetMetadata();
                    string assemblyIdentity;
                    unsafe
                    {
                        var reader = new System.Reflection.Metadata.MetadataReader(metadata.Pointer, metadata.Length);
                        var def = reader.GetAssemblyDefinition();
                        assemblyIdentity = reader.GetString(def.Name) + " " + def.Version;
                    }
                    extractor.SetAssemblyFile(assemblyIdentity, refPath);
                }
                catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
                {
                    extractor.Message(new Message("Exception reading reference file", reference.FilePath, null, ex.StackTrace));
                }
            }
        }

        /// <summary>
        /// Extract an assembly to a new trap file.
        /// If the trap file exists, skip extraction to avoid duplicating
        /// extraction within the snapshot.
        /// </summary>
        /// <param name="r">The assembly to extract.</param>
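        /// <remarks>
        /// When <c>options.Cache</c> is set, the skip decision below is based solely on
        /// the existence of the trap file; its contents are not revalidated.
        /// </remarks>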
        private void DoAnalyseReferenceAssembly(PortableExecutableReference r)
        {
            try
            {
                var stopwatch = new Stopwatch();
                stopwatch.Start();

                var assemblyPath = r.FilePath!;
                var transformedAssemblyPath = PathTransformer.Transform(assemblyPath);
                var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath);
                using var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, options.TrapCompression, discardDuplicates: true);

                var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile);

                if (!skipExtraction)
                {
                    /* Note on parallel builds:
                     *
                     * The trap writer and source archiver both perform atomic moves
                     * of the file to the final destination.
                     *
                     * If the same source file or trap file is generated concurrently
                     * (by different parallel invocations of the extractor), then
                     * the last one wins.
                     *
                     * Specifically, if two assemblies are analysed concurrently in a build,
                     * then there is a small amount of duplicated work but the output should
                     * still be correct.
                     */

                    // compilation.Clone() reduces memory footprint by allowing the symbols
                    // in c to be garbage collected.
                    Compilation c = compilation.Clone();

                    if (c.GetAssemblyOrModuleSymbol(r) is IAssemblySymbol assembly)
                    {
                        var cx = new Context(extractor, c, trapWriter, new AssemblyScope(assembly, assemblyPath), addAssemblyTrapPrefix);

                        foreach (var module in assembly.Modules)
                        {
                            AnalyseNamespace(cx, module.GlobalNamespace);
                        }

                        Entities.Attribute.ExtractAttributes(cx, assembly, Entities.Assembly.Create(cx, assembly.GetSymbolLocation()));

                        cx.PopulateAll();
                    }
                }

                ReportProgress(assemblyPath, trapWriter.TrapFile, stopwatch.Elapsed, skipExtraction ? AnalysisAction.UpToDate : AnalysisAction.Extracted);
            }
            catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
            {
                Logger.Log(Severity.Error, " Unhandled exception analyzing {0}: {1}", r.FilePath, ex);
            }
        }

        private void DoExtractCIL(PortableExecutableReference r)
        {
            var stopwatch = new Stopwatch();
            stopwatch.Start();
            CIL.Analyser.ExtractCIL(layout, r.FilePath!, Logger, !options.Cache, options.PDB, options.TrapCompression, out var trapFile, out var extracted);
            stopwatch.Stop();
            ReportProgress(r.FilePath, trapFile, stopwatch.Elapsed, extracted ? AnalysisAction.Extracted : AnalysisAction.UpToDate);
        }

        private void DoExtractTree(SyntaxTree tree)
        {
            try
            {
                var stopwatch = new Stopwatch();
                stopwatch.Start();
                var sourcePath = tree.FilePath;
                var transformedSourcePath = PathTransformer.Transform(sourcePath);
                var projectLayout = layout.LookupProjectOrNull(transformedSourcePath);
                var excluded = projectLayout is null;
                var trapPath = excluded ? "" : projectLayout!.GetTrapPath(Logger, transformedSourcePath, options.TrapCompression);
                var upToDate = false;

                if (!excluded)
                {
                    // compilation.Clone() is used to allow symbols to be garbage collected.
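                    // The trap writer is created before the up-to-date check because its
                    // trap file path is the destination compared against the source file's
                    // timestamp when options.Fast is enabled.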
                    using var trapWriter = projectLayout!.CreateTrapWriter(Logger, transformedSourcePath, options.TrapCompression, discardDuplicates: false);

                    upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile);

                    if (!upToDate)
                    {
                        var cx = new Context(extractor, compilation.Clone(), trapWriter, new SourceScope(tree), addAssemblyTrapPrefix);
                        // Ensure that the file itself is populated in case the source file is totally empty
                        var root = tree.GetRoot();
                        Entities.File.Create(cx, root.SyntaxTree.FilePath);

                        var csNode = (CSharpSyntaxNode)root;
                        var directiveVisitor = new DirectiveVisitor(cx);
                        csNode.Accept(directiveVisitor);
                        foreach (var branch in directiveVisitor.BranchesTaken)
                        {
                            cx.TrapStackSuffix.Add(branch);
                        }
                        csNode.Accept(new CompilationUnitVisitor(cx));
                        cx.PopulateAll();
                        CommentPopulator.ExtractCommentBlocks(cx, cx.CommentGenerator);
                        cx.PopulateAll();
                    }
                }

                ReportProgress(sourcePath, trapPath, stopwatch.Elapsed, excluded ? AnalysisAction.Excluded : upToDate ? AnalysisAction.UpToDate : AnalysisAction.Extracted);
            }
            catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
            {
                extractor.Message(new Message($"Unhandled exception processing syntax tree. {ex.Message}", tree.FilePath, null, ex.StackTrace));
            }
        }

#nullable restore warnings

        private static bool FileIsUpToDate(string src, string dest)
        {
            return File.Exists(dest) &&
                File.GetLastWriteTime(dest) >= File.GetLastWriteTime(src);
        }

        private void AnalyseNamespace(Context cx, INamespaceSymbol ns)
        {
            foreach (var memberNamespace in ns.GetNamespaceMembers())
            {
                AnalyseNamespace(cx, memberNamespace);
            }

            foreach (var memberType in ns.GetTypeMembers())
            {
                Entities.Type.Create(cx, memberType).ExtractRecursive();
            }
        }

        private void ReportProgress(string src, string output, TimeSpan time, AnalysisAction action)
        {
            lock (progressMutex)
                progressMonitor.Analysed(++taskCount, extractionTasks.Count, src, output, time, action);
        }

        /// <summary>
        /// Run all extraction tasks.
        /// </summary>
        /// <param name="numberOfThreads">The number of threads to use.</param>
        public void PerformExtraction(int numberOfThreads)
        {
            Parallel.Invoke(
                new ParallelOptions { MaxDegreeOfParallelism = numberOfThreads },
                extractionTasks.ToArray());
        }

        public virtual void Dispose()
        {
            stopWatch.Stop();
            Logger.Log(Severity.Info, " Peak working set = {0} MB", Process.GetCurrentProcess().PeakWorkingSet64 / (1024 * 1024));

            if (TotalErrors > 0)
                Logger.Log(Severity.Info, "EXTRACTION FAILED with {0} error{1} in {2}", TotalErrors, TotalErrors == 1 ? "" : "s", stopWatch.Elapsed);
            else
                Logger.Log(Severity.Info, "EXTRACTION SUCCEEDED in {0}", stopWatch.Elapsed);

            Logger.Dispose();
        }

        /// <summary>
        /// Number of errors encountered during extraction.
        /// </summary>
        private int ExtractorErrors => extractor?.Errors ?? 0;

        /// <summary>
        /// Number of errors encountered by the compiler.
        /// </summary>
        public int CompilationErrors { get; set; }

        /// <summary>
        /// Total number of errors reported.
        /// </summary>
        public int TotalErrors => CompilationErrors + ExtractorErrors;

        /// <summary>
        /// Logs information about the extractor.
        /// </summary>
        public void LogExtractorInfo(string extractorVersion)
        {
            Logger.Log(Severity.Info, " Extractor: {0}", Environment.GetCommandLineArgs().First());
            Logger.Log(Severity.Info, " Extractor version: {0}", extractorVersion);
            Logger.Log(Severity.Info, " Current working directory: {0}", Directory.GetCurrentDirectory());
        }
    }
}