using System;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using System.IO;
using System.Linq;
using Semmle.Extraction.CSharp.Populators;
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Diagnostics;
using Semmle.Util.Logging;
using Semmle.Util;

namespace Semmle.Extraction.CSharp
{
    /// <summary>
    /// Encapsulates a C# analysis task.
    /// </summary>
    public class Analyser : IDisposable
    {
        // Created by EndInitialize() or InitializeStandalone(); null until then.
        IExtractor extractor;

        // Measures the lifetime of the analyser; started in the constructor.
        readonly Stopwatch stopWatch = new Stopwatch();

        readonly IProgressMonitor progressMonitor;

        public readonly ILogger Logger;

        public readonly bool AddAssemblyTrapPrefix;

        public readonly PathTransformer PathTransformer;

        /// <summary>
        /// Creates the analyser, logs the start of extraction and starts the stopwatch.
        /// </summary>
        /// <param name="pm">Receiver of progress callbacks.</param>
        /// <param name="logger">The logger used for all messages of this analyser.</param>
        /// <param name="addAssemblyTrapPrefix">Stored and later passed to every extraction context created by the analyser.</param>
        /// <param name="pathTransformer">Applied to source and assembly paths before trap files are located.</param>
        public Analyser(IProgressMonitor pm, ILogger logger, bool addAssemblyTrapPrefix, PathTransformer pathTransformer)
        {
            Logger = logger;
            AddAssemblyTrapPrefix = addAssemblyTrapPrefix;
            progressMonitor = pm;
            PathTransformer = pathTransformer;

            Logger.Log(Severity.Info, "EXTRACTION STARTING at {0}", DateTime.Now);
            stopWatch.Start();
        }

        CSharpCompilation compilation;

        Layout layout;

        // Result of BeginInitialize(); EndInitialize() refuses to run unless this is true.
        private bool init;

        /// <summary>
        /// Start initialization of the analyser.
        /// </summary>
        /// <param name="roslynArgs">The arguments passed to Roslyn.</param>
        /// <returns>A Boolean indicating whether to proceed with extraction.</returns>
        public bool BeginInitialize(string[] roslynArgs)
        {
            init = LogRoslynArgs(roslynArgs, Extraction.Extractor.Version);
            return init;
        }

        /// <summary>
        /// End initialization of the analyser.
        /// </summary>
        /// <param name="commandLineArguments">Arguments passed to csc.</param>
        /// <param name="options">Extractor options.</param>
        /// <param name="compilation">The Roslyn compilation.</param>
        /// <exception cref="InternalError">BeginInitialize did not previously return true.</exception>
public void EndInitialize(
    CSharpCommandLineArguments commandLineArguments,
    Options options,
    CSharpCompilation compilation)
{
    if (!init)
        throw new InternalError("EndInitialize called without BeginInitialize returning true");

    layout = new Layout();
    this.options = options;
    this.compilation = compilation;
    extractor = new Extraction.Extractor(false, GetOutputName(compilation, commandLineArguments), Logger, PathTransformer);
    LogDiagnostics();
    SetReferencePaths();

    CompilationErrors += FilteredDiagnostics.Count();
}

/// <summary>
/// Constructs the map from assembly string to its filename.
///
/// Roslyn doesn't record the relationship between a filename and its assembly
/// information, so we need to retrieve this information manually.
/// </summary>
void SetReferencePaths()
{
    foreach (var reference in compilation.References.OfType<PortableExecutableReference>())
    {
        try
        {
            var refPath = reference.FilePath;

            /*  This method is significantly faster and more lightweight than using
             *  System.Reflection.Assembly.ReflectionOnlyLoadFrom. It also allows
             *  loading the same assembly from different locations.
             */
            // The stream has its own using so that it is closed even if the reader cannot be constructed.
            using (var stream = new FileStream(refPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (var pereader = new System.Reflection.PortableExecutable.PEReader(stream))
            {
                var metadata = pereader.GetMetadata();
                string assemblyIdentity;
                unsafe
                {
                    var reader = new System.Reflection.Metadata.MetadataReader(metadata.Pointer, metadata.Length);
                    var def = reader.GetAssemblyDefinition();
                    assemblyIdentity = reader.GetString(def.Name) + " " + def.Version;
                }
                extractor.SetAssemblyFile(assemblyIdentity, refPath);
            }
        }
        catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
        {
            // Best effort: a reference that cannot be read is reported and skipped.
            extractor.Message(new Message("Exception reading reference file", reference.FilePath, null, ex.StackTrace));
        }
    }
}

/// <summary>
/// Initializes the analyser for standalone extraction: creates the layout and a
/// standalone extractor (with no output name), logs the extractor information and
/// records the reference paths.
/// </summary>
/// <param name="compilationIn">The Roslyn compilation.</param>
/// <param name="options">Extractor options.</param>
public void InitializeStandalone(CSharpCompilation compilationIn, CommonOptions options)
{
    compilation = compilationIn;
    layout = new Layout();
    extractor = new Extraction.Extractor(true, null, Logger, PathTransformer);
    this.options = options;
    LogExtractorInfo(Extraction.Extractor.Version);
    SetReferencePaths();
}

// Compiler errors that are not counted or reported as compilation errors.
readonly HashSet<string> errorsToIgnore = new HashSet<string>
{
    "CS7027",   // Code signing failure
    "CS1589",   // XML referencing not supported
    "CS1569"    // Error writing XML documentation
};

/// <summary>
/// The compiler diagnostics of severity error or above, excluding the ignored ids.
/// Empty in standalone mode or before the extractor/compilation are set.
/// The compilation is queried again every time this property is enumerated.
/// </summary>
IEnumerable<Diagnostic> FilteredDiagnostics
{
    get
    {
        if (extractor == null || extractor.Standalone || compilation == null)
            return Enumerable.Empty<Diagnostic>();

        return compilation
            .GetDiagnostics()
            .Where(e => e.Severity >= DiagnosticSeverity.Error && !errorsToIgnore.Contains(e.Id));
    }
}

public IEnumerable<string> MissingTypes => extractor.MissingTypes;

public IEnumerable<string> MissingNamespaces => extractor.MissingNamespaces;

/// <summary>
/// Determine the path of the output dll/exe.
/// </summary>
/// <param name="compilation">Information about the compilation.</param>
/// <param name="commandLineArguments">The parsed command line, consulted for an explicit output file name and directory.</param>
/// <returns>The filename.</returns>
/// <exception cref="ArgumentNullException">No output name was given and the compilation has no entry point and no syntax trees.</exception>
static string GetOutputName(CSharpCompilation compilation,
    CSharpCommandLineArguments commandLineArguments)
{
    // There's no apparent way to access the output filename from the compilation,
    // so we need to re-parse the command line arguments.

    if (commandLineArguments.OutputFileName != null)
        return Path.Combine(commandLineArguments.OutputDirectory, commandLineArguments.OutputFileName);

    // No output specified: Use name based on first filename
    var entry = compilation.GetEntryPoint(System.Threading.CancellationToken.None);

    if (entry == null)
    {
        if (compilation.SyntaxTrees.Length == 0)
        {
            // The single-string constructor would treat the text as a parameter name;
            // use the (paramName, message) overload so the message is reported as such.
            throw new ArgumentNullException(nameof(compilation), "No source files seen");
        }

        // Probably invalid, but have a go anyway.
        var entryPointFile = compilation.SyntaxTrees.First().FilePath;
        return Path.ChangeExtension(entryPointFile, ".exe");
    }

    var entryPointFilename = entry.Locations.First().SourceTree.FilePath;
    return Path.ChangeExtension(entryPointFilename, ".exe");
}

/// <summary>
/// Perform an analysis on a source file/syntax tree.
/// The work is only queued here; it runs in PerformExtraction().
/// </summary>
/// <param name="tree">Syntax tree to analyse.</param>
public void AnalyseTree(SyntaxTree tree)
{
    extractionTasks.Add(() => DoExtractTree(tree));
}

/// <summary>
/// Perform an analysis on an assembly.
/// The work is only queued here; it runs in PerformExtraction().
/// </summary>
/// <param name="assembly">Assembly to analyse.</param>
void AnalyseAssembly(PortableExecutableReference assembly)
{
    // CIL first - it takes longer.
    if (options.CIL)
        extractionTasks.Add(() => DoExtractCIL(assembly));
    extractionTasks.Add(() => DoAnalyseAssembly(assembly));
}

// Guards taskCount and the progress callback in ReportProgress().
readonly object progressMutex = new object();

// Number of tasks reported as finished so far.
int taskCount = 0;

CommonOptions options;

/// <summary>
/// Whether <paramref name="dest"/> exists and was last written no earlier than <paramref name="src"/>.
/// </summary>
static bool FileIsUpToDate(string src, string dest)
{
    return File.Exists(dest) &&
        File.GetLastWriteTime(dest) >= File.GetLastWriteTime(src);
}

/// <summary>Extracts compilation-wide entities, such as compilations and compiler diagnostics.</summary>
public void AnalyseCompilation(string cwd, string[] args)
{
    // Only queued here; executed by PerformExtraction().
    extractionTasks.Add(() => DoAnalyseCompilation(cwd, args));
}

// Set by DoAnalyseCompilation(); stays null if that task has not run or has failed.
Entities.Compilation compilationEntity;

// The trap writer of the compilation entity; kept open until Dispose().
IDisposable compilationTrapFile;

/// <summary>
/// Creates the trap writer and extraction context for the output assembly and
/// constructs the compilation entity in it. Any exception is logged, not propagated.
/// </summary>
void DoAnalyseCompilation(string cwd, string[] args)
{
    try
    {
        var assemblyPath = extractor.OutputPath;
        var transformedAssemblyPath = PathTransformer.Transform(assemblyPath);
        var assembly = compilation.Assembly;
        var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath);
        var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression);
        compilationTrapFile = trapWriter;  // Dispose later
        var cx = extractor.CreateContext(compilation.Clone(), trapWriter, new AssemblyScope(assembly, assemblyPath, true), AddAssemblyTrapPrefix);
        compilationEntity = new Entities.Compilation(cx, cwd, args);
    }
    catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
    {
        Logger.Log(Severity.Error, " Unhandled exception analyzing {0}: {1}", "compilation", ex);
    }
}

/// <summary>
/// Passes the performance metrics to the compilation entity.
/// Does nothing when no compilation entity exists, which is the case when
/// DoAnalyseCompilation() has not run or has logged a failure.
/// </summary>
public void LogPerformance(Entities.PerformanceMetrics p)
{
    compilationEntity?.PopulatePerformance(p);
}

/// <summary>
/// Extract an assembly to a new trap file.
/// If the trap file exists, skip extraction to avoid duplicating
/// extraction within the snapshot.
/// </summary>
/// <param name="r">The assembly to extract.</param>
void DoAnalyseAssembly(PortableExecutableReference r)
{
    try
    {
        var stopwatch = new Stopwatch();
        stopwatch.Start();

        var assemblyPath = r.FilePath;
        var transformedAssemblyPath = PathTransformer.Transform(assemblyPath);
        var projectLayout = layout.LookupProjectOrDefault(transformedAssemblyPath);

        using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedAssemblyPath, true, options.TrapCompression))
        {
            var skipExtraction = options.Cache && File.Exists(trapWriter.TrapFile);

            if (!skipExtraction)
            {
                /* Note on parallel builds:
                 *
                 * The trap writer and source archiver both perform atomic moves
                 * of the file to the final destination.
                 *
                 * If the same source file or trap file are generated concurrently
                 * (by different parallel invocations of the extractor), then
                 * last one wins.
                 *
                 * Specifically, if two assemblies are analysed concurrently in a build,
                 * then there is a small amount of duplicated work but the output should
                 * still be correct.
                 */

                // compilation.Clone() reduces memory footprint by allowing the symbols
                // in c to be garbage collected.
                Compilation c = compilation.Clone();

                var assembly = c.GetAssemblyOrModuleSymbol(r) as IAssemblySymbol;

                if (assembly != null)
                {
                    var cx = extractor.CreateContext(c, trapWriter, new AssemblyScope(assembly, assemblyPath, false), AddAssemblyTrapPrefix);

                    foreach (var module in assembly.Modules)
                    {
                        AnalyseNamespace(cx, module.GlobalNamespace);
                    }

                    cx.PopulateAll();
                }
            }

            ReportProgress(assemblyPath, trapWriter.TrapFile, stopwatch.Elapsed, skipExtraction ? AnalysisAction.UpToDate : AnalysisAction.Extracted);
        }
    }
    catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
    {
        Logger.Log(Severity.Error, " Unhandled exception analyzing {0}: {1}", r.FilePath, ex);
    }
}

/// <summary>
/// Runs CIL extraction for a referenced assembly and reports the outcome as progress.
/// </summary>
/// <param name="r">The assembly to extract.</param>
void DoExtractCIL(PortableExecutableReference r)
{
    var stopwatch = new Stopwatch();
    stopwatch.Start();
    string trapFile;
    bool extracted;
    CIL.Entities.Assembly.ExtractCIL(layout, r.FilePath, Logger, !options.Cache, options.PDB, options.TrapCompression, out trapFile, out extracted);
    stopwatch.Stop();
    ReportProgress(r.FilePath, trapFile, stopwatch.Elapsed, extracted ? AnalysisAction.Extracted : AnalysisAction.UpToDate);
}

/// <summary>
/// Recursively visits a namespace: nested namespaces first, then every type member,
/// each of which is created as an entity and extracted recursively.
/// </summary>
void AnalyseNamespace(Context cx, INamespaceSymbol ns)
{
    foreach (var memberNamespace in ns.GetNamespaceMembers())
    {
        AnalyseNamespace(cx, memberNamespace);
    }

    foreach (var memberType in ns.GetTypeMembers())
    {
        Entities.Type.Create(cx, memberType).ExtractRecursive();
    }
}

/// <summary>
/// Enqueue all reference analysis tasks.
/// </summary>
public void AnalyseReferences()
{
    foreach (var r in compilation.References.OfType<PortableExecutableReference>())
    {
        AnalyseAssembly(r);
    }
}

// The bulk of the extraction work, potentially executed in parallel.
readonly List<Action> extractionTasks = new List<Action>();

/// <summary>
/// Reports one finished task to the progress monitor.
/// The task counter is incremented and the callback invoked under a lock.
/// </summary>
/// <param name="src">The item that was analysed.</param>
/// <param name="output">The trap file path reported for the item.</param>
/// <param name="time">The time taken.</param>
/// <param name="action">What was done with the item.</param>
void ReportProgress(string src, string output, TimeSpan time, AnalysisAction action)
{
    lock (progressMutex)
        progressMonitor.Analysed(++taskCount, extractionTasks.Count, src, output, time, action);
}

/// <summary>
/// Extracts one syntax tree to its trap file and reports progress.
/// The tree is reported as excluded when the layout has no project for it, and as
/// up to date when options.Fast is set and the trap file is not older than the source.
/// Any exception is recorded as an extractor message, not propagated.
/// </summary>
/// <param name="tree">The syntax tree to extract.</param>
void DoExtractTree(SyntaxTree tree)
{
    try
    {
        var stopwatch = new Stopwatch();
        stopwatch.Start();

        var sourcePath = tree.FilePath;
        var transformedSourcePath = PathTransformer.Transform(sourcePath);

        var projectLayout = layout.LookupProjectOrNull(transformedSourcePath);
        bool excluded = projectLayout == null;
        string trapPath = excluded ? "" : projectLayout.GetTrapPath(Logger, transformedSourcePath, options.TrapCompression);
        bool upToDate = false;

        if (!excluded)
        {
            // compilation.Clone() is used to allow symbols to be garbage collected.
            using (var trapWriter = projectLayout.CreateTrapWriter(Logger, transformedSourcePath, false, options.TrapCompression))
            {
                upToDate = options.Fast && FileIsUpToDate(sourcePath, trapWriter.TrapFile);

                if (!upToDate)
                {
                    Context cx = extractor.CreateContext(compilation.Clone(), trapWriter, new SourceScope(tree), AddAssemblyTrapPrefix);
                    Populators.CompilationUnit.Extract(cx, tree.GetRoot());
                    cx.PopulateAll();
                    cx.ExtractComments(cx.CommentGenerator);
                    cx.PopulateAll();
                }
            }
        }

        AnalysisAction action;
        if (excluded)
            action = AnalysisAction.Excluded;
        else if (upToDate)
            action = AnalysisAction.UpToDate;
        else
            action = AnalysisAction.Extracted;

        ReportProgress(sourcePath, trapPath, stopwatch.Elapsed, action);
    }
    catch (Exception ex)  // lgtm[cs/catch-of-all-exceptions]
    {
        extractor.Message(new Message("Unhandled exception processing syntax tree", tree.FilePath, null, ex.StackTrace));
    }
}

/// <summary>
/// Run all extraction tasks.
/// </summary>
/// <param name="numberOfThreads">The number of threads to use.</param>
public void PerformExtraction(int numberOfThreads)
{
    Parallel.Invoke(
        new ParallelOptions { MaxDegreeOfParallelism = numberOfThreads },
        extractionTasks.ToArray());
}

/// <summary>
/// Closes the compilation trap file, logs the peak working set and the final
/// success/failure summary with the elapsed time, then disposes the logger.
/// </summary>
public void Dispose()
{
    compilationTrapFile?.Dispose();

    stopWatch.Stop();

    // Process is IDisposable; release it once the counter has been read.
    using (var currentProcess = Process.GetCurrentProcess())
    {
        Logger.Log(Severity.Info, " Peak working set = {0} MB", currentProcess.PeakWorkingSet64 / (1024 * 1024));
    }

    if (TotalErrors > 0)
        Logger.Log(Severity.Info, "EXTRACTION FAILED with {0} error{1} in {2}", TotalErrors, TotalErrors == 1 ? "" : "s", stopWatch.Elapsed);
    else
        Logger.Log(Severity.Info, "EXTRACTION SUCCEEDED in {0}", stopWatch.Elapsed);

    Logger.Dispose();
}

/// <summary>
/// Number of errors encountered during extraction.
/// Zero while no extractor has been created.
/// </summary>
public int ExtractorErrors => extractor == null ? 0 : extractor.Errors;

/// <summary>
/// Number of errors encountered by the compiler.
/// </summary>
public int CompilationErrors { get; set; }

/// <summary>
/// Total number of errors reported.
/// </summary>
public int TotalErrors => CompilationErrors + ExtractorErrors;

/// <summary>
/// Logs information about the extractor.
/// </summary>
/// <param name="extractorVersion">The version string to log.</param>
public void LogExtractorInfo(string extractorVersion)
{
    Logger.Log(Severity.Info, " Extractor: {0}", Environment.GetCommandLineArgs().First());
    Logger.Log(Severity.Info, " Extractor version: {0}", extractorVersion);
    Logger.Log(Severity.Info, " Current working directory: {0}", Directory.GetCurrentDirectory());
}

/// <summary>
/// Logs information about the extractor, as well as the arguments to Roslyn.
/// </summary>
/// <param name="roslynArgs">The arguments passed to Roslyn.</param>
/// <param name="extractorVersion">The version string to log.</param>
/// <returns>
/// False if a log file for the same arguments already exists (they have been logged
/// previously); true otherwise.
/// </returns>
public bool LogRoslynArgs(string[] roslynArgs, string extractorVersion)
{
    LogExtractorInfo(extractorVersion);
    Logger.Log(Severity.Info, $" Arguments to Roslyn: {string.Join(' ', roslynArgs)}");

    // Write the arguments to a randomly named file first; its content hash
    // then determines the final file name.
    var tempFile = Extractor.GetCSharpArgsLogPath(Path.GetRandomFileName());

    bool argsWritten;
    using (var streamWriter = new StreamWriter(new FileStream(tempFile, FileMode.Append, FileAccess.Write)))
    {
        streamWriter.WriteLine($"# Arguments to Roslyn: {string.Join(' ', roslynArgs.Where(arg => !arg.StartsWith('@')))}");
        argsWritten = roslynArgs.WriteCommandLine(streamWriter);
    }

    var hash = FileUtils.ComputeFileHash(tempFile);
    var argsFile = Extractor.GetCSharpArgsLogPath(hash);

    if (argsWritten)
        Logger.Log(Severity.Info, $" Arguments have been written to {argsFile}");

    if (File.Exists(argsFile))
    {
        // The same arguments were logged before: discard the temporary copy.
        try
        {
            File.Delete(tempFile);
        }
        catch (IOException e)
        {
            Logger.Log(Severity.Warning, $" Failed to remove {tempFile}: {e.Message}");
        }
        return false;
    }

    try
    {
        File.Move(tempFile, argsFile);
    }
    catch (IOException e)
    {
        Logger.Log(Severity.Warning, $" Failed to move {tempFile} to {argsFile}: {e.Message}");
    }

    return true;
}

/// <summary>
/// Logs detailed information about this invocation,
/// in the event that errors were detected.
/// Every filtered compiler error is logged; if there is at least one, the
/// references of the compilation are logged as well.
/// </summary>
public void LogDiagnostics()
{
    // FilteredDiagnostics queries the compilation on every enumeration,
    // so evaluate it once and reuse the result.
    var errors = FilteredDiagnostics.ToList();

    foreach (var error in errors)
    {
        Logger.Log(Severity.Error, " Compilation error: {0}", error);
    }

    if (errors.Count > 0)
    {
        foreach (var reference in compilation.References)
        {
            Logger.Log(Severity.Info, " Resolved reference {0}", reference.Display);
        }
    }
}
} // class Analyser

/// <summary>
/// What action was performed when extracting a file.
/// </summary>
public enum AnalysisAction
{
    Extracted,
    UpToDate,
    Excluded
}

/// <summary>
/// Callback for various extraction events.
/// (Used for display of progress).
/// </summary>
public interface IProgressMonitor
{
    /// <summary>
    /// Callback that a particular item has been analysed.
    /// </summary>
    /// <param name="item">The item number being processed.</param>
    /// <param name="total">The total number of items to process.</param>
    /// <param name="source">The name of the item, e.g. a source file.</param>
    /// <param name="output">The name of the item being output, e.g. a trap file.</param>
    /// <param name="time">The time to extract the item.</param>
    /// <param name="action">What action was taken for the file.</param>
    void Analysed(int item, int total, string source, string output, TimeSpan time, AnalysisAction action);

    /// <summary>
    /// A "using namespace" directive was seen but the given
    /// namespace could not be found.
    /// Only called once for each @namespace.
    /// </summary>
    /// <param name="namespace">The namespace that could not be found.</param>
    void MissingNamespace(string @namespace);

    /// <summary>
    /// An ErrorType was found.
    /// Called once for each type name.
    /// </summary>
    /// <param name="type">The full/partial name of the type.</param>
    void MissingType(string type);

    /// <summary>
    /// Report a summary of missing entities.
    /// </summary>
    /// <param name="types">The number of missing types.</param>
    /// <param name="namespaces">The number of missing using namespace declarations.</param>
    void MissingSummary(int types, int namespaces);
}
} // namespace Semmle.Extraction.CSharp