using System.Collections.Generic;
using System.IO;
using System.Linq;
using Semmle.Util.Logging;

namespace Semmle.Extraction.CSharp.DependencyFetching
{
    /// <summary>
    /// Manages the set of assemblies.
    /// Searches for assembly DLLs, indexes them and provides
    /// a lookup facility from assembly ID to filename.
    /// </summary>
    internal class AssemblyCache
    {
        /// <summary>
        /// Locate all reference files and index them.
        /// </summary>
        /// <param name="paths">
        /// Paths to search. Directories are searched recursively. Files are added directly to the
        /// assembly cache. Paths that are neither an existing file nor an existing directory are
        /// logged and skipped.
        /// </param>
        /// <param name="frameworkPaths">
        /// Passed through to <c>OrderAssemblyInfosByPreference</c> when the id index is built.
        /// NOTE(review): the element type is assumed to be <c>string</c> - confirm against that helper.
        /// </param>
        /// <param name="logger">Logger that receives progress and diagnostic messages.</param>
        public AssemblyCache(IEnumerable<string> paths, IEnumerable<string> frameworkPaths, ILogger logger)
        {
            this.logger = logger;

            foreach (var path in paths)
            {
                if (File.Exists(path))
                {
                    // A file is taken as-is, without checking its extension.
                    dllsToIndex.Add(path);
                    continue;
                }

                if (Directory.Exists(path))
                {
                    logger.LogInfo($"Finding reference DLLs in {path}...");
                    AddReferenceDirectory(path);
                }
                else
                {
                    logger.LogInfo($"AssemblyCache: Path not found: {path}");
                }
            }

            IndexReferences(frameworkPaths);
        }

        /// <summary>
        /// Finds all assemblies nested within a directory
        /// and adds them to its index.
        /// (Indexing is performed at a later stage by IndexReferences()).
        /// </summary>
        /// <param name="dir">The directory to index.</param>
        private void AddReferenceDirectory(string dir)
        {
            foreach (var dll in new DirectoryInfo(dir).EnumerateFiles("*.dll", SearchOption.AllDirectories))
            {
                dllsToIndex.Add(dll.FullName);
            }
        }

        /// <summary>
        /// Indexes all DLLs we have located.
        /// Because this is a potentially time-consuming operation, it is put into a separate stage.
        /// </summary>
        /// <param name="frameworkPaths">Forwarded to <c>OrderAssemblyInfosByPreference</c>.</param>
        private void IndexReferences(IEnumerable<string> frameworkPaths)
        {
            logger.LogInfo($"Indexing {dllsToIndex.Count} assemblies...");

            // Read all of the files
            foreach (var filename in dllsToIndex)
            {
                IndexReference(filename);
            }

            logger.LogInfo($"Read {assemblyInfoByFileName.Count} assembly infos");

            var orderedInfos = assemblyInfoByFileName.Values
                .OrderBy(info => info.Name)
                .OrderAssemblyInfosByPreference(frameworkPaths);

            // A later info overwrites an earlier one that shares an index string,
            // so the iteration order decides which assembly an id resolves to.
            foreach (var info in orderedInfos)
            {
                foreach (var index in info.IndexStrings)
                {
                    assemblyInfoById[index] = info;
                }
            }
        }

        /// <summary>
        /// Reads the assembly info of a single file and stores it by filename.
        /// A file that cannot be read is logged and otherwise ignored.
        /// </summary>
        /// <param name="filename">The file to read.</param>
        private void IndexReference(string filename)
        {
            try
            {
                logger.LogDebug($"Reading assembly info from {filename}");
                var info = AssemblyInfo.ReadFromFile(filename);
                assemblyInfoByFileName[filename] = info;
            }
            catch (AssemblyLoadException)
            {
                // Deliberately best-effort: one unreadable file must not abort indexing.
                logger.LogInfo($"Couldn't read assembly info from {filename}");
            }
        }

        /// <summary>
        /// Given an assembly id, determine its full info.
        /// </summary>
        /// <param name="id">The given assembly id.</param>
        /// <returns>The information about the assembly.</returns>
        /// <exception cref="AssemblyLoadException">
        /// The id matches neither an indexed id nor, as a fallback, a lower-cased assembly name.
        /// </exception>
        public AssemblyInfo ResolveReference(string id)
        {
            // Keep the id exactly as the caller passed it. The failure cache is probed with
            // this value, so it must also be the value that gets recorded on failure.
            var requestedId = id;

            // Fast path if we've already seen this before.
            if (failedAssemblyInfoIds.Contains(requestedId))
            {
                throw new AssemblyLoadException();
            }

            (id, var assemblyName) = AssemblyInfo.ComputeSanitizedAssemblyInfo(id);

            // Look up the id in our references map.
            if (assemblyInfoById.TryGetValue(id, out var result))
            {
                // The string is in the references map.
                return result;
            }

            // Fallback position - locate the assembly by its lower-case name only.
            var asmName = assemblyName.ToLowerInvariant();

            if (assemblyInfoById.TryGetValue(asmName, out result))
            {
                // Cache under the sanitized id so that the next resolution of the same
                // string succeeds on the direct lookup above.
                assemblyInfoById[id] = result;
                return result;
            }

            failedAssemblyInfoIds.Add(requestedId); // Fail early next time

            throw new AssemblyLoadException();
        }

        /// <summary>
        /// All the assemblies we have indexed.
        /// </summary>
        public IEnumerable<AssemblyInfo> AllAssemblies => assemblyInfoByFileName.Select(a => a.Value);

        /// <summary>
        /// Retrieve the assembly info of a pre-cached assembly.
        /// If the file has not been indexed yet, an attempt is made to read it first.
        /// </summary>
        /// <param name="filepath">The filename to query.</param>
        /// <returns>The assembly info.</returns>
        /// <exception cref="AssemblyLoadException">
        /// The file is not cached and reading it did not produce an assembly info.
        /// </exception>
        public AssemblyInfo GetAssemblyInfo(string filepath)
        {
            if (assemblyInfoByFileName.TryGetValue(filepath, out var info))
            {
                return info;
            }

            // Not cached: try to read it now. IndexReference logs and swallows read
            // failures, so success is detected by looking the file up again.
            IndexReference(filepath);

            if (assemblyInfoByFileName.TryGetValue(filepath, out info))
            {
                return info;
            }

            throw new AssemblyLoadException();
        }

        // Files collected by the constructor, waiting to be read by IndexReferences().
        private readonly List<string> dllsToIndex = new List<string>();

        // Map from filename to the info read from that file.
        private readonly Dictionary<string, AssemblyInfo> assemblyInfoByFileName = new Dictionary<string, AssemblyInfo>();

        // Map from assembly id (in various formats) to the full info.
        private readonly Dictionary<string, AssemblyInfo> assemblyInfoById = new Dictionary<string, AssemblyInfo>();

        // Ids, as passed by callers, that ResolveReference has already failed to resolve.
        private readonly HashSet<string> failedAssemblyInfoIds = new HashSet<string>();

        private readonly ILogger logger;
    }
}