C#: Move all file lookup to separate class

This commit is contained in:
Tamas Vajk
2024-04-12 13:58:37 +02:00
parent 91f2ea572c
commit 5406fac834
4 changed files with 159 additions and 99 deletions

View File

@@ -13,15 +13,15 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
{
public sealed partial class DependencyManager
{
private void RestoreNugetPackages(List<FileInfo> allNonBinaryFiles, IEnumerable<string> allProjects, IEnumerable<string> allSolutions, HashSet<AssemblyLookupLocation> dllLocations)
private void RestoreNugetPackages(HashSet<AssemblyLookupLocation> dllLocations)
{
var checkNugetFeedResponsiveness = EnvironmentVariables.GetBoolean(EnvironmentVariableNames.CheckNugetFeedResponsiveness);
try
{
if (checkNugetFeedResponsiveness && !CheckFeeds(allNonBinaryFiles))
if (checkNugetFeedResponsiveness && !CheckFeeds())
{
// todo: we could also check the reachability of the inherited nuget feeds, but to use those in the fallback we would need to handle authentication too.
DownloadMissingPackagesFromSpecificFeeds(allNonBinaryFiles, dllLocations);
DownloadMissingPackagesFromSpecificFeeds(dllLocations);
return;
}
@@ -64,8 +64,8 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
logger.LogError($"Failed to restore Nuget packages with nuget.exe: {exc.Message}");
}
var restoredProjects = RestoreSolutions(allSolutions, out var assets1);
var projects = allProjects.Except(restoredProjects);
var restoredProjects = RestoreSolutions(out var assets1);
var projects = fileProvider.Projects.Except(restoredProjects);
RestoreProjects(projects, out var assets2);
var dependencies = Assets.GetCompilationDependencies(logger, assets1.Union(assets2));
@@ -80,11 +80,11 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
if (checkNugetFeedResponsiveness)
{
DownloadMissingPackagesFromSpecificFeeds(allNonBinaryFiles, dllLocations);
DownloadMissingPackagesFromSpecificFeeds(dllLocations);
}
else
{
DownloadMissingPackages(allNonBinaryFiles, dllLocations);
DownloadMissingPackages(dllLocations);
}
}
@@ -122,13 +122,12 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
/// Populates assets with the relative paths to the assets files generated by the restore.
/// Returns a list of projects that are up to date with respect to restore.
/// </summary>
/// <param name="solutions">A list of paths to solution files.</param>
private IEnumerable<string> RestoreSolutions(IEnumerable<string> solutions, out IEnumerable<string> assets)
private IEnumerable<string> RestoreSolutions(out IEnumerable<string> assets)
{
var successCount = 0;
var nugetSourceFailures = 0;
var assetFiles = new List<string>();
var projects = solutions.SelectMany(solution =>
var projects = fileProvider.Solutions.SelectMany(solution =>
{
logger.LogInfo($"Restoring solution {solution}...");
var res = dotnet.Restore(new(solution, packageDirectory.DirInfo.FullName, ForceDotnetRefAssemblyFetching: true));
@@ -184,12 +183,12 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
CompilationInfos.Add(("Failed project restore with package source error", nugetSourceFailures.ToString()));
}
private void DownloadMissingPackagesFromSpecificFeeds(List<FileInfo> allNonBinaryFiles, HashSet<AssemblyLookupLocation> dllLocations)
private void DownloadMissingPackagesFromSpecificFeeds(HashSet<AssemblyLookupLocation> dllLocations)
{
var reachableFallbackFeeds = GetReachableFallbackNugetFeeds();
if (reachableFallbackFeeds.Count > 0)
{
DownloadMissingPackages(allNonBinaryFiles, dllLocations, fallbackNugetFeeds: reachableFallbackFeeds);
DownloadMissingPackages(dllLocations, fallbackNugetFeeds: reachableFallbackFeeds);
}
else
{
@@ -197,7 +196,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
}
}
private void DownloadMissingPackages(List<FileInfo> allFiles, HashSet<AssemblyLookupLocation> dllLocations, IEnumerable<string>? fallbackNugetFeeds = null)
private void DownloadMissingPackages(HashSet<AssemblyLookupLocation> dllLocations, IEnumerable<string>? fallbackNugetFeeds = null)
{
var alreadyDownloadedPackages = GetRestoredPackageDirectoryNames(packageDirectory.DirInfo);
var alreadyDownloadedLegacyPackages = GetRestoredLegacyPackageNames();
@@ -232,7 +231,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
logger.LogInfo($"Found {notYetDownloadedPackages.Count} packages that are not yet restored");
using var tempDir = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName, "nugetconfig"));
var nugetConfig = fallbackNugetFeeds is null
? GetNugetConfig(allFiles)
? GetNugetConfig()
: CreateFallbackNugetConfig(fallbackNugetFeeds, tempDir.DirInfo.FullName);
CompilationInfos.Add(("Fallback nuget restore", notYetDownloadedPackages.Count.ToString()));
@@ -280,19 +279,14 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
return nugetConfigPath;
}
private string[] GetAllNugetConfigs(List<FileInfo> allFiles) => allFiles.SelectFileNamesByName("nuget.config").ToArray();
private string? GetNugetConfig(List<FileInfo> allFiles)
private string? GetNugetConfig()
{
var nugetConfigs = GetAllNugetConfigs(allFiles);
var nugetConfigs = fileProvider.NugetConfigs;
string? nugetConfig;
if (nugetConfigs.Length > 1)
if (nugetConfigs.Count > 1)
{
logger.LogInfo($"Found multiple nuget.config files: {string.Join(", ", nugetConfigs)}.");
nugetConfig = allFiles
.SelectRootFiles(sourceDir)
.SelectFileNamesByName("nuget.config")
.FirstOrDefault();
nugetConfig = fileProvider.RootNugetConfig;
if (nugetConfig == null)
{
logger.LogInfo("Could not find a top-level nuget.config file.");
@@ -512,10 +506,10 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
return (timeoutMilliSeconds, tryCount);
}
private bool CheckFeeds(List<FileInfo> allFiles)
private bool CheckFeeds()
{
logger.LogInfo("Checking Nuget feeds...");
var (explicitFeeds, allFeeds) = GetAllFeeds(allFiles);
var (explicitFeeds, allFeeds) = GetAllFeeds();
var excludedFeeds = EnvironmentVariables.GetURLs(EnvironmentVariableNames.ExcludedNugetFeedsFromResponsivenessCheck)
.ToHashSet() ?? [];
@@ -581,13 +575,13 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
}
}
private (HashSet<string> explicitFeeds, HashSet<string> allFeeds) GetAllFeeds(List<FileInfo> allFiles)
private (HashSet<string> explicitFeeds, HashSet<string> allFeeds) GetAllFeeds()
{
IList<string> GetNugetFeeds(string nugetConfig) => dotnet.GetNugetFeeds(nugetConfig);
IList<string> GetNugetFeedsFromFolder(string folderPath) => dotnet.GetNugetFeedsFromFolder(folderPath);
var nugetConfigs = GetAllNugetConfigs(allFiles);
var nugetConfigs = fileProvider.NugetConfigs;
var explicitFeeds = nugetConfigs
.SelectMany(config => GetFeeds(() => GetNugetFeeds(config)))
.ToHashSet();

View File

@@ -36,6 +36,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
private readonly TemporaryDirectory legacyPackageDirectory;
private readonly TemporaryDirectory missingPackageDirectory;
private readonly TemporaryDirectory tempWorkingDirectory;
private readonly FileProvider fileProvider;
private readonly bool cleanupTempWorkingDirectory;
private readonly Lazy<Runtime> runtimeLazy;
@@ -79,20 +80,10 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
tempWorkingDirectory = new TemporaryDirectory(FileUtils.GetTemporaryWorkingDirectory(out cleanupTempWorkingDirectory));
logger.LogInfo($"Finding files in {srcDir}...");
var allFiles = GetAllFiles().ToList();
var binaryFileExtensions = new HashSet<string>(new[] { ".dll", ".exe" }); // TODO: add more binary file extensions.
var allNonBinaryFiles = allFiles.Where(f => !binaryFileExtensions.Contains(f.Extension.ToLowerInvariant())).ToList();
var smallNonBinaryFiles = allNonBinaryFiles.SelectSmallFiles(logger).SelectFileNames().ToList();
this.fileContent = new FileContent(logger, smallNonBinaryFiles);
this.nonGeneratedSources = allNonBinaryFiles.SelectFileNamesByExtension(".cs").ToList();
this.generatedSources = new();
var allProjects = allNonBinaryFiles.SelectFileNamesByExtension(".csproj").ToList();
var allSolutions = allNonBinaryFiles.SelectFileNamesByExtension(".sln").ToList();
var dllLocations = allFiles.SelectFileNamesByExtension(".dll").Select(x => new AssemblyLookupLocation(x)).ToHashSet();
logger.LogInfo($"Found {allFiles.Count} files, {nonGeneratedSources.Count} source files, {allProjects.Count} project files, {allSolutions.Count} solution files, {dllLocations.Count} DLLs.");
this.fileProvider = new FileProvider(sourceDir, logger);
this.fileContent = new FileContent(logger, this.fileProvider.SmallNonBinary);
this.nonGeneratedSources = fileProvider.Sources.ToList();
this.generatedSources = [];
void startCallback(string s, bool silent)
{
@@ -104,7 +95,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
logger.Log(silent ? Severity.Debug : Severity.Info, $"Exit code {ret}{(string.IsNullOrEmpty(msg) ? "" : $": {msg}")}");
}
DotNet.WithDotNet(SystemBuildActions.Instance, logger, smallNonBinaryFiles, tempWorkingDirectory.ToString(), shouldCleanUp: false, ensureDotNetAvailable: true, version: null, installDir =>
DotNet.WithDotNet(SystemBuildActions.Instance, logger, fileProvider.GlobalJsons, tempWorkingDirectory.ToString(), shouldCleanUp: false, ensureDotNetAvailable: true, version: null, installDir =>
{
this.dotnetPath = installDir;
return BuildScript.Success;
@@ -121,13 +112,14 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
throw;
}
RestoreNugetPackages(allNonBinaryFiles, allProjects, allSolutions, dllLocations);
var dllLocations = fileProvider.Dlls.Select(x => new AssemblyLookupLocation(x)).ToHashSet();
RestoreNugetPackages(dllLocations);
// Find DLLs in the .Net / Asp.Net Framework
// This needs to come after the nuget restore, because the nuget restore might fetch the .NET Core/Framework reference assemblies.
var frameworkLocations = AddFrameworkDlls(dllLocations);
assemblyCache = new AssemblyCache(dllLocations, frameworkLocations, logger);
AnalyseSolutions(allSolutions);
AnalyseSolutions(fileProvider.Solutions);
foreach (var filename in assemblyCache.AllAssemblies.Select(a => a.Filename))
{
@@ -154,7 +146,7 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
shouldExtractWebViews)
{
CompilationInfos.Add(("WebView extraction enabled", "1"));
GenerateSourceFilesFromWebViews(allNonBinaryFiles);
GenerateSourceFilesFromWebViews();
}
else
{
@@ -171,8 +163,8 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
logger.LogInfo("Build analysis summary:");
logger.LogInfo($"{nonGeneratedSources.Count,align} source files found on the filesystem");
logger.LogInfo($"{generatedSources.Count,align} source files have been generated");
logger.LogInfo($"{allSolutions.Count,align} solution files found on the filesystem");
logger.LogInfo($"{allProjects.Count,align} project files found on the filesystem");
logger.LogInfo($"{fileProvider.Solutions.Count,align} solution files found on the filesystem");
logger.LogInfo($"{fileProvider.Projects.Count,align} project files found on the filesystem");
logger.LogInfo($"{usedReferences.Keys.Count,align} resolved references");
logger.LogInfo($"{unresolvedReferences.Count,align} unresolved references");
logger.LogInfo($"{conflictedReferences,align} resolved assembly conflicts");
@@ -182,8 +174,8 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
CompilationInfos.AddRange([
("Source files on filesystem", nonGeneratedSources.Count.ToString()),
("Source files generated", generatedSources.Count.ToString()),
("Solution files on filesystem", allSolutions.Count.ToString()),
("Project files on filesystem", allProjects.Count.ToString()),
("Solution files on filesystem", fileProvider.Solutions.Count.ToString()),
("Project files on filesystem", fileProvider.Projects.Count.ToString()),
("Resolved references", usedReferences.Keys.Count.ToString()),
("Unresolved references", unresolvedReferences.Count.ToString()),
("Resolved assembly conflicts", conflictedReferences.ToString()),
@@ -467,15 +459,15 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
}
}
private void GenerateSourceFilesFromWebViews(List<FileInfo> allFiles)
private void GenerateSourceFilesFromWebViews()
{
var views = allFiles.SelectFileNamesByExtension(".cshtml", ".razor").ToArray();
if (views.Length == 0)
var views = fileProvider.RazorViews;
if (views.Count == 0)
{
return;
}
logger.LogInfo($"Found {views.Length} cshtml and razor files.");
logger.LogInfo($"Found {views.Count} cshtml and razor files.");
if (!IsAspNetCoreDetected())
{
@@ -503,38 +495,6 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
}
}
private IEnumerable<FileInfo> GetAllFiles()
{
IEnumerable<FileInfo> files = sourceDir.GetFiles("*.*", new EnumerationOptions { RecurseSubdirectories = true });
if (dotnetPath != null)
{
files = files.Where(f => !f.FullName.StartsWith(dotnetPath, StringComparison.OrdinalIgnoreCase));
}
files = files.Where(f =>
{
try
{
if (f.Exists)
{
return true;
}
logger.LogWarning($"File {f.FullName} could not be processed.");
return false;
}
catch (Exception ex)
{
logger.LogWarning($"File {f.FullName} could not be processed: {ex.Message}");
return false;
}
});
files = new FilePathFilter(sourceDir, logger).Filter(files);
return files;
}
/// <summary>
/// Computes a unique temp directory for the packages associated
/// with this source tree. Use a SHA1 of the directory name.

View File

@@ -14,20 +14,6 @@ namespace Semmle.Extraction.CSharp.DependencyFetching
/// <summary>
/// Keeps only the files that live directly in <paramref name="dir"/>: a file is
/// retained when its <see cref="FileInfo.DirectoryName"/> is equal to the
/// directory's <see cref="FileSystemInfo.FullName"/>. The result is evaluated lazily.
/// </summary>
/// <param name="files">The candidate files.</param>
/// <param name="dir">The directory the files must be located in.</param>
/// <returns>The files from <paramref name="files"/> located directly in <paramref name="dir"/>.</returns>
public static IEnumerable<FileInfo> SelectRootFiles(this IEnumerable<FileInfo> files, DirectoryInfo dir)
{
    return from candidate in files
           where string.Equals(candidate.DirectoryName, dir.FullName)
           select candidate;
}
internal static IEnumerable<FileInfo> SelectSmallFiles(this IEnumerable<FileInfo> files, ILogger logger)
{
const int oneMb = 1_048_576;
return files.Where(file =>
{
if (file.Length > oneMb)
{
logger.LogDebug($"Skipping {file.FullName} because it is bigger than 1MB.");
return false;
}
return true;
});
}
/// <summary>
/// Selects, via <c>SelectFilesAux</c>, the files whose extension matches one of
/// <paramref name="extensions"/>.
/// </summary>
/// <remarks>
/// Extensions are compared with <see cref="System.StringComparer.OrdinalIgnoreCase"/>, so
/// <c>Foo.DLL</c> matches <c>".dll"</c>. A case-sensitive match would silently drop such files.
/// </remarks>
/// <param name="files">The candidate files.</param>
/// <param name="extensions">The extensions to match, including the leading dot (for example <c>".cs"</c>).</param>
public static IEnumerable<string> SelectFileNamesByExtension(this IEnumerable<FileInfo> files, params string[] extensions) =>
files.SelectFilesAux(fi => extensions.Contains(fi.Extension, System.StringComparer.OrdinalIgnoreCase));

View File

@@ -0,0 +1,120 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Policy;
using Semmle.Util.Logging;
namespace Semmle.Extraction.CSharp.DependencyFetching
{
/// <summary>
/// Looks up the files below a source directory and exposes categorised views of
/// them (sources, projects, solutions, DLLs, ...). Every view is wrapped in a
/// <see cref="Lazy{T}"/>, so it is computed on first access and then reused.
/// </summary>
public class FileProvider
{
    // Compared case-insensitively, so ".DLL" and ".dll" are both treated as binary.
    private static readonly HashSet<string> binaryFileExtensions = new(StringComparer.OrdinalIgnoreCase) { ".dll", ".exe" }; // TODO: add more binary file extensions.

    private readonly ILogger logger;
    private readonly Lazy<FileInfo[]> all;
    private readonly Lazy<FileInfo[]> allNonBinary;
    private readonly Lazy<string[]> smallNonBinary;
    private readonly Lazy<string[]> sources;
    private readonly Lazy<string[]> projects;
    private readonly Lazy<string[]> solutions;
    private readonly Lazy<string[]> dlls;
    private readonly Lazy<string[]> nugetConfigs;
    private readonly Lazy<string[]> globalJsons;
    private readonly Lazy<string[]> razorViews;
    private readonly Lazy<string?> rootNugetConfig;

    /// <summary>
    /// Creates a provider for the files below <paramref name="sourceDir"/>.
    /// No file system access happens here; the lookups run when a property is first read.
    /// </summary>
    /// <param name="sourceDir">The directory that is searched recursively.</param>
    /// <param name="logger">The logger that receives progress and warning messages.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sourceDir"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public FileProvider(DirectoryInfo sourceDir, ILogger logger)
    {
        // Fail at construction time instead of inside a lazy initialiser on first property access.
        ArgumentNullException.ThrowIfNull(sourceDir);
        ArgumentNullException.ThrowIfNull(logger);

        SourceDir = sourceDir;
        this.logger = logger;

        all = new Lazy<FileInfo[]>(GetAllFiles);
        allNonBinary = new Lazy<FileInfo[]>(() => all.Value.Where(f => !binaryFileExtensions.Contains(f.Extension)).ToArray());
        smallNonBinary = new Lazy<string[]>(() =>
        {
            var ret = SelectSmallFiles(allNonBinary.Value).SelectFileNames().ToArray();
            logger.LogInfo($"Found {ret.Length} small non-binary files in {SourceDir}.");
            return ret;
        });

        sources = new Lazy<string[]>(() => FindByExtension(allNonBinary.Value, "source", ".cs"));
        projects = new Lazy<string[]>(() => FindByExtension(allNonBinary.Value, "project", ".csproj"));
        solutions = new Lazy<string[]>(() => FindByExtension(allNonBinary.Value, "solution", ".sln"));
        // DLLs are binary files, so they have to be looked up in the unfiltered file list.
        dlls = new Lazy<string[]>(() => FindByExtension(all.Value, "DLL", ".dll"));
        nugetConfigs = new Lazy<string[]>(() => allNonBinary.Value.SelectFileNamesByName("nuget.config").ToArray());
        globalJsons = new Lazy<string[]>(() => allNonBinary.Value.SelectFileNamesByName("global.json").ToArray());
        razorViews = new Lazy<string[]>(() => FindByExtension(allNonBinary.Value, "razor view", ".cshtml", ".razor"));
        // Uses the same non-binary file list as nugetConfigs so both lookups agree.
        rootNugetConfig = new Lazy<string?>(() => allNonBinary.Value.SelectRootFiles(SourceDir).SelectFileNamesByName("nuget.config").FirstOrDefault());
    }

    /// <summary>
    /// Selects the names of the files in <paramref name="files"/> that have one of the
    /// given extensions and logs how many were found.
    /// </summary>
    /// <param name="files">The files to search.</param>
    /// <param name="filetype">Human readable description of the file type, used in the log message only.</param>
    /// <param name="extensions">The extensions to select, including the leading dot.</param>
    private string[] FindByExtension(IEnumerable<FileInfo> files, string filetype, params string[] extensions)
    {
        var ret = files.SelectFileNamesByExtension(extensions).ToArray();
        logger.LogInfo($"Found {ret.Length} {filetype} files in {SourceDir}.");
        return ret;
    }

    /// <summary>
    /// Lazily filters out the files that are larger than 1 MB (1,048,576 bytes),
    /// logging each skipped file at debug level.
    /// </summary>
    private IEnumerable<FileInfo> SelectSmallFiles(IEnumerable<FileInfo> files)
    {
        const int oneMb = 1_048_576;
        return files.Where(file =>
        {
            if (file.Length > oneMb)
            {
                logger.LogDebug($"Skipping {file.FullName} because it is bigger than 1MB.");
                return false;
            }

            return true;
        });
    }

    /// <summary>
    /// Recursively enumerates the files below <see cref="SourceDir"/>, drops the files
    /// that do not exist (or whose existence check throws), and passes the remainder
    /// through <c>FilePathFilter</c>.
    /// </summary>
    private FileInfo[] GetAllFiles()
    {
        logger.LogInfo($"Finding files in {SourceDir}...");
        var files = SourceDir.GetFiles("*.*", new EnumerationOptions { RecurseSubdirectories = true });

        var filteredFiles = files.Where(f =>
        {
            try
            {
                if (f.Exists)
                {
                    return true;
                }

                logger.LogWarning($"File {f.FullName} could not be processed.");
                return false;
            }
            catch (Exception ex)
            {
                // Best effort: a file that cannot be inspected is reported and skipped.
                logger.LogWarning($"File {f.FullName} could not be processed: {ex.Message}");
                return false;
            }
        });

        var allFiles = new FilePathFilter(SourceDir, logger).Filter(filteredFiles).ToArray();

        logger.LogInfo($"Found {allFiles.Length} files in {SourceDir}.");
        return allFiles;
    }

    /// <summary>The directory whose files are provided.</summary>
    public DirectoryInfo SourceDir { get; }

    /// <summary>Names of the non-binary files that are at most 1 MB in size.</summary>
    public IEnumerable<string> SmallNonBinary => smallNonBinary.Value;

    /// <summary>Names of the <c>.cs</c> files.</summary>
    public IEnumerable<string> Sources => sources.Value;

    /// <summary>Names of the <c>.csproj</c> files.</summary>
    public ICollection<string> Projects => projects.Value;

    /// <summary>Names of the <c>.sln</c> files.</summary>
    public ICollection<string> Solutions => solutions.Value;

    /// <summary>Names of the <c>.dll</c> files.</summary>
    public IEnumerable<string> Dlls => dlls.Value;

    /// <summary>Names of the non-binary files called <c>nuget.config</c>.</summary>
    public ICollection<string> NugetConfigs => nugetConfigs.Value;

    /// <summary>
    /// The <c>nuget.config</c> located directly in <see cref="SourceDir"/>, or <c>null</c> if there is none.
    /// </summary>
    public string? RootNugetConfig => rootNugetConfig.Value;

    /// <summary>Names of the non-binary files called <c>global.json</c>.</summary>
    public IEnumerable<string> GlobalJsons => globalJsons.Value;

    /// <summary>Names of the <c>.cshtml</c> and <c>.razor</c> files.</summary>
    public ICollection<string> RazorViews => razorViews.Value;
}
}