C#: Re-factor the processing of all files into a helper class and make sure to only do one pass of the files.

This commit is contained in:
Michael Nebel
2023-08-10 10:47:52 +02:00
parent db685e5fc4
commit c95d219c84
2 changed files with 153 additions and 112 deletions

View File

@@ -8,14 +8,13 @@ using System.Threading.Tasks;
using System.Collections.Concurrent;
using System.Text;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
namespace Semmle.BuildAnalyser
{
/// <summary>
/// Main implementation of the build analysis.
/// </summary>
internal sealed partial class BuildAnalysis : IDisposable
internal sealed class BuildAnalysis : IDisposable
{
private readonly AssemblyCache assemblyCache;
private readonly ProgressMonitor progressMonitor;
@@ -29,7 +28,7 @@ namespace Semmle.BuildAnalyser
private readonly Options options;
private readonly DirectoryInfo sourceDir;
private readonly DotNet dotnet;
private readonly Lazy<IEnumerable<string>> allFiles;
private readonly FileContent fileContent;
private readonly TemporaryDirectory packageDirectory;
@@ -58,7 +57,9 @@ namespace Semmle.BuildAnalyser
this.progressMonitor.FindingFiles(options.SrcDir);
this.allFiles = new(() => GetFiles("*.*"));
packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));
this.fileContent = new FileContent(packageDirectory, progressMonitor, () => GetFiles("*.*"));
this.allSources = GetFiles("*.cs").ToArray();
var allProjects = GetFiles("*.csproj");
var solutions = options.SolutionFile is not null
@@ -75,7 +76,7 @@ namespace Semmle.BuildAnalyser
progressMonitor.LogInfo($".NET runtime location selected: {runtimeLocation}");
dllDirNames.Add(runtimeLocation);
if (UseAspNetDlls() && runtime.GetAspRuntime() is string aspRuntime)
if (fileContent.UseAspNetDlls && runtime.GetAspRuntime() is string aspRuntime)
{
progressMonitor.LogInfo($"ASP.NET runtime location selected: {aspRuntime}");
dllDirNames.Add(aspRuntime);
@@ -87,8 +88,6 @@ namespace Semmle.BuildAnalyser
UseReference(typeof(object).Assembly.Location);
}
packageDirectory = new TemporaryDirectory(ComputeTempDirectory(sourceDir.FullName));
if (options.UseNuGet)
{
dllDirNames.Add(packageDirectory.DirInfo.FullName);
@@ -230,11 +229,6 @@ namespace Semmle.BuildAnalyser
/// <param name="sourceFile">The source file.</param>
private void UseSource(FileInfo sourceFile) => sources[sourceFile.FullName] = sourceFile.Exists;
/// <summary>
/// All files in the source directory.
/// </summary>
private IEnumerable<string> AllFiles => allFiles.Value;
/// <summary>
/// The list of resolved reference files.
/// </summary>
@@ -335,73 +329,9 @@ namespace Semmle.BuildAnalyser
}
}
private static string GetGroup(ReadOnlySpan<char> input, ValueMatch valueMatch, string groupPrefix)
{
var match = input.Slice(valueMatch.Index, valueMatch.Length);
var includeIndex = match.IndexOf(groupPrefix, StringComparison.InvariantCultureIgnoreCase);
if (includeIndex == -1)
{
return string.Empty;
}
match = match.Slice(includeIndex + groupPrefix.Length + 1);
var quoteIndex1 = match.IndexOf("\"");
var quoteIndex2 = match.Slice(quoteIndex1 + 1).IndexOf("\"");
return match.Slice(quoteIndex1 + 1, quoteIndex2).ToString().ToLowerInvariant();
}
private static bool IsGroupMatch(ReadOnlySpan<char> line, Regex regex, string groupPrefix, string value)
{
foreach (var valueMatch in regex.EnumerateMatches(line))
{
// We can't get the group from the ValueMatch, so doing it manually:
if (GetGroup(line, valueMatch, groupPrefix) == value.ToLowerInvariant())
{
return true;
}
}
return false;
}
/// <summary>
/// Returns true if any file in the source directory indicates that ASP.NET is used.
/// The following heuristic is used to decide, if ASP.NET is used:
/// If any file in the source directory contains something like (this will most like be a .csproj file)
/// <Project Sdk="Microsoft.NET.Sdk.Web">
/// <FrameworkReference Include="Microsoft.AspNetCore.App"/>
/// </summary>
private bool UseAspNetDlls()
{
foreach (var file in AllFiles)
{
try
{
using var sr = new StreamReader(file);
ReadOnlySpan<char> line;
while ((line = sr.ReadLine()) != null)
{
if (IsGroupMatch(line, ProjectSdk(), "Sdk", "Microsoft.NET.Sdk.Web") ||
IsGroupMatch(line, FrameworkReference(), "Include", "Microsoft.AspNetCore.App"))
{
return true;
}
}
}
catch (Exception ex)
{
progressMonitor.FailedToReadFile(file, ex);
}
}
return false;
}
private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
{
var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName).Select(d => Path.GetFileName(d).ToLowerInvariant()).ToHashSet();
var notYetDownloadedPackages = new HashSet<string>();
var nugetConfigs = GetFiles("nuget.config", recurseSubdirectories: true).ToArray();
string? nugetConfig = null;
if (nugetConfigs.Length > 1)
@@ -418,32 +348,7 @@ namespace Semmle.BuildAnalyser
nugetConfig = nugetConfigs.FirstOrDefault();
}
foreach (var file in AllFiles)
{
try
{
using var sr = new StreamReader(file);
ReadOnlySpan<char> line;
while ((line = sr.ReadLine()) != null)
{
foreach (var valueMatch in PackageReference().EnumerateMatches(line))
{
// We can't get the group from the ValueMatch, so doing it manually:
var packageName = GetGroup(line, valueMatch, "Include");
if (!string.IsNullOrEmpty(packageName) && !alreadyDownloadedPackages.Contains(packageName))
{
notYetDownloadedPackages.Add(packageName);
}
}
}
}
catch (Exception ex)
{
progressMonitor.FailedToReadFile(file, ex);
}
}
foreach (var package in notYetDownloadedPackages)
foreach (var package in fileContent.NotYetDownloadedPackages)
{
progressMonitor.NugetInstall(package);
using var tempDir = new TemporaryDirectory(ComputeTempDirectory(package));
@@ -487,15 +392,5 @@ namespace Semmle.BuildAnalyser
}
public void Dispose() => packageDirectory?.Dispose();
[GeneratedRegex("<PackageReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex PackageReference();
[GeneratedRegex("<FrameworkReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex FrameworkReference();
[GeneratedRegex("<Project\\s+Sdk=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex ProjectSdk();
}
}

View File

@@ -0,0 +1,146 @@
using Semmle.Util;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace Semmle.BuildAnalyser
{
// <summary>
// This class is used to read a set of files and decide different properties about the
// content (by reading the content of the files only once).
// The implementation is lazy, so the properties are only calculated when
// the first property is accessed.
// </summary>
internal partial class FileContent
{
private readonly ProgressMonitor progressMonitor;
private readonly TemporaryDirectory packageDirectory;
private readonly Func<IEnumerable<string>> getFiles;
private readonly HashSet<string> notYetDownloadedPackages = new HashSet<string>();
private bool IsInitialized { get; set; } = false;
public HashSet<string> NotYetDownloadedPackages
{
get
{
Initialize();
return notYetDownloadedPackages;
}
}
private bool useAspNetDlls = false;
/// <summary>
/// True if any file in the source directory indicates that ASP.NET is used.
/// The following heuristic is used to decide, if ASP.NET is used:
/// If any file in the source directory contains something like (this will most like be a .csproj file)
/// <Project Sdk="Microsoft.NET.Sdk.Web">
/// <FrameworkReference Include="Microsoft.AspNetCore.App"/>
/// </summary>
public bool UseAspNetDlls
{
get
{
Initialize();
return useAspNetDlls;
}
}
public FileContent(TemporaryDirectory packageDirectory, ProgressMonitor progressMonitor, Func<IEnumerable<string>> getFiles)
{
this.progressMonitor = progressMonitor;
this.packageDirectory = packageDirectory;
this.getFiles = getFiles;
}
private static string GetGroup(ReadOnlySpan<char> input, ValueMatch valueMatch, string groupPrefix)
{
var match = input.Slice(valueMatch.Index, valueMatch.Length);
var includeIndex = match.IndexOf(groupPrefix, StringComparison.InvariantCultureIgnoreCase);
if (includeIndex == -1)
{
return string.Empty;
}
match = match.Slice(includeIndex + groupPrefix.Length + 1);
var quoteIndex1 = match.IndexOf("\"");
var quoteIndex2 = match.Slice(quoteIndex1 + 1).IndexOf("\"");
return match.Slice(quoteIndex1 + 1, quoteIndex2).ToString().ToLowerInvariant();
}
private static bool IsGroupMatch(ReadOnlySpan<char> line, Regex regex, string groupPrefix, string value)
{
foreach (var valueMatch in regex.EnumerateMatches(line))
{
// We can't get the group from the ValueMatch, so doing it manually:
if (GetGroup(line, valueMatch, groupPrefix) == value.ToLowerInvariant())
{
return true;
}
}
return false;
}
private void Initialize()
{
if (IsInitialized)
{
return;
}
var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName).Select(d => Path.GetFileName(d).ToLowerInvariant()).ToHashSet();
foreach (var file in getFiles())
{
try
{
using var sr = new StreamReader(file);
ReadOnlySpan<char> line;
while ((line = sr.ReadLine()) != null)
{
// Find the not yet downloaded packages.
foreach (var valueMatch in PackageReference().EnumerateMatches(line))
{
// We can't get the group from the ValueMatch, so doing it manually:
var packageName = GetGroup(line, valueMatch, "Include");
if (!string.IsNullOrEmpty(packageName) && !alreadyDownloadedPackages.Contains(packageName))
{
notYetDownloadedPackages.Add(packageName);
}
}
// Determine if ASP.NET is used.
if (!useAspNetDlls)
{
useAspNetDlls =
IsGroupMatch(line, ProjectSdk(), "Sdk", "Microsoft.NET.Sdk.Web") ||
IsGroupMatch(line, FrameworkReference(), "Include", "Microsoft.AspNetCore.App");
}
}
}
catch (Exception ex)
{
progressMonitor.FailedToReadFile(file, ex);
}
}
IsInitialized = true;
}
[GeneratedRegex("<PackageReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex PackageReference();
[GeneratedRegex("<FrameworkReference\\s+Include=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex FrameworkReference();
[GeneratedRegex("<Project\\s+Sdk=\"(.*?)\".*/?>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex ProjectSdk();
}
}