Retrieve package IDs from files and restore the not yet restored ones

Read all files in the repo and look for `PackageReference` XML elements
to extract the package IDs, then restore the packages that are not yet
restored. This change improves the percentage of found assemblies on the
PowerShell repo from 95% to 97% compared to a traced extraction. Also,
it increases the number of assemblies only referenced in the standalone
extraction from 79 to 134.
This commit is contained in:
Tamas Vajk
2023-07-04 13:11:21 +02:00
parent cd6419503f
commit ef0e102cd7
3 changed files with 111 additions and 5 deletions

View File

@@ -8,13 +8,14 @@ using System.Threading.Tasks;
using System.Collections.Concurrent;
using System.Text;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
namespace Semmle.BuildAnalyser
{
/// <summary>
/// Main implementation of the build analysis.
/// </summary>
internal sealed class BuildAnalysis : IDisposable
internal sealed partial class BuildAnalysis : IDisposable
{
private readonly AssemblyCache assemblyCache;
private readonly ProgressMonitor progressMonitor;
@@ -95,6 +96,7 @@ namespace Semmle.BuildAnalyser
{
Restore(solutions);
Restore(allProjects);
DownloadMissingPackages(allProjects);
}
}
@@ -316,9 +318,9 @@ namespace Semmle.BuildAnalyser
}
/// <summary>
/// Restores <paramref name="target"/> into the analysis package directory by
/// delegating to <c>dotnet.RestoreToDirectory</c>.
/// </summary>
/// <param name="target">Path handed to the restore command unchanged.</param>
// NOTE(review): both a `void` and a `bool` signature (and both a plain and a
// `return`ed call) appear below; the `bool` form is the one whose result is
// consumed by DownloadMissingPackages - confirm which lines are in the real file.
private void Restore(string target)
private bool Restore(string target)
{
dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName);
// Propagate the restore result so callers can report a failed package restore.
return dotnet.RestoreToDirectory(target, packageDirectory.DirInfo.FullName);
}
private void Restore(IEnumerable<string> targets)
@@ -329,6 +331,76 @@ namespace Semmle.BuildAnalyser
}
}
/// <summary>
/// Scans every file returned by <c>GetFiles("*.*")</c> for <c>PackageReference</c>
/// elements, collects the referenced package IDs that do not yet have a folder in
/// the package directory, and restores each of them through a throw-away console
/// project created in a temporary directory.
/// </summary>
/// <param name="restoreTargets">
/// Currently unused; kept so the call site stays unchanged.
/// </param>
private void DownloadMissingPackages(IEnumerable<string> restoreTargets)
{
    // Package folder names and package IDs are compared in lower-invariant form.
    var alreadyDownloadedPackages = Directory.GetDirectories(packageDirectory.DirInfo.FullName)
        .Select(d => Path.GetFileName(d).ToLowerInvariant())
        .ToHashSet();
    var notYetDownloadedPackages = new HashSet<string>();

    var allFiles = GetFiles("*.*").ToArray();
    foreach (var file in allFiles)
    {
        try
        {
            using var sr = new StreamReader(file);
            // ReadLine returns null only at end of stream; empty lines are still scanned.
            while (sr.ReadLine() is { } line)
            {
                foreach (var valueMatch in PackageReference().EnumerateMatches(line))
                {
                    // ValueMatch does not expose capture groups, so the Include value
                    // is extracted manually from the matched slice.
                    var match = line.AsSpan(valueMatch.Index, valueMatch.Length);
                    if (TryGetPackageName(match, out var packageName)
                        && !alreadyDownloadedPackages.Contains(packageName))
                    {
                        notYetDownloadedPackages.Add(packageName);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: an unreadable file must not stop the scan of the others.
            progressMonitor.FailedToReadFile(file, ex);
        }
    }

    foreach (var package in notYetDownloadedPackages)
    {
        progressMonitor.NugetInstall(package);

        // Disposed at the end of every iteration, including the early `continue`.
        using var tempDir = new TemporaryDirectory(ComputeTempDirectory(package));
        var projectDirectory = tempDir.DirInfo.FullName;

        if (!dotnet.New(projectDirectory) || !dotnet.AddPackage(projectDirectory, package))
        {
            continue;
        }

        // TODO: the restore might fail, we could retry with a prerelease (*-* instead of *) version of the package.
        if (!Restore(projectDirectory))
        {
            progressMonitor.FailedToRestoreNugetPackage(package);
        }
    }
}

/// <summary>
/// Extracts the lower-cased value of the first <c>Include="..."</c> attribute from
/// the text of a matched <c>PackageReference</c> element.
/// </summary>
/// <param name="packageReference">Text of one regex match.</param>
/// <param name="packageName">The lower-cased package ID, or an empty string on failure.</param>
/// <returns><c>true</c> when a non-empty package ID was found.</returns>
private static bool TryGetPackageName(ReadOnlySpan<char> packageReference, out string packageName)
{
    // Anchoring on `Include="` (not just `Include`) keeps attributes such as
    // IncludeAssets="..." from being mistaken for the package ID.
    const string includeAttribute = "Include=\"";
    packageName = string.Empty;

    var includeIndex = packageReference.IndexOf(includeAttribute, StringComparison.OrdinalIgnoreCase);
    if (includeIndex < 0)
    {
        return false;
    }

    var value = packageReference.Slice(includeIndex + includeAttribute.Length);
    var closingQuoteIndex = value.IndexOf('"');
    if (closingQuoteIndex <= 0)
    {
        // No closing quote, or an empty Include value: nothing to install.
        return false;
    }

    packageName = value.Slice(0, closingQuoteIndex).Trim().ToString().ToLowerInvariant();
    return packageName.Length > 0;
}
private void AnalyseSolutions(IEnumerable<string> solutions)
{
Parallel.ForEach(solutions, new ParallelOptions { MaxDegreeOfParallelism = 4 }, solutionFile =>
@@ -350,5 +422,8 @@ namespace Semmle.BuildAnalyser
{
packageDirectory?.Dispose();
}
/// <summary>
/// Source-generated, case-insensitive regex matching a self-closing
/// <c>PackageReference</c> element that has an <c>Include="..."</c> attribute;
/// group 1 captures the attribute value.
/// </summary>
// The leading and trailing `.*` are greedy, so when several elements share one
// input line a single match spans from the first element to the last `/>`.
[GeneratedRegex("<PackageReference .*Include=\"(.*?)\".*/>", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex PackageReference();
}
}

View File

@@ -36,16 +36,36 @@ namespace Semmle.BuildAnalyser
}
}
public void RestoreToDirectory(string projectOrSolutionFile, string packageDirectory)
/// <summary>
/// Starts <c>dotnet</c> with the given argument string, waits for it to exit and
/// reports a non-zero exit code through <c>progressMonitor.CommandFailed</c>.
/// </summary>
/// <param name="args">Complete command-line argument string passed to <c>dotnet</c>.</param>
/// <returns><c>true</c> when the process exit code is 0; otherwise <c>false</c>.</returns>
private bool RunCommand(string args)
{
// NOTE(review): `args` is already a parameter of this method; this declaration
// duplicates the one in RestoreToDirectory and looks like a leftover of the
// pre-refactoring body - confirm it is not present in the real file.
var args = $"restore --no-dependencies \"{projectOrSolutionFile}\" --packages \"{packageDirectory}\" /p:DisableImplicitNuGetFallbackFolder=true";
// Log the full command line before launching it.
progressMonitor.RunningProcess($"dotnet {args}");
using var proc = Process.Start("dotnet", args);
// Block until the child process has finished so ExitCode is available.
proc.WaitForExit();
if (proc.ExitCode != 0)
{
progressMonitor.CommandFailed("dotnet", args, proc.ExitCode);
return false;
}
return true;
}
/// <summary>
/// Runs <c>dotnet restore --no-dependencies</c> for the given project or solution,
/// placing the packages into <paramref name="packageDirectory"/>.
/// </summary>
/// <param name="projectOrSolutionFile">Path passed (quoted) as the restore target.</param>
/// <param name="packageDirectory">Path passed (quoted) to <c>--packages</c>.</param>
/// <returns>The result of <c>RunCommand</c> for the composed command line.</returns>
public bool RestoreToDirectory(string projectOrSolutionFile, string packageDirectory) =>
    RunCommand($"restore --no-dependencies \"{projectOrSolutionFile}\" --packages \"{packageDirectory}\" /p:DisableImplicitNuGetFallbackFolder=true");
/// <summary>
/// Runs <c>dotnet new console --no-restore</c> with <paramref name="folder"/> as
/// the output directory.
/// </summary>
/// <param name="folder">Path passed (quoted) to <c>--output</c>.</param>
/// <returns>The result of <c>RunCommand</c> for the composed command line.</returns>
public bool New(string folder) =>
    RunCommand($"new console --no-restore --output \"{folder}\"");
/// <summary>
/// Runs <c>dotnet add ... package ... --no-restore</c> to add a package reference
/// to the project located in <paramref name="folder"/>.
/// </summary>
/// <param name="folder">Path passed (quoted) as the project argument.</param>
/// <param name="package">Package ID passed (quoted) to the <c>package</c> sub-command.</param>
/// <returns>The result of <c>RunCommand</c> for the composed command line.</returns>
public bool AddPackage(string folder, string package) =>
    RunCommand($"add \"{folder}\" package \"{package}\" --no-restore");
}
}

View File

@@ -107,5 +107,16 @@ namespace Semmle.BuildAnalyser
{
logger.Log(Severity.Info, $"Running {command}");
}
/// <summary>
/// Logs, at <c>Info</c> severity, that restoring the given package failed.
/// </summary>
/// <param name="package">Package ID included in the log message.</param>
public void FailedToRestoreNugetPackage(string package) =>
    logger.Log(Severity.Info, $"Failed to restore nuget package {package}");
/// <summary>
/// Logs a failed file read twice: a short message at <c>Info</c> severity and
/// the same message with the exception appended at <c>Debug</c> severity.
/// </summary>
/// <param name="file">Path of the file that could not be read.</param>
/// <param name="ex">Exception appended to the debug-level message.</param>
public void FailedToReadFile(string file, Exception ex)
{
    var summary = $"Failed to read file {file}";

    logger.Log(Severity.Info, summary);
    logger.Log(Severity.Debug, $"{summary}, exception: {ex}");
}
}
}