Merge pull request #3456 from github/koesie10/unique-database-names

Make database storage paths more unique
This commit is contained in:
Koen Vlaswinkel
2024-03-11 16:40:46 +01:00
committed by GitHub
5 changed files with 191 additions and 57 deletions

View File

@@ -0,0 +1,44 @@
type FilenameOptions = {
  // When true, dots that survive trimming are turned into hyphens.
  removeDots?: boolean;
};

/**
 * Derives a filesystem-friendly filename from an arbitrary string.
 *
 * The input is lowercased; each run of whitespace, underscores, slashes
 * and backslashes becomes a hyphen; every character other than `a-z`,
 * `0-9`, `.` and `-` is dropped; and leading/trailing dots and hyphens are
 * trimmed. Runs of hyphens and runs of dots are then collapsed to a single
 * character. The result is always lowercase ASCII and may be the empty
 * string when the input contains no permitted characters.
 *
 * @param str The string to create a filename from
 * @param removeDots Option: whether to replace dots in the filename by hyphens [default: false]
 * @returns The filename
 */
export function createFilenameFromString(
  str: string,
  { removeDots }: FilenameOptions = {},
) {
  const trimmed = str
    .toLowerCase()
    // Spaces, underscores, slashes, and backslashes act as word separators
    .replaceAll(/[\s_/\\]+/g, "-")
    // Anything else that is not allowed is simply dropped
    .replaceAll(/[^a-z0-9.-]/g, "")
    // No leading or trailing hyphens or dots
    .replaceAll(/^[.-]+|[.-]+$/g, "");

  // Dots become hyphens when they are not allowed. This happens after
  // trimming, so it cannot introduce new leading or trailing hyphens.
  const candidate = removeDots ? trimmed.replaceAll(/\./g, "-") : trimmed;

  // Collapse duplicate hyphens first, then duplicate dots
  return candidate.replaceAll(/-{2,}/g, "-").replaceAll(/\.{2,}/g, ".");
}

View File

@@ -10,9 +10,11 @@ import {
pathExists,
createWriteStream,
remove,
readdir,
} from "fs-extra";
import { basename, join } from "path";
import type { Octokit } from "@octokit/rest";
import { nanoid } from "nanoid";
import type { DatabaseManager, DatabaseItem } from "./local-databases";
import { tmpDir } from "../tmp-dir";
@@ -36,6 +38,7 @@ import { AppOctokit } from "../common/octokit";
import type { DatabaseOrigin } from "./local-databases/database-origin";
import { createTimeoutSignal } from "../common/fetch-stream";
import type { App } from "../common/app";
import { createFilenameFromString } from "../common/filenames";
import { findDirWithFile } from "../common/files";
import { convertGithubNwoToDatabaseUrl } from "./github-databases/api";
@@ -364,7 +367,11 @@ async function databaseArchiveFetcher(
throw new Error("No storage path specified.");
}
await ensureDir(storagePath);
const unzipPath = await getStorageFolder(storagePath, databaseUrl);
const unzipPath = await getStorageFolder(
storagePath,
databaseUrl,
nameOverride,
);
if (isFile(databaseUrl)) {
await readAndUnzip(databaseUrl, unzipPath, cli, progress);
@@ -408,31 +415,60 @@ async function databaseArchiveFetcher(
}
}
async function getStorageFolder(storagePath: string, urlStr: string) {
// we need to generate a folder name for the unzipped archive,
// this needs to be human readable since we may use this name as the initial
// name for the database
const url = Uri.parse(urlStr);
// MacOS has a max filename length of 255
// and remove a few extra chars in case we need to add a counter at the end.
let lastName = basename(url.path).substring(0, 250);
if (lastName.endsWith(".zip")) {
lastName = lastName.substring(0, lastName.length - 4);
// The number of tries to use when generating a unique filename before
// giving up and using a nanoid.
const DUPLICATE_FILENAMES_TRIES = 10_000;
async function getStorageFolder(
storagePath: string,
urlStr: string,
nameOverrride?: string,
) {
let lastName: string;
if (nameOverrride) {
lastName = createFilenameFromString(nameOverrride);
} else {
// we need to generate a folder name for the unzipped archive,
// this needs to be human readable since we may use this name as the initial
// name for the database
const url = Uri.parse(urlStr);
// MacOS has a max filename length of 255
// and remove a few extra chars in case we need to add a counter at the end.
lastName = basename(url.path).substring(0, 250);
if (lastName.endsWith(".zip")) {
lastName = lastName.substring(0, lastName.length - 4);
}
}
const realpath = await fs_realpath(storagePath);
let folderName = join(realpath, lastName);
let folderName = lastName;
// get all existing files instead of calling pathExists on every
// single combination of realpath and folderName
const existingFiles = await readdir(realpath);
// avoid overwriting existing folders
let counter = 0;
while (await pathExists(folderName)) {
while (existingFiles.includes(basename(folderName))) {
counter++;
folderName = join(realpath, `${lastName}-${counter}`);
if (counter > 100) {
throw new Error("Could not find a unique name for downloaded database.");
if (counter <= DUPLICATE_FILENAMES_TRIES) {
// First try to use a counter to make the name unique.
folderName = `${lastName}-${counter}`;
} else if (counter <= DUPLICATE_FILENAMES_TRIES + 5) {
// If there are more than 10,000 similarly named databases,
// give up on using a counter and use a random string instead.
folderName = `${lastName}-${nanoid()}`;
} else {
// This should almost never happen, but just in case, we don't want to
// get stuck in an infinite loop.
throw new Error(
"Could not find a unique name for downloaded database. Please remove some databases and try again.",
);
}
}
return folderName;
return join(realpath, folderName);
}
function validateUrl(databaseUrl: string) {

View File

@@ -1,3 +1,5 @@
import { createFilenameFromString } from "../common/filenames";
const packNamePartRegex = /[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/;
const packNameRegex = new RegExp(
`^(?<scope>${packNamePartRegex.source})/(?<name>${packNamePartRegex.source})$`,
@@ -23,7 +25,11 @@ export function autoNameExtensionPack(
}
const parts = packName.split("/");
const sanitizedParts = parts.map((part) => sanitizeExtensionPackName(part));
const sanitizedParts = parts.map((part) =>
createFilenameFromString(part, {
removeDots: true,
}),
);
// If the scope is empty (e.g. if the given name is "-/b"), then we need to still set a scope
if (sanitizedParts[0].length === 0) {
@@ -37,25 +43,6 @@ export function autoNameExtensionPack(
};
}
/**
 * Sanitizes a single `/`-separated part of an extension pack name so that
 * it only contains lowercase ASCII letters, digits and hyphens.
 *
 * Runs of whitespace, dots and underscores become a hyphen, every other
 * disallowed character is dropped, all leading/trailing hyphens are removed
 * and repeated hyphens are collapsed. The result may be the empty string.
 *
 * @param name The raw name part to sanitize
 * @returns The sanitized name part
 */
function sanitizeExtensionPackName(name: string): string {
  return (
    name
      // Lowercase everything
      .toLowerCase()
      // Replace all spaces, dots, and underscores with hyphens
      .replaceAll(/[\s._]+/g, "-")
      // Replace all characters which are not allowed by empty strings
      .replaceAll(/[^a-z0-9-]/g, "")
      // Remove *all* leading or trailing hyphens. Trimming only one hyphen
      // per side would leave e.g. "--a--" as "-a-", because the duplicate
      // removal below never touches a single remaining hyphen.
      .replaceAll(/^-+|-+$/g, "")
      // Remove any duplicate hyphens
      .replaceAll(/-{2,}/g, "-")
  );
}
export function parsePackName(packName: string): ExtensionPackName | undefined {
const matches = packNameRegex.exec(packName);
if (!matches?.groups) {

View File

@@ -20,6 +20,7 @@ import type {
ModelExtension,
ModelExtensionFile,
} from "./model-extension-file";
import { createFilenameFromString } from "../common/filenames";
import type { QueryLanguage } from "../common/query-language";
import modelExtensionFileSchema from "./model-extension-file.schema.json";
@@ -275,26 +276,7 @@ export function createFilenameForLibrary(
prefix = "models/",
suffix = ".model",
) {
let libraryName = library;
// Lowercase everything
libraryName = libraryName.toLowerCase();
// Replace all spaces and underscores with hyphens
libraryName = libraryName.replaceAll(/[\s_]+/g, "-");
// Replace all characters which are not allowed by empty strings
libraryName = libraryName.replaceAll(/[^a-z0-9.-]/g, "");
// Remove any leading or trailing hyphens or dots
libraryName = libraryName.replaceAll(/^[.-]+|[.-]+$/g, "");
// Remove any duplicate hyphens
libraryName = libraryName.replaceAll(/-{2,}/g, "-");
// Remove any duplicate dots
libraryName = libraryName.replaceAll(/\.{2,}/g, ".");
return `${prefix}${libraryName}${suffix}.yml`;
return `${prefix}${createFilenameFromString(library)}${suffix}.yml`;
}
export function createFilenameForPackage(

View File

@@ -0,0 +1,85 @@
import { createFilenameFromString } from "../../../src/common/filenames";
describe("createFilenameFromString", () => {
  // Each case lists the expected filename with default options and, when it
  // differs, the expected filename when `removeDots` is enabled.
  const testCases: Array<{
    input: string;
    filename: string;
    filenameWithoutDots?: string;
  }> = [
    {
      input: "sql2o",
      filename: "sql2o",
    },
    {
      input: "spring-boot",
      filename: "spring-boot",
    },
    {
      input: "spring--boot",
      filename: "spring-boot",
    },
    {
      input: "rt",
      filename: "rt",
    },
    {
      input: "System.Runtime",
      filename: "system.runtime",
      filenameWithoutDots: "system-runtime",
    },
    {
      input: "System..Runtime",
      filename: "system.runtime",
      filenameWithoutDots: "system-runtime",
    },
    {
      input: "google/brotli",
      filename: "google-brotli",
    },
    {
      input: "github/vscode-codeql",
      filename: "github-vscode-codeql",
    },
    {
      input: "github/vscode---codeql--",
      filename: "github-vscode-codeql",
    },
    {
      input: "github...vscode--c..odeql",
      filename: "github.vscode-c.odeql",
      filenameWithoutDots: "github-vscode-c-odeql",
    },
    {
      input: "github\\vscode-codeql",
      filename: "github-vscode-codeql",
    },
    {
      input: "uNetworking/uWebSockets.js",
      filename: "unetworking-uwebsockets.js",
      filenameWithoutDots: "unetworking-uwebsockets-js",
    },
    {
      input: "github/.vscode-codeql",
      filename: "github-.vscode-codeql",
      filenameWithoutDots: "github-vscode-codeql",
    },
    // Whitespace and underscores are separators too
    {
      input: "Hello World_Foo",
      filename: "hello-world-foo",
    },
    // Leading and trailing dots are trimmed
    {
      input: ".hidden.",
      filename: "hidden",
    },
    // Nothing permitted is left, so the result is empty
    {
      input: "???",
      filename: "",
    },
  ];

  test.each(testCases)(
    "returns $filename if string is $input",
    ({ input, filename }) => {
      expect(createFilenameFromString(input)).toEqual(filename);
    },
  );

  // Resolve the expectation up front so that the generated test title shows
  // the value that is actually asserted (not the with-dots filename).
  const testCasesWithoutDots = testCases.map(
    ({ input, filename, filenameWithoutDots }) => ({
      input,
      expected: filenameWithoutDots ?? filename,
    }),
  );

  test.each(testCasesWithoutDots)(
    "returns $expected if string is $input and dots are not allowed",
    ({ input, expected }) => {
      expect(
        createFilenameFromString(input, {
          removeDots: true,
        }),
      ).toEqual(expected);
    },
  );
});