Merge pull request #3218 from github/koesie10/parse-access-paths

Add functions for parsing and validating access paths
This commit is contained in:
Koen Vlaswinkel
2024-01-11 13:34:01 +01:00
committed by GitHub
2 changed files with 379 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
/**
* This file contains functions for parsing and validating access paths.
*
* This intentionally does not simply split by '.' since tokens may contain dots,
* e.g. `Field[foo.Bar.x]`. Instead, it uses some simple parsing to match valid tokens.
*
* Valid syntax was determined based on this file:
* https://github.com/github/codeql/blob/a04830b8b2d3e5f7df8e1f80f06c020b987a89a3/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll
*
* In contrast to that file, we do not use a regex for parsing to allow us to be more lenient.
* For example, we can parse partial access paths such as `Field[foo.Bar.x` without error.
*/
/**
 * A range of characters in an access path. The start position is inclusive, the end position is exclusive.
 * A range whose start and end are equal therefore covers no characters.
 */
type AccessPathRange = {
/**
 * Zero-based index of the first character of the token.
 */
start: number;
/**
 * Zero-based index of the character after the last character of the token.
 */
end: number;
};
/**
 * A token in an access path. For example, `Argument[foo]` is a token.
 */
type AccessPartToken = {
/**
 * The text of the token, without the dots that separate it from neighbouring tokens.
 */
text: string;
/**
 * The position of the token within the access path it was parsed from.
 */
range: AccessPathRange;
};
/**
 * Parses an access path into tokens.
 *
 * Tokens are separated by dots that are not enclosed in brackets, so
 * `Argument[foo.bar].Element` yields the tokens `Argument[foo.bar]` and `Element`.
 * Parsing is lenient: partial paths such as `Argument[foo`, empty tokens such as the
 * one after the trailing dot in `Argument[foo].`, and stray closing brackets are all
 * returned as tokens rather than reported as errors.
 *
 * @param path The access path to parse.
 * @returns An array of tokens in order of appearance. It always contains at least one
 *   token; for an empty path that is a single empty token.
 */
export function parseAccessPathTokens(path: string): AccessPartToken[] {
  const parts: AccessPartToken[] = [];
  let currentPathStart = 0;
  // Keep track of the bracket nesting depth so that we can parse the path correctly when
  // it contains nested brackets such as `Argument[foo[bar].test].Element`.
  let bracketDepth = 0;

  for (let i = 0; i < path.length; i++) {
    const c = path[i];
    if (c === "[") {
      bracketDepth++;
    } else if (c === "]") {
      // Ignore an unmatched `]` instead of letting the depth go negative. A negative
      // depth would stop top-level dots from separating tokens and would make a dot
      // inside a later pair of brackets split the token.
      if (bracketDepth > 0) {
        bracketDepth--;
      }
    } else if (c === "." && bracketDepth === 0) {
      // A part ends when we encounter a dot that is not inside brackets. The dot itself
      // belongs to neither the part before it nor the part after it.
      parts.push({
        text: path.slice(currentPathStart, i),
        range: {
          start: currentPathStart,
          end: i,
        },
      });
      currentPathStart = i + 1;
    }
  }

  // The last part is not followed by a dot, so we need to add it manually.
  // If the last part is empty, such as for `Argument[foo].`, then this is still correct
  // since the `validateAccessPath` function will check that none of the tokens are
  // empty.
  parts.push({
    text: path.slice(currentPathStart),
    range: {
      start: currentPathStart,
      end: path.length,
    },
  });

  return parts;
}
// Regex for a single part of the access path: one or more word characters, optionally
// followed by a single bracketed argument that does not itself contain a `]`. Tokens
// with nested brackets, such as `Argument[foo[bar].test]`, therefore do not match.
const tokenRegex = /^(\w+)(?:\[([^\]]*)])?$/;
/**
 * A problem found in an access path during validation.
 */
type AccessPathDiagnostic = {
/**
 * The range of the offending token within the access path.
 */
range: AccessPathRange;
/**
 * A human-readable description of the problem.
 */
message: string;
};
/**
 * Checks a complete access path and reports every token that is not well-formed.
 * Partial access paths are not accepted: a token that is still being typed, such as
 * `Argument[`, is reported as an error.
 *
 * @param path The access path to validate.
 * @returns One diagnostic per invalid token, in the order the tokens appear in the path.
 *   The array is empty when the path is valid or is the empty string.
 */
export function validateAccessPath(path: string): AccessPathDiagnostic[] {
  const diagnostics: AccessPathDiagnostic[] = [];

  // An empty path has no tokens to check, so it produces no diagnostics.
  if (path === "") {
    return diagnostics;
  }

  for (const { text, range } of parseAccessPathTokens(path)) {
    if (tokenRegex.test(text)) {
      continue;
    }

    // A zero-width token gets a more specific message than other malformed tokens.
    const isEmptyToken = range.start === range.end;
    diagnostics.push({
      range,
      message: isEmptyToken ? "Unexpected empty token" : "Invalid access path",
    });
  }

  return diagnostics;
}

View File

@@ -0,0 +1,251 @@
import {
parseAccessPathTokens,
validateAccessPath,
} from "../../../../src/model-editor/shared/access-paths";
describe("parseAccessPathTokens", () => {
  // Builds an expected token from its text and its [start, end) character range.
  const token = (text: string, start: number, end: number) => ({
    text,
    range: { start, end },
  });

  const cases = [
    // Complete paths, including a token whose brackets contain dots.
    {
      path: "Argument[foo].Element.Field[@test]",
      parts: [
        token("Argument[foo]", 0, 13),
        token("Element", 14, 21),
        token("Field[@test]", 22, 34),
      ],
    },
    {
      path: "Argument[foo].Element.Field[foo.Bar.x]",
      parts: [
        token("Argument[foo]", 0, 13),
        token("Element", 14, 21),
        token("Field[foo.Bar.x]", 22, 38),
      ],
    },
    // Partial paths with an unclosed bracket.
    {
      path: "Argument[",
      parts: [token("Argument[", 0, 9)],
    },
    {
      path: "Argument[se",
      parts: [token("Argument[se", 0, 11)],
    },
    {
      path: "Argument[foo].Field[",
      parts: [token("Argument[foo]", 0, 13), token("Field[", 14, 20)],
    },
    // Trailing dots produce empty tokens.
    {
      path: "Argument[foo].",
      parts: [token("Argument[foo]", 0, 13), token("", 14, 14)],
    },
    {
      path: "Argument[foo]..",
      parts: [
        token("Argument[foo]", 0, 13),
        token("", 14, 14),
        token("", 15, 15),
      ],
    },
    // Nested brackets stay within a single token.
    {
      path: "Argument[foo[bar].test].Element.",
      parts: [
        token("Argument[foo[bar].test]", 0, 23),
        token("Element", 24, 31),
        token("", 32, 32),
      ],
    },
  ];

  it.each(cases)(`parses correctly for $path`, ({ path, parts }) => {
    expect(parseAccessPathTokens(path)).toEqual(parts);
  });
});
describe("validateAccessPath", () => {
  // Builds an expected diagnostic for the token covering [start, end).
  const diagnostic = (message: string, start: number, end: number) => ({
    message,
    range: { start, end },
  });
  const invalidPath = (start: number, end: number) =>
    diagnostic("Invalid access path", start, end);
  const emptyToken = (position: number) =>
    diagnostic("Unexpected empty token", position, position);

  const cases = [
    // Valid paths produce no diagnostics.
    {
      path: "Argument[foo].Element.Field[@test]",
      diagnostics: [],
    },
    {
      path: "Argument[foo].Element.Field[foo.Bar.x]",
      diagnostics: [],
    },
    // Unclosed brackets are reported on the token that contains them.
    {
      path: "Argument[",
      diagnostics: [invalidPath(0, 9)],
    },
    {
      path: "Argument[se",
      diagnostics: [invalidPath(0, 11)],
    },
    {
      path: "Argument[foo].Field[",
      diagnostics: [invalidPath(14, 20)],
    },
    // Each empty token is reported separately.
    {
      path: "Argument[foo].",
      diagnostics: [emptyToken(14)],
    },
    {
      path: "Argument[foo]..",
      diagnostics: [emptyToken(14), emptyToken(15)],
    },
    // Nested brackets are rejected, and the trailing dot adds an empty token.
    {
      path: "Argument[foo[bar].test].Element.",
      diagnostics: [invalidPath(0, 23), emptyToken(32)],
    },
  ];

  it.each(cases)(
    `validates $path correctly with $diagnostics.length errors`,
    ({ path, diagnostics }) => {
      expect(validateAccessPath(path)).toEqual(diagnostics);
    },
  );
});