/**
 * This file contains functions for parsing and validating access paths.
 *
 * This intentionally does not simply split by '.' since tokens may contain dots,
 * e.g. `Field[foo.Bar.x]`. Instead, it uses some simple parsing to match valid tokens.
 *
 * Valid syntax was determined based on this file:
 * https://github.com/github/codeql/blob/a04830b8b2d3e5f7df8e1f80f06c020b987a89a3/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll
 *
 * In contrast to that file, we do not use a regex for parsing to allow us to be more lenient.
 * For example, we can parse partial access paths such as `Field[foo.Bar.x` without error.
 */

/**
 * A range of characters in an access path. The start position is inclusive, the end position is exclusive.
 */
type AccessPathRange = {
  /**
   * Zero-based index of the first character of the token.
   */
  start: number;
  /**
   * Zero-based index of the character after the last character of the token.
   */
  end: number;
};

/**
 * A token in an access path. For example, `Argument[foo]` is a token.
 */
type AccessPartToken = {
  text: string;
  range: AccessPathRange;
};

/**
 * Parses an access path into tokens.
 *
 * Tokens are separated by dots that are not enclosed in brackets. Parsing is lenient: it never
 * fails, so partial paths such as `Argument[foo].Field[` still produce tokens, and an empty
 * segment (leading, trailing or doubled dot) produces a token with empty text and an empty range.
 * The empty string therefore yields a single empty token.
 *
 * @param path The access path to parse.
 * @returns An array of tokens, in the order in which they appear in the path.
 */
export function parseAccessPathTokens(path: string): AccessPartToken[] {
  const parts: AccessPartToken[] = [];

  let currentPathStart = 0;
  // Keep track of the bracket nesting depth so we can parse the path correctly when it contains
  // nested brackets such as `Argument[foo[bar].test].Element`.
  let bracketDepth = 0;
  for (let i = 0; i < path.length; i++) {
    const c = path[i];

    if (c === "[") {
      bracketDepth++;
    } else if (c === "]") {
      // Never let the depth go negative: a stray `]` (e.g. `Argument[foo]].Element`) would
      // otherwise prevent every following dot from being recognised as a separator.
      if (bracketDepth > 0) {
        bracketDepth--;
      }
    } else if (c === "." && bracketDepth === 0) {
      // A part ends when we encounter a dot that is not inside brackets.
      parts.push({
        text: path.slice(currentPathStart, i),
        range: {
          start: currentPathStart,
          end: i,
        },
      });
      currentPathStart = i + 1;
    }
  }

  // The last part is not followed by a dot, so we need to add it manually.
  // If the last part is empty, such as for `Argument[foo].`, then this is still correct
  // since the `validateAccessPath` function will check that none of the tokens are
  // empty.
  parts.push({
    text: path.slice(currentPathStart),
    range: {
      start: currentPathStart,
      end: path.length,
    },
  });

  return parts;
}

// Regex for a single part of the access path: a word, optionally followed by a single
// bracketed argument that itself contains no `]`.
const tokenRegex = /^(\w+)(?:\[([^\]]*)])?$/;

/**
 * An error found in an access path, together with the range of the offending token.
 */
type AccessPathDiagnostic = {
  range: AccessPathRange;
  message: string;
};
/**
 * Checks each token of an access path against the token grammar and reports every token that
 * does not match. The path must be complete: partial access paths such as `Argument[` are
 * reported as errors. The empty string is accepted and yields no diagnostics.
 *
 * @param path The access path to validate.
 * @returns One diagnostic per invalid token, in the order the tokens appear in the path.
 */
export function validateAccessPath(path: string): AccessPathDiagnostic[] {
  const diagnostics: AccessPathDiagnostic[] = [];

  // An empty path is not considered an error.
  if (path === "") {
    return diagnostics;
  }

  for (const { text, range } of parseAccessPathTokens(path)) {
    if (tokenRegex.test(text)) {
      continue;
    }

    // A zero-width range means the token sits between two dots or at the edge of the path.
    const isEmptyToken = range.start === range.end;
    diagnostics.push({
      range,
      message: isEmptyToken ? "Unexpected empty token" : "Invalid access path",
    });
  }

  return diagnostics;
}
range: { + start: 14, + end: 20, + }, + text: "Field[", + }, + ], + }, + { + path: "Argument[foo].", + parts: [ + { + text: "Argument[foo]", + range: { + end: 13, + start: 0, + }, + }, + { + text: "", + range: { + end: 14, + start: 14, + }, + }, + ], + }, + { + path: "Argument[foo]..", + parts: [ + { + text: "Argument[foo]", + range: { + end: 13, + start: 0, + }, + }, + { + text: "", + range: { + end: 14, + start: 14, + }, + }, + { + text: "", + range: { + end: 15, + start: 15, + }, + }, + ], + }, + { + path: "Argument[foo[bar].test].Element.", + parts: [ + { + range: { + start: 0, + end: 23, + }, + text: "Argument[foo[bar].test]", + }, + { + range: { + start: 24, + end: 31, + }, + text: "Element", + }, + { + range: { + start: 32, + end: 32, + }, + text: "", + }, + ], + }, + ])(`parses correctly for $path`, ({ path, parts }) => { + expect(parseAccessPathTokens(path)).toEqual(parts); + }); +}); + +describe("validateAccessPath", () => { + it.each([ + { + path: "Argument[foo].Element.Field[@test]", + diagnostics: [], + }, + { + path: "Argument[foo].Element.Field[foo.Bar.x]", + diagnostics: [], + }, + { + path: "Argument[", + diagnostics: [ + { + message: "Invalid access path", + range: { + start: 0, + end: 9, + }, + }, + ], + }, + { + path: "Argument[se", + diagnostics: [ + { + message: "Invalid access path", + range: { + start: 0, + end: 11, + }, + }, + ], + }, + { + path: "Argument[foo].Field[", + diagnostics: [ + { + message: "Invalid access path", + range: { + start: 14, + end: 20, + }, + }, + ], + }, + { + path: "Argument[foo].", + diagnostics: [ + { message: "Unexpected empty token", range: { start: 14, end: 14 } }, + ], + }, + { + path: "Argument[foo]..", + diagnostics: [ + { message: "Unexpected empty token", range: { start: 14, end: 14 } }, + { message: "Unexpected empty token", range: { start: 15, end: 15 } }, + ], + }, + { + path: "Argument[foo[bar].test].Element.", + diagnostics: [ + { message: "Invalid access path", range: { start: 0, end: 23 } }, + { 
message: "Unexpected empty token", range: { start: 32, end: 32 } }, + ], + }, + ])( + `validates $path correctly with $diagnostics.length errors`, + ({ path, diagnostics }) => { + expect(validateAccessPath(path)).toEqual(diagnostics); + }, + ); +});