Merge pull request #19943 from asgerf/approximate-related-location

Support approximate related locations
This commit is contained in:
Jonas Jensen
2025-07-11 10:16:24 +02:00
committed by GitHub
4 changed files with 92 additions and 45 deletions

View File

@@ -47,6 +47,18 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig {
node instanceof SimpleTypeSanitizer or
node.asExpr().(MethodCall).getMethod() instanceof LengthRestrictedMethod
}
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp |
regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp()
|
result = sink.getLocation()
or
result = regexp.getLocation()
)
}
}
module PolynomialRedosFlow = TaintTracking::Global<PolynomialRedosConfig>;

View File

@@ -18,17 +18,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
// Diff-informed incremental mode is currently disabled for this query due to
// API limitations. The query exposes sink.getABacktrackingTerm() as an alert
// location, but there is no way to express that information through
// getASelectedSinkLocation() because there is no @location in the CodeQL
// database that corresponds to a term inside a regular expression. As a
// result, this query could miss alerts in diff-informed incremental mode.
//
// To address this problem, we need to have a version of
// getASelectedSinkLocation() that uses hasLocationInfo() instead of
// returning Location objects.
predicate observeDiffInformedIncrementalMode() { none() }
predicate observeDiffInformedIncrementalMode() { any() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
result = sink.(Sink).getHighlight().getLocation()

View File

@@ -18,6 +18,16 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
// Diff-informedness is disabled because of RegExpTerms having incorrect locations when
// the regexp is parsed from a string arising from constant folding.
predicate observeDiffInformedIncrementalMode() { none() }
Location getASelectedSinkLocation(DataFlow::Node sink) {
result = sink.(Sink).getHighlight().getLocation()
or
result = sink.(Sink).getRegExp().getRootTerm().getLocation()
}
}
/**

View File

@@ -8,15 +8,14 @@ module;
private import codeql.util.Location
/**
* Holds if the query should produce alerts that match the given line ranges.
* Holds if the query may restrict its computation to only produce alerts that match the given line
* ranges. This predicate is used for implementing _diff-informed queries_ for pull requests in
* GitHub Code Scanning.
*
* This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no
* effect. If it is active, it accepts any alert that has at least one matching location.
*
* Note that an alert that is not accepted by this filtering predicate may still be included in the
* query results if it is accepted by another active filtering predicate in this module. An alert is
* excluded from the query results if only if (1) there is at least one active filtering predicate,
* and (2) it is not accepted by any active filtering predicate.
* effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_
* location (in SARIF terminology) whose start line is within a specified range. Queries are allowed
* to produce alerts outside this range.
*
* An alert location is a match if it matches a row in this predicate. If `startLineStart` and
* `startLineEnd` are both 0, the row specifies a whole-file match, and a location is a match if
@@ -29,26 +28,24 @@ private import codeql.util.Location
* - startLineStart: inclusive start of the range for alert location start line number (1-based).
* - startLineEnd: inclusive end of the range for alert location start line number (1-based).
*
* A query should either perform no alert filtering, or adhere to all the filtering rules in this
* module and return all and only the accepted alerts.
*
* This predicate is suitable for situations where we want to filter alerts at line granularity,
* such as based on the pull request diff.
* Note that an alert that is not accepted by this filtering predicate may still be included in the
* query results if it is accepted by another active filtering predicate in this module. An alert is
* excluded from the query results if only if (1) there is at least one active filtering predicate,
* and (2) it is not accepted by any active filtering predicate.
*
* See also: `restrictAlertsToExactLocation`.
*/
extensible predicate restrictAlertsTo(string filePath, int startLineStart, int startLineEnd);
/**
* Holds if the query should produce alerts that match the given locations.
* Holds if the query may restrict its computation to only produce alerts that match the given
* character ranges. This predicate is suitable for testing, where we want to filter by the exact
* alert location, distinguishing between alerts on the same line.
*
* This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no
* effect. If it is active, it accepts any alert that has at least one matching location.
*
* Note that an alert that is not accepted by this filtering predicate may still be included in the
* query results if it is accepted by another active filtering predicate in this module. An alert is
* excluded from the query results if only if (1) there is at least one active filtering predicate,
* and (2) it is not accepted by any active filtering predicate.
* effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_
* location (in SARIF terminology) whose location equals a tuple from this predicate. Queries are
* allowed to produce alerts outside this range.
*
* An alert location is a match if it matches a row in this predicate. Each row specifies an exact
* location: an alert location is a match if its file path matches `filePath`, its start line and
@@ -61,11 +58,10 @@ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int s
* - endLine: alert location end line number (1-based).
* - endColumn: alert location end column number (1-based).
*
* A query should either perform no alert filtering, or adhere to all the filtering rules in this
* module and return all and only the accepted alerts.
*
* This predicate is suitable for situations where we want to filter by the exact alert location,
* distinguishing between alerts on the same line.
* Note that an alert that is not accepted by this filtering predicate may still be included in the
* query results if it is accepted by another active filtering predicate in this module. An alert is
* excluded from the query results if only if (1) there is at least one active filtering predicate,
* and (2) it is not accepted by any active filtering predicate.
*
* See also: `restrictAlertsTo`.
*/
@@ -75,25 +71,64 @@ extensible predicate restrictAlertsToExactLocation(
/** Module for applying alert location filtering. */
module AlertFilteringImpl<LocationSig Location> {
/** Applies alert filtering to the given location. */
pragma[nomagic]
private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) }
pragma[nomagic]
private predicate restrictAlertsToLine(string filePath, int line) {
exists(int startLineStart, int startLineEnd |
restrictAlertsTo(filePath, startLineStart, startLineEnd) and
line = [startLineStart .. startLineEnd]
)
}
/**
* Holds if the given location intersects the diff range. When that is the
* case, ensuring that alerts mentioning this location are included in the
* query results is a valid overapproximation of the requirements for
* _diff-informed queries_ as documented under `restrictAlertsTo`.
*
* Testing for full intersection rather than only matching the start line
* means that this predicate is more broadly useful than just checking whether
* a specific element is considered to be in the diff range of GitHub Code
* Scanning:
* - If it's inconvenient to pass the exact `Location` of the element of
* interest, it's valid to use a `Location` of an enclosing element.
* - This predicate could be useful for other systems of alert presentation
* where the rules don't exactly match GitHub Code Scanning.
*
* If there is no diff range, this predicate holds for all locations. Note
* that this predicate has a bindingset and will therefore be inlined;
* callers should include enough context to ensure efficient evaluation.
*/
bindingset[location]
predicate filterByLocation(Location location) {
not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _)
or
exists(string filePath, int startLineStart, int startLineEnd |
restrictAlertsTo(filePath, startLineStart, startLineEnd)
|
startLineStart = 0 and
startLineEnd = 0 and
exists(string filePath |
restrictAlertsToEntireFile(filePath) and
location.hasLocationInfo(filePath, _, _, _, _)
or
location.hasLocationInfo(filePath, [startLineStart .. startLineEnd], _, _, _)
exists(int locStartLine, int locEndLine |
location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _)
|
restrictAlertsToLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine])
)
)
or
exists(string filePath, int startLine, int startColumn, int endLine, int endColumn |
restrictAlertsToExactLocation(filePath, startLine, startColumn, endLine, endColumn)
// Check if an exact filter-location is fully contained in `location`.
// This is slow but only used for testing.
exists(
string filePath, int startLine, int startColumn, int endLine, int endColumn,
int filterStartLine, int filterStartColumn, int filterEndLine, int filterEndColumn
|
location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) and
restrictAlertsToExactLocation(filePath, filterStartLine, filterStartColumn, filterEndLine,
filterEndColumn) and
startLine <= filterStartLine and
(startLine != filterStartLine or startColumn <= filterStartColumn) and
endLine >= filterEndLine and
(endLine != filterEndLine or endColumn >= filterEndColumn)
)
}
}