Ruby: use AccessPathSyntax.qll to parse input/output summary specs

This commit is contained in:
Asger Feldthaus
2022-02-07 15:18:13 +01:00
parent 0af9e8aa58
commit 6dbeb81f36
5 changed files with 174 additions and 85 deletions

View File

@@ -505,6 +505,7 @@
"AccessPathSyntax": [
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll"
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
]
}

View File

@@ -0,0 +1,99 @@
/**
* Module for parsing access paths from CSV models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which do not use the shared data flow library).
*/
/**
 * Companion module to the `AccessPath` class.
 *
 * It contains the `Range` class, which is the extension point that decides
 * which strings are treated as access paths.
 */
module AccessPath {
/**
 * A string that should be parsed as an access path.
 *
 * The class is abstract, so it consists of exactly the strings contributed by
 * its subclasses. Extend it with a characteristic predicate that selects the
 * strings of interest in order to have them parsed.
 */
abstract class Range extends string {
// The characteristic predicate places no restriction of its own on `this`
// (`any()`); the `bindingset[this]` annotation declares that `this` is
// expected to be bound by the context, i.e. by the concrete subclasses.
bindingset[this]
Range() { any() }
}
}
/**
 * A string used as an access path, either an identifying access path or an
 * input/output specification, which might be relevant for this database.
 *
 * The strings in this class are those contributed through `AccessPath::Range`.
 */
class AccessPath extends string instanceof AccessPath::Range {
  /**
   * Gets the raw text of the `n`th token on this access path.
   *
   * No validation happens here; see `hasSyntaxError` and `getToken`.
   */
  string getRawToken(int n) {
    // Splitting on '.' would be wrong, because a token may itself contain dots,
    // e.g. `Field[foo.Bar.x]`. Tokens are instead located with `regexpFind`, and
    // `hasSyntaxError` performs a length comparison afterwards to detect
    // characters that did not end up in any token.
    result = this.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
  }

  /** Holds if this string is not a syntactically valid access path. */
  predicate hasSyntaxError() {
    // Each token contributes its own length plus one for a `.` separator; the
    // trailing `- 1` accounts for the last token not being followed by a `.`.
    // If that total equals the length of the string, all characters must have
    // been included in a token or been seen by the `.` lookahead pattern.
    not this.length() = sum(int n | | this.getRawToken(n).length() + 1) - 1 and
    this != ""
  }

  /** Gets the `n`th token on this access path, provided the path has no syntax errors. */
  AccessPathToken getToken(int n) {
    not this.hasSyntaxError() and
    result = this.getRawToken(n)
  }

  /** Gets the number of tokens on this access path, provided the path has no syntax errors. */
  int getNumToken() {
    not this.hasSyntaxError() and
    result = count(int n | exists(this.getRawToken(n)))
  }

  /** Gets the `n`th token counted from the end, where index 0 denotes the last token. */
  AccessPathToken getLastToken(int n) {
    result = this.getToken(this.getNumToken() - 1 - n)
  }
}
/**
 * An access path written in the older `A of B` notation; the equivalent path in
 * the current notation is `B.A`.
 *
 * This class exists as a compatibility layer, so that tests can be run at
 * checkpoints while specifications are being migrated to the new syntax.
 */
private class LegacyAccessPath extends AccessPath {
  LegacyAccessPath() { this.matches("% of %") }

  /** Gets the `n`th piece obtained by splitting this string around `" of "`, in written order. */
  private string getSplitPart(int n) { result = this.splitAt(" of ", n) }

  /** Gets the number of pieces obtained by splitting this string around `" of "`. */
  private int getNumSplitParts() {
    result = strictcount(int n | exists(this.getSplitPart(n)))
  }

  /**
   * Gets the `n`th token. The legacy notation lists tokens in the opposite
   * order, so the pieces are indexed from the end.
   */
  override string getRawToken(int n) {
    result = this.getSplitPart(this.getNumSplitParts() - n - 1)
  }

  /** Never holds: legacy access paths are not checked for syntax errors. */
  override predicate hasSyntaxError() { none() }
}
/**
 * An access path token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
 */
class AccessPathToken extends string {
  AccessPathToken() { this = any(AccessPath path).getRawToken(_) }

  /**
   * Gets the text of capture group `part` when this token is matched as
   * `name` optionally followed by `[arguments]`: group 1 is the name and
   * group 2 is the text between the brackets.
   *
   * Has no result if the token as a whole does not have that shape, or if the
   * requested group did not take part in the match.
   */
  private string getPart(int part) {
    result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
  }

  /** Gets the name of the token, such as `Member` from `Member[x]` */
  string getName() { result = this.getPart(1) }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  string getArgumentList() { result = this.getPart(2) }

  /**
   * Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`.
   *
   * Whitespace around each argument is removed, so `Member[x, y]` also yields
   * `x` and `y` rather than `x` and ` y`.
   */
  string getArgument(int n) {
    // Trim each piece: without this, a space after the separating comma would
    // become part of the argument and prevent it from matching names or positions.
    result = this.getArgumentList().splitAt(",", n).trim()
  }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { result = this.getArgument(_) }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
}

View File

@@ -228,6 +228,7 @@ module Public {
*/
module Private {
private import Public
import AccessPathSyntax
newtype TSummaryComponent =
TContentSummaryComponent(Content c) or
@@ -811,69 +812,46 @@ module Private {
sinkElement(_, spec, _)
}
/** Holds if the `n`th component of specification `s` is `c`. */
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
/** Holds if specification `s` has length `len`. */
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
/** Gets the last component of specification `s`. */
string specLast(string s) {
exists(int len |
specLength(s, len) and
specSplit(s, result, len - 1)
)
private class AccessPathRange extends AccessPath::Range {
AccessPathRange() { relevantSpec(this) }
}
/** Holds if specification component `c` parses as parameter `n`. */
predicate parseParam(string c, ArgumentPosition pos) {
specSplit(_, c, _) and
exists(string body |
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1) and
pos = parseParamBody(body)
)
predicate parseParam(AccessPathToken token, ArgumentPosition pos) {
token.getName() = "Parameter" and
pos = parseParamBody(token.getAnArgument())
}
/** Holds if specification component `c` parses as argument `n`. */
predicate parseArg(string c, ParameterPosition pos) {
specSplit(_, c, _) and
exists(string body |
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1) and
pos = parseArgBody(body)
)
predicate parseArg(AccessPathToken token, ParameterPosition pos) {
token.getName() = "Argument" and
pos = parseArgBody(token.getAnArgument())
}
private SummaryComponent interpretComponent(string c) {
specSplit(_, c, _) and
(
exists(ParameterPosition pos |
parseArg(c, pos) and result = SummaryComponent::argument(pos)
)
or
exists(ArgumentPosition pos |
parseParam(c, pos) and result = SummaryComponent::parameter(pos)
)
or
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
or
result = interpretComponentSpecific(c)
private SummaryComponent interpretComponent(AccessPathToken token) {
exists(ParameterPosition pos |
parseArg(token, pos) and result = SummaryComponent::argument(pos)
)
or
exists(ArgumentPosition pos |
parseParam(token, pos) and result = SummaryComponent::parameter(pos)
)
or
token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
or
result = interpretComponentSpecific(token)
}
/**
* Holds if `spec` specifies summary component stack `stack`.
*/
predicate interpretSpec(string spec, SummaryComponentStack stack) {
predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) {
interpretSpec(spec, 0, stack)
}
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
exists(string c |
relevantSpec(spec) and
specLength(spec, idx + 1) and
specSplit(spec, c, idx) and
stack = SummaryComponentStack::singleton(interpretComponent(c))
)
private predicate interpretSpec(AccessPath spec, int idx, SummaryComponentStack stack) {
idx = spec.getNumToken() - 1 and
stack = SummaryComponentStack::singleton(interpretComponent(spec.getLastToken(idx)))
or
exists(SummaryComponent head, SummaryComponentStack tail |
interpretSpec(spec, idx, head, tail) and
@@ -882,13 +860,10 @@ module Private {
}
private predicate interpretSpec(
string output, int idx, SummaryComponent head, SummaryComponentStack tail
AccessPath output, int idx, SummaryComponent head, SummaryComponentStack tail
) {
exists(string c |
interpretSpec(output, idx + 1, tail) and
specSplit(output, c, idx) and
head = interpretComponent(c)
)
interpretSpec(output, idx + 1, tail) and
head = interpretComponent(output.getLastToken(idx))
}
private class MkStack extends RequiredSummaryComponentStack {
@@ -903,7 +878,7 @@ module Private {
override predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
exists(string inSpec, string outSpec, string kind |
exists(AccessPath inSpec, AccessPath outSpec, string kind |
summaryElement(this, inSpec, outSpec, kind) and
interpretSpec(inSpec, input) and
interpretSpec(outSpec, output)
@@ -916,50 +891,55 @@ module Private {
}
/** Holds if component `c` of specification `spec` cannot be parsed. */
predicate invalidSpecComponent(string spec, string c) {
specSplit(spec, c, _) and
predicate invalidSpecComponent(AccessPath spec, string c) {
c = spec.getRawToken(_) and
not exists(interpretComponent(c))
}
private predicate inputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
private predicate inputNeedsReference(AccessPathToken c) {
c.getName() = "Argument" or
inputNeedsReferenceSpecific(c)
}
private predicate outputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
c = "ReturnValue" or
private predicate outputNeedsReference(AccessPathToken c) {
c.getName() = ["Argument", "ReturnValue"] or
outputNeedsReferenceSpecific(c)
}
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) {
exists(SourceOrSinkElement e |
sourceElement(e, output, kind) and
if outputNeedsReference(specLast(output))
if outputNeedsReference(output.getToken(0))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) {
exists(SourceOrSinkElement e |
sinkElement(e, input, kind) and
if inputNeedsReference(specLast(input))
if inputNeedsReference(input.getToken(0))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
private predicate interpretOutput(
AccessPath output, int idx, InterpretNode ref, InterpretNode node
) {
sourceElementRef(ref, output, _) and
specLength(output, idx) and
node = ref
idx = output.getNumToken() and
(
if output = ""
then
// Allow language-specific interpretation of the empty access path
interpretOutputSpecific("", ref, node)
else node = ref
)
or
exists(InterpretNode mid, string c |
exists(InterpretNode mid, AccessPathToken c |
interpretOutput(output, idx + 1, ref, mid) and
specSplit(output, c, idx)
c = output.getLastToken(idx)
|
exists(ArgumentPosition apos, ParameterPosition ppos |
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@@ -982,14 +962,22 @@ module Private {
)
}
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
private predicate interpretInput(
AccessPath input, int idx, InterpretNode ref, InterpretNode node
) {
sinkElementRef(ref, input, _) and
specLength(input, idx) and
node = ref
idx = input.getNumToken() and
(
if input = ""
then
// Allow language-specific interpretation of the empty access path
interpretInputSpecific("", ref, node)
else node = ref
)
or
exists(InterpretNode mid, string c |
exists(InterpretNode mid, AccessPathToken c |
interpretInput(input, idx + 1, ref, mid) and
specSplit(input, c, idx)
c = input.getLastToken(idx)
|
exists(ArgumentPosition apos, ParameterPosition ppos |
node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and

View File

@@ -159,16 +159,16 @@ module ParsePositions {
private import FlowSummaryImpl
private predicate isParamBody(string body) {
exists(string c |
Private::External::specSplit(_, c, _) and
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1)
exists(AccessPathToken tok |
tok.getName() = "Parameter" and
body = tok.getAnArgument()
)
}
private predicate isArgBody(string body) {
exists(string c |
Private::External::specSplit(_, c, _) and
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1)
exists(AccessPathToken tok |
tok.getName() = "Argument" and
body = tok.getAnArgument()
)
}

View File

@@ -7,15 +7,16 @@ import codeql.ruby.dataflow.FlowSummary
import DataFlow::PathGraph
import codeql.ruby.TaintTracking
import codeql.ruby.dataflow.internal.FlowSummaryImpl
import codeql.ruby.dataflow.internal.AccessPathSyntax
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
Private::External::invalidSpecComponent(s, c)
}
query predicate invalidOutputSpecComponent(SummarizedCallable sc, string s, string c) {
query predicate invalidOutputSpecComponent(SummarizedCallable sc, AccessPath s, AccessPathToken c) {
sc.propagatesFlowExt(_, s, _) and
Private::External::specSplit(s, c, _) and
c = s.getToken(_) and
c = "ArrayElement" // not allowed in output specs; use `ArrayElement[?] instead
}