Merge branch 'main' into py/add-ssrf-sinks

This commit is contained in:
Rasmus Wriedt Larsen
2022-03-04 11:50:12 +01:00
414 changed files with 32260 additions and 31653 deletions

View File

@@ -1,3 +1,9 @@
## 0.0.10
### Deprecated APIs
* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.
## 0.0.9
## 0.0.8

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Improved analysis of attributes for data-flow and taint-tracking queries: `getattr`/`setattr` are now supported, and a write to an attribute now properly stops flow of the old value stored in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.

View File

@@ -1,4 +1,5 @@
---
category: deprecated
---
## 0.0.10
### Deprecated APIs
* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.9
lastReleaseVersion: 0.0.10

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.0.10-dev
version: 0.0.11-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -10,6 +10,7 @@ class Pattern extends Pattern_, AstNode {
override Scope getScope() { result = this.getCase().getScope() }
/** Gets the case statement containing this pattern */
pragma[nomagic]
Case getCase() { result.contains(this) }
override string toString() { result = "Pattern" }

View File

@@ -39,7 +39,12 @@ newtype TRegExpParent =
/** A special character */
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
/** A normal character */
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
TRegExpNormalChar(Regex re, int start, int end) {
re.normalCharacterSequence(start, end)
or
re.escapedCharacter(start, end) and
not re.specialCharacter(start, end, _)
} or
/** A back reference */
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }

View File

@@ -204,6 +204,8 @@ abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
override AttrNode node;
AttributeReadAsAttrRead() { node.isLoad() }
override Node getObject() { result.asCfgNode() = node.getObject() }
override ExprNode getAttributeNameExpr() {

View File

@@ -126,7 +126,7 @@ module syntheticPostUpdateNode {
* Certain arguments, such as implicit self arguments, are already post-update nodes
* and should not have an extra node synthesised.
*/
ArgumentNode argumentPreUpdateNode() {
Node argumentPreUpdateNode() {
result = any(FunctionCall c).getArg(_)
or
// Avoid argument 0 of method calls as those have read post-update nodes.
@@ -136,6 +136,11 @@ module syntheticPostUpdateNode {
or
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
or
// any argument of any call that we have not been able to resolve
exists(CallNode call | not call = any(DataFlowCall c).getNode() |
result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
)
}
/** An object might have its value changed after a store. */
@@ -704,7 +709,7 @@ newtype TDataFlowCall =
TFunctionCall(CallNode call) { call = any(FunctionValue f).getAFunctionCall() } or
/** Bound methods need to make room for the explicit self parameter */
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
TClassCall(CallNode call) { call = any(ClassValue c).getACall() } or
TClassCall(CallNode call) { call = any(ClassValue c | not c.isAbsent()).getACall() } or
TSpecialCall(SpecialMethodCallNode special)
/** Represents a call. */
@@ -1067,19 +1072,18 @@ predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
}
/**
* Holds if `nodeFrom` flows into an attribute (corresponding to `c`) of `nodeTo` via an attribute assignment.
* Holds if `nodeFrom` flows into the attribute `c` of `nodeTo` via an attribute assignment.
*
* For example, in
* ```python
* obj.foo = x
* ```
* data flows from `x` to (the post-update node for) `obj` via assignment to `foo`.
* data flows from `x` to the attribute `foo` of (the post-update node for) `obj`.
*/
predicate attributeStoreStep(CfgNode nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
exists(AttrNode attr |
nodeFrom.asCfgNode() = attr.(DefinitionNode).getValue() and
attr.getName() = c.getAttribute() and
attr.getObject() = nodeTo.getPreUpdateNode().(CfgNode).getNode()
predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
exists(AttrWrite write |
write.accesses(nodeTo.getPreUpdateNode(), c.getAttribute()) and
nodeFrom = write.getValue()
)
}
@@ -1923,21 +1927,16 @@ pragma[noinline]
TupleElementContent small_tuple() { result.getIndex() <= 7 }
/**
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
* Holds if `nodeTo` is a read of the attribute `c` of the object `nodeFrom`.
*
* For example, in
* For example
* ```python
* obj.foo
* ```
* data flows from `obj` to `obj.foo` via a read from `foo`.
* is a read of the attribute `foo` from the object `obj`.
*/
predicate attributeReadStep(CfgNode nodeFrom, AttributeContent c, CfgNode nodeTo) {
exists(AttrNode attr |
nodeFrom.asCfgNode() = attr.getObject() and
nodeTo.asCfgNode() = attr and
attr.getName() = c.getAttribute() and
attr.isLoad()
)
predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo) {
nodeTo.accesses(nodeFrom, c.getAttribute())
}
/**
@@ -1973,6 +1972,18 @@ predicate clearsContent(Node n, Content c) {
kwOverflowClearStep(n, c)
or
matchClearStep(n, c)
or
attributeClearStep(n, c)
}
/**
 * Holds if any value previously held in the attribute `c` is cleared at node `n`.
 *
 * This is the case when `n` is the pre-update node of a post-update node that is
 * the target of an attribute store to `c`. For example, in `obj.foo = x`, the old
 * content of `foo` is cleared at the pre-update node associated with `obj`.
 */
predicate attributeClearStep(Node n, AttributeContent c) {
  exists(PostUpdateNode updated |
    attributeStoreStep(_, c, updated) and
    n = updated.getPreUpdateNode()
  )
}
//--------

View File

@@ -9,6 +9,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* INTERNAL: Do not use.
@@ -66,7 +67,12 @@ string prettyNodeForInlineTest(DataFlow::Node node) {
result = "[post]" + prettyExpr(e)
)
or
exists(Expr e | e = node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr() |
result = "[pre]" + prettyExpr(e)
)
or
not exists(node.asExpr()) and
not exists(node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
not exists(node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr()) and
result = node.toString()
}

View File

@@ -114,49 +114,33 @@ class ClassList extends TClassList {
this = Empty() and result = Empty()
}
predicate legalMergeHead(ClassObjectInternal cls) {
this.getTail().doesNotContain(cls)
or
this = Empty()
}
predicate contains(ClassObjectInternal cls) {
cls = this.getHead()
or
this.getTail().contains(cls)
}
/** Use negative formulation to avoid negative recursion */
predicate doesNotContain(ClassObjectInternal cls) {
this.relevantForContains(cls) and
cls != this.getHead() and
this.getTail().doesNotContain(cls)
or
this = Empty()
}
private predicate relevantForContains(ClassObjectInternal cls) {
exists(ClassListList list |
list.getItem(_).getHead() = cls and
list.getItem(_) = this
)
or
exists(ClassList l |
l.relevantForContains(cls) and
this = l.getTail()
)
}
pragma[nomagic]
ClassObjectInternal findDeclaringClass(string name) {
exists(ClassDecl head | head = this.getHead().getClassDeclaration() |
if head.declaresAttribute(name)
then result = this.getHead()
else result = this.getTail().findDeclaringClass(name)
exists(ClassObjectInternal head, ClassList tail, ClassDecl decl |
this = Cons(head, tail) and decl = head.getClassDeclaration()
|
if decl.declaresAttribute(name) then result = head else result = tail.findDeclaringClass(name)
)
}
/**
* Gets the class returned by `findDeclaringClass(name)`, but only for names that
* are either a member of some builtin or the name of an attribute variable of
* some class (as given by `declaredAttributeVar`).
*
* NOTE(review): presumably this restriction exists to keep the set of names
* considered by `lookup` small — confirm against the performance history.
*/
pragma[noinline]
private ClassObjectInternal findDeclaringClassAttribute(string name) {
result = this.findDeclaringClass(name) and
(
// `name` is a member of some builtin
exists(any(Builtin b).getMember(name))
or
// `name` is the name of a variable for which `declaredAttributeVar` holds
declaredAttributeVar(_, name, _)
)
}
predicate lookup(string name, ObjectInternal value, CfgOrigin origin) {
exists(ClassObjectInternal decl | decl = this.findDeclaringClass(name) |
exists(ClassObjectInternal decl | decl = this.findDeclaringClassAttribute(name) |
Types::declaredAttribute(decl, name, value, origin)
)
}
@@ -199,12 +183,18 @@ class ClassList extends TClassList {
or
this.duplicate(n) and result = this.deduplicate(n + 1)
or
exists(ClassObjectInternal cls |
n = this.firstIndex(cls) and
result = Cons(cls, this.deduplicate(n + 1))
exists(ClassObjectInternal cls, ClassList tail |
this.deduplicateCons(n, cls, tail) and
result = Cons(cls, tail)
)
}
/**
* Holds if `n = this.firstIndex(cls)` and `tail` is `this.deduplicate(n + 1)`.
*
* Helper for `deduplicate`, which builds `Cons(cls, tail)` from the result.
*/
pragma[nomagic]
private predicate deduplicateCons(int n, ClassObjectInternal cls, ClassList tail) {
n = this.firstIndex(cls) and
tail = this.deduplicate(n + 1)
}
predicate isEmpty() { this = Empty() }
ClassList reverse() { reverse_step(this, Empty(), result) }
@@ -273,6 +263,24 @@ private class ClassListList extends TClassListList {
result = this.getTail().getItem(n - 1)
}
/**
* Gets the last item of this list, where `n` is the index of that item.
*
* Same as
*
* ```ql
* result = this.getItem(n) and n = this.length() - 1
* ```
*
* but avoids non-linear recursion.
*
* Has no result when this list is not a `ConsList`.
*/
ClassList getLastItem(int n) {
// base case: a single-element list, whose only item is at index 0
n = 0 and this = ConsList(result, EmptyList())
or
// recursive case: the last item of the tail, with its index shifted by one
exists(ClassListList tail |
this = ConsList(_, tail) and
result = tail.getLastItem(n - 1)
)
}
private ClassObjectInternal getAHead() {
result = this.getHead().getHead()
or
@@ -295,17 +303,26 @@ private class ClassListList extends TClassListList {
ClassObjectInternal cls, ClassList removed_head, ClassListList removed_tail, int n
) {
cls = this.bestMergeCandidate() and
n = this.length() - 1 and
removed_head = this.getItem(n).removeHead(cls) and
removed_head = this.getLastItem(n).removeHead(cls) and
removed_tail = EmptyList()
or
removed_head = this.removedClassPartsCons1(cls, removed_tail, n).removeHead(cls)
}
pragma[nomagic]
predicate removedClassPartsCons0(ClassObjectInternal cls, ClassListList removed_tail, int n) {
exists(ClassList prev_head, ClassListList prev_tail |
this.removedClassParts(cls, prev_head, prev_tail, n + 1) and
removed_head = this.getItem(n).removeHead(cls) and
removed_tail = ConsList(prev_head, prev_tail)
)
}
pragma[nomagic]
ClassList removedClassPartsCons1(ClassObjectInternal cls, ClassListList removed_tail, int n) {
this.removedClassPartsCons0(cls, removed_tail, n) and
result = this.getItem(n)
}
ClassListList remove(ClassObjectInternal cls) {
exists(ClassList removed_head, ClassListList removed_tail |
this.removedClassParts(cls, removed_head, removed_tail, 0) and
@@ -315,18 +332,34 @@ private class ClassListList extends TClassListList {
this = EmptyList() and result = EmptyList()
}
predicate legalMergeCandidate(ClassObjectInternal cls, int n) {
cls = this.getAHead() and n = this.length()
pragma[nomagic]
private predicate legalMergeCandidateNonEmpty(
ClassObjectInternal cls, ClassListList remainingList, ClassList remaining
) {
this.legalMergeCandidate(cls, ConsList(Cons(_, remaining), remainingList))
or
this.getItem(n).legalMergeHead(cls) and
this.legalMergeCandidate(cls, n + 1)
exists(ClassObjectInternal head |
this.legalMergeCandidateNonEmpty(cls, remainingList, Cons(head, remaining)) and
cls != head
)
}
predicate legalMergeCandidate(ClassObjectInternal cls) { this.legalMergeCandidate(cls, 0) }
private predicate legalMergeCandidate(ClassObjectInternal cls, ClassListList remaining) {
cls = this.getAHead() and remaining = this
or
this.legalMergeCandidate(cls, ConsList(Empty(), remaining))
or
this.legalMergeCandidateNonEmpty(cls, remaining, Empty())
}
pragma[noinline]
predicate legalMergeCandidate(ClassObjectInternal cls) {
this.legalMergeCandidate(cls, EmptyList())
}
pragma[noinline]
predicate illegalMergeCandidate(ClassObjectInternal cls) {
cls = this.getAHead() and
this.getItem(_).getTail().contains(cls)
this.legalMergeCandidateNonEmpty(cls, _, Cons(cls, _))
}
ClassObjectInternal bestMergeCandidate(int n) {
@@ -337,6 +370,7 @@ private class ClassListList extends TClassListList {
)
}
pragma[noinline]
ClassObjectInternal bestMergeCandidate() { result = this.bestMergeCandidate(0) }
/**
@@ -417,16 +451,27 @@ private predicate merge_step(
remaining_list = original
or
/* Removes the best merge candidate from `remaining_list` and prepends it to `reversed_mro` */
exists(ClassObjectInternal head, ClassList prev_reverse_mro, ClassListList prev_list |
merge_step(prev_reverse_mro, prev_list, original) and
head = prev_list.bestMergeCandidate() and
reversed_mro = Cons(head, prev_reverse_mro) and
remaining_list = prev_list.remove(head)
exists(ClassObjectInternal head, ClassList prev_reverse_mro |
merge_stepCons(head, prev_reverse_mro, remaining_list, original) and
reversed_mro = Cons(head, prev_reverse_mro)
)
or
merge_step(reversed_mro, ConsList(Empty(), remaining_list), original)
}
/**
* Holds if a previous `merge_step` for `original` produced `prev_reverse_mro`
* together with some list of class lists, `head` is the best merge candidate of
* that list, and `remaining_list` is that list with `head` removed.
*
* Helper for `merge_step`, which forms `Cons(head, prev_reverse_mro)` from the result.
*/
pragma[nomagic]
private predicate merge_stepCons(
ClassObjectInternal head, ClassList prev_reverse_mro, ClassListList remaining_list,
ClassListList original
) {
/* Picks the best merge candidate `head` of `prev_list` and removes it, giving `remaining_list`; `merge_step` prepends `head` to `prev_reverse_mro` */
exists(ClassListList prev_list |
merge_step(prev_reverse_mro, prev_list, original) and
head = prev_list.bestMergeCandidate() and
remaining_list = prev_list.remove(head)
)
}
/* Helpers for `ClassList.reverse()` */
private predicate needs_reversing(ClassList lst) {
merge_step(lst, EmptyList(), _)
@@ -439,10 +484,17 @@ private predicate reverse_step(ClassList lst, ClassList remainder, ClassList rev
or
exists(ClassObjectInternal head, ClassList tail |
reversed = Cons(head, tail) and
reverse_step(lst, Cons(head, remainder), tail)
reverse_stepCons(lst, remainder, head, tail)
)
}
/**
* Holds if `reverse_step(lst, Cons(head, remainder), tail)` holds.
*
* Helper for `reverse_step`, which uses it with `reversed = Cons(head, tail)`.
*/
pragma[nomagic]
private predicate reverse_stepCons(
ClassList lst, ClassList remainder, ClassObjectInternal head, ClassList tail
) {
reverse_step(lst, Cons(head, remainder), tail)
}
module Mro {
cached
ClassList newStyleMro(ClassObjectInternal cls) {

View File

@@ -1429,20 +1429,51 @@ module Expressions {
}
pragma[noinline]
predicate subscriptPointsTo(
private predicate indexPointsToInt(ControlFlowNode index, PointsToContext context, int n) {
index = any(SubscriptNode subscr).getIndex() and
PointsToInternal::pointsTo(index, context, TInt(n), _)
}
/** Holds if `value` is `objvalue.getItem(n)` for the sequence object `objvalue`. */
pragma[noinline]
private predicate getItemSequenceObjectInternal(
ObjectInternal value, SequenceObjectInternal objvalue, int n
) {
value = objvalue.getItem(n)
}
/**
* Holds if `subscriptObjectAndIndex(subscr, context, obj, objvalue, index)` holds
* for some `index` that points to the integer `n` in `context`.
*/
pragma[noinline]
private predicate subscriptObjectAndIndexPointsToInt(
SubscriptNode subscr, PointsToContext context, ControlFlowNode obj, ObjectInternal objvalue,
int n
) {
exists(ControlFlowNode index |
subscriptObjectAndIndex(subscr, context, obj, objvalue, index) and
indexPointsToInt(index, context, n)
)
}
/**
 * DEPRECATED.
 *
 * Holds if the five-argument `subscriptPointsTo` holds for `subscr`, `context`,
 * `value`, `obj` and `objvalue`; `origin` is always `subscr` itself.
 */
deprecated predicate subscriptPointsTo(
  SubscriptNode subscr, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
  ControlFlowNode obj, ObjectInternal objvalue
) {
  origin = subscr and
  subscriptPointsTo(subscr, context, value, obj, objvalue)
}
pragma[noinline]
private predicate subscriptPointsTo(
SubscriptNode subscr, PointsToContext context, ObjectInternal value, ControlFlowNode obj,
ObjectInternal objvalue
) {
exists(ControlFlowNode index | subscriptObjectAndIndex(subscr, context, obj, objvalue, index) |
objvalue.subscriptUnknown() and
value = ObjectInternal::unknown()
or
exists(int n |
PointsToInternal::pointsTo(index, context, TInt(n), _) and
value = objvalue.(SequenceObjectInternal).getItem(n)
)
) and
origin = subscr
)
or
exists(int n |
subscriptObjectAndIndexPointsToInt(subscr, context, obj, objvalue, n) and
getItemSequenceObjectInternal(value, objvalue, n)
)
}
predicate subscriptPartsPointsTo(
@@ -1466,15 +1497,22 @@ module Expressions {
index = subscr.getIndex()
}
/**
 * DEPRECATED.
 *
 * Holds if the five-argument `binaryPointsTo` holds for `b`, `context`, `value`,
 * `operand` and `opvalue`; `origin` is always `b` itself.
 */
deprecated predicate binaryPointsTo(
  BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
  ControlFlowNode operand, ObjectInternal opvalue
) {
  origin = b and
  binaryPointsTo(b, context, value, operand, opvalue)
}
/**
* Tracking too many binary expressions is likely to kill performance, so anything other than addition or bitwise-or is just treated as 'unknown'.
*/
pragma[noinline]
predicate binaryPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode operand, ObjectInternal opvalue
private predicate binaryPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode operand,
ObjectInternal opvalue
) {
origin = b and
operand = genericBinaryOperand(b) and
PointsToInternal::pointsTo(operand, context, opvalue, _) and
value = ObjectInternal::unknown()
@@ -1491,12 +1529,19 @@ module Expressions {
)
}
pragma[noinline]
predicate addPointsTo(
deprecated predicate addPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode operand, ObjectInternal opvalue
) {
origin = b and
addPointsTo(b, context, value, operand, opvalue) and
origin = b
}
pragma[noinline]
private predicate addPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode operand,
ObjectInternal opvalue
) {
exists(Operator op |
b.operands(operand, op, _)
or
@@ -1508,12 +1553,19 @@ module Expressions {
)
}
pragma[noinline]
predicate bitOrPointsTo(
deprecated predicate bitOrPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode operand, ObjectInternal opvalue
) {
origin = b and
bitOrPointsTo(b, context, value, operand, opvalue) and
origin = b
}
pragma[noinline]
private predicate bitOrPointsTo(
BinaryExprNode b, PointsToContext context, ObjectInternal value, ControlFlowNode operand,
ObjectInternal opvalue
) {
exists(Operator op, ControlFlowNode other |
b.operands(operand, op, other)
or
@@ -1533,10 +1585,18 @@ module Expressions {
value = obj.intValue()
}
pragma[noinline]
predicate unaryPointsTo(
deprecated predicate unaryPointsTo(
UnaryExprNode u, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode operand, ObjectInternal opvalue
) {
unaryPointsTo(u, context, value, operand, opvalue) and
origin = u
}
pragma[noinline]
private predicate unaryPointsTo(
UnaryExprNode u, PointsToContext context, ObjectInternal value, ControlFlowNode operand,
ObjectInternal opvalue
) {
exists(Unaryop op |
op = u.getNode().getOp() and
@@ -1548,14 +1608,21 @@ module Expressions {
op instanceof USub and value = ObjectInternal::fromInt(-opvalue.intValue())
or
not op instanceof Not and opvalue = ObjectInternal::unknown() and value = opvalue
) and
origin = u
)
}
/**
 * DEPRECATED.
 *
 * Holds if the five-argument `builtinCallPointsTo` holds for `call`, `context`,
 * `value`, `arg` and `argvalue`; `origin` is always `call` itself.
 */
deprecated predicate builtinCallPointsTo(
  CallNode call, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
  ControlFlowNode arg, ObjectInternal argvalue
) {
  origin = call and
  builtinCallPointsTo(call, context, value, arg, argvalue)
}
pragma[noinline]
predicate builtinCallPointsTo(
CallNode call, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode arg, ObjectInternal argvalue
private predicate builtinCallPointsTo(
CallNode call, PointsToContext context, ObjectInternal value, ControlFlowNode arg,
ObjectInternal argvalue
) {
PointsToInternal::pointsTo(arg, context, argvalue, _) and
arg = call.getArg(0) and
@@ -1569,8 +1636,7 @@ module Expressions {
callable != ObjectInternal::builtin("hasattr") and
callable.isClass() = false and
value = ObjectInternal::unknown()
) and
origin = call
)
}
pragma[noinline]
@@ -1585,11 +1651,10 @@ module Expressions {
pragma[noinline]
private predicate lenCallPointsTo(
CallNode call, PointsToContext context, ObjectInternal value, ControlFlowNode origin,
ControlFlowNode arg, ObjectInternal argvalue
CallNode call, PointsToContext context, ObjectInternal value, ControlFlowNode arg,
ObjectInternal argvalue
) {
len_call(call, arg, context, argvalue) and
origin = call and
exists(int len | len = argvalue.length() |
value = TInt(len) and len >= 0
or
@@ -1815,19 +1880,26 @@ module Expressions {
) {
attributePointsTo(expr, context, value, origin, subexpr, subvalue)
or
subscriptPointsTo(expr, context, value, origin, subexpr, subvalue)
subscriptPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
addPointsTo(expr, context, value, origin, subexpr, subvalue)
addPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
bitOrPointsTo(expr, context, value, origin, subexpr, subvalue)
bitOrPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
binaryPointsTo(expr, context, value, origin, subexpr, subvalue)
binaryPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
unaryPointsTo(expr, context, value, origin, subexpr, subvalue)
unaryPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
builtinCallPointsTo(expr, context, value, origin, subexpr, subvalue)
builtinCallPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
lenCallPointsTo(expr, context, value, origin, subexpr, subvalue)
lenCallPointsTo(expr, context, value, subexpr, subvalue) and
origin = expr
or
typeCallPointsTo(expr, context, value, origin, subexpr, subvalue)
or
@@ -2068,6 +2140,12 @@ module Conditionals {
}
}
/**
 * INTERNAL: Do not use.
 *
 * Holds if `var` is a variable named `name` that has a use at a normal exit of
 * the scope of the class `cls`.
 */
predicate declaredAttributeVar(PythonClassObjectInternal cls, string name, EssaVariable var) {
  var.getAUse() = cls.getScope().getANormalExit() and
  var.getName() = name
}
cached
module Types {
cached
@@ -2163,8 +2241,7 @@ module Types {
or
value != ObjectInternal::undefined() and
exists(EssaVariable var |
name = var.getName() and
var.getAUse() = cls.(PythonClassObjectInternal).getScope().getANormalExit() and
declaredAttributeVar(cls, name, var) and
PointsToInternal::variablePointsTo(var, _, value, origin)
)
}

View File

@@ -427,6 +427,7 @@ abstract class RegexString extends Expr {
}
predicate normalCharacter(int start, int end) {
end = start + 1 and
this.character(start, end) and
not this.specialCharacter(start, end, _)
}
@@ -446,6 +447,49 @@ abstract class RegexString extends Expr {
)
}
/**
* Holds if the range [start:end) consists of only 'normal' characters.
*
* Inside a character set, each normal character forms a range of its own.
* Outside character sets, a maximal run of normal characters forms one range,
* except that when `qualifier(e, _, _, _)` holds at the end `e` of the run, the
* last character of the run is split off into its own range.
*/
predicate normalCharacterSequence(int start, int end) {
// a normal character inside a character set is interpreted on its own
this.normalCharacter(start, end) and
this.inCharSet(start)
or
// a maximal run of normal characters is considered as one constant
exists(int s, int e |
e = max(int i | this.normalCharacterRun(s, i)) and
not this.inCharSet(s)
|
// 'abc' can be considered one constant, but
// 'abc+' has to be broken up into 'ab' and 'c+',
// as the qualifier only applies to 'c'.
if this.qualifier(e, _, _, _)
then
// the last character of the run, i.e. the one the qualifier applies to
end = e and start = e - 1
or
// the rest of the run; `start < end` rules out an empty range for a one-character run
end = e - 1 and start = s and start < end
else (
// no qualifier at `e`: the whole run is one range
end = e and
start = s
)
)
}
/**
* Holds if [start:end) is a non-empty range in which every character is a
* normal character, and the character just before `start` is not a normal
* character (so `start` is where a run of normal characters begins).
*/
private predicate normalCharacterRun(int start, int end) {
(
// recursive case: extend a shorter run starting at `start` by one character
this.normalCharacterRun(start, end - 1)
or
// base case: a single character that is not preceded by a normal character
start = end - 1 and not this.normalCharacter(start - 1, start)
) and
// in both cases the last character of the range must itself be normal
this.normalCharacter(end - 1, end)
}
/**
 * Holds if the range [start:end) is a character-level item: a special character,
 * an escaped character, or a sequence of normal characters.
 */
private predicate characterItem(int start, int end) {
  this.specialCharacter(start, end, _)
  or
  this.escapedCharacter(start, end)
  or
  this.normalCharacterSequence(start, end)
}
/** Whether the text in the range start,end is a group */
predicate group(int start, int end) {
this.groupContents(start, end, _, _)
@@ -717,7 +761,7 @@ abstract class RegexString extends Expr {
string getBackrefName(int start, int end) { this.named_backreference(start, end, result) }
private predicate baseItem(int start, int end) {
this.character(start, end) and
this.characterItem(start, end) and
not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
or
this.group(start, end)
@@ -837,14 +881,14 @@ abstract class RegexString extends Expr {
}
private predicate item_start(int start) {
this.character(start, _) or
this.characterItem(start, _) or
this.isGroupStart(start) or
this.charSet(start, _) or
this.backreference(start, _)
}
private predicate item_end(int end) {
this.character(_, end)
this.characterItem(_, end)
or
exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
or
@@ -953,7 +997,7 @@ abstract class RegexString extends Expr {
*/
predicate firstItem(int start, int end) {
(
this.character(start, end)
this.characterItem(start, end)
or
this.qualifiedItem(start, end, _, _)
or
@@ -968,7 +1012,7 @@ abstract class RegexString extends Expr {
*/
predicate lastItem(int start, int end) {
(
this.character(start, end)
this.characterItem(start, end)
or
this.qualifiedItem(start, end, _, _)
or